summaryrefslogtreecommitdiffstats
path: root/third_party/aom/av1/encoder/mips/msa
diff options
context:
space:
mode:
authorMatt A. Tobin <email@mattatobin.com>2020-04-07 23:30:51 -0400
committerwolfbeast <mcwerewolf@wolfbeast.com>2020-04-14 13:26:42 +0200
commit277f2116b6660e9bbe7f5d67524be57eceb49b8b (patch)
tree4595f7cc71418f71b9a97dfaeb03a30aa60f336a /third_party/aom/av1/encoder/mips/msa
parentd270404436f6e84ffa3b92af537ac721bf10d66e (diff)
downloadUXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar.gz
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar.lz
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar.xz
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.zip
Move aom source to a sub-directory under media/libaom
There is no damned reason to treat this differently than any other media lib given its license and there never was.
Diffstat (limited to 'third_party/aom/av1/encoder/mips/msa')
-rw-r--r--third_party/aom/av1/encoder/mips/msa/error_msa.c109
-rw-r--r--third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c46
-rw-r--r--third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c285
3 files changed, 0 insertions, 440 deletions
diff --git a/third_party/aom/av1/encoder/mips/msa/error_msa.c b/third_party/aom/av1/encoder/mips/msa/error_msa.c
deleted file mode 100644
index 2e86dee43..000000000
--- a/third_party/aom/av1/encoder/mips/msa/error_msa.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
- static int64_t block_error_##BSize##size_msa( \
- const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
- int64_t err = 0; \
- uint32_t loop_cnt; \
- v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
- v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
- v2i64 sq_coeff_r, sq_coeff_l; \
- v2i64 err0, err_dup0, err1, err_dup1; \
- \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
- sq_coeff_l); \
- DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- \
- for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- } \
- \
- err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
- err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
- sq_coeff_r += err_dup0; \
- sq_coeff_l += err_dup1; \
- *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
- *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
- \
- err_dup0 = __msa_splati_d(err0, 1); \
- err_dup1 = __msa_splati_d(err1, 1); \
- err0 += err_dup0; \
- err1 += err_dup1; \
- err = __msa_copy_s_d(err0, 0); \
- err += __msa_copy_s_d(err1, 0); \
- \
- return err; \
- }
-
-/* clang-format off */
-BLOCK_ERROR_BLOCKSIZE_MSA(16)
-BLOCK_ERROR_BLOCKSIZE_MSA(64)
-BLOCK_ERROR_BLOCKSIZE_MSA(256)
-BLOCK_ERROR_BLOCKSIZE_MSA(1024)
-/* clang-format on */
-
-int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
- const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
- int64_t *ssz) {
- int64_t err;
- const int16_t *coeff = (const int16_t *)coeff_ptr;
- const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
-
- switch (blk_size) {
- case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
- case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
- case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
- case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
- default:
- err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
- break;
- }
-
- return err;
-}
diff --git a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c b/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c
deleted file mode 100644
index 085c08bfb..000000000
--- a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-
-void av1_fwht4x4_msa(const int16_t *input, int16_t *output,
- int32_t src_stride) {
- v8i16 in0, in1, in2, in3, in4;
-
- LD_SH4(input, src_stride, in0, in1, in2, in3);
-
- in0 += in1;
- in3 -= in2;
- in4 = (in0 - in3) >> 1;
- SUB2(in4, in1, in4, in2, in1, in2);
- in0 -= in2;
- in3 += in1;
-
- TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
-
- in0 += in2;
- in1 -= in3;
- in4 = (in0 - in1) >> 1;
- SUB2(in4, in2, in4, in3, in2, in3);
- in0 -= in3;
- in1 += in2;
-
- SLLI_4V(in0, in1, in2, in3, 2);
-
- TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);
-
- ST4x2_UB(in0, output, 4);
- ST4x2_UB(in3, output + 4, 4);
- ST4x2_UB(in1, output + 8, 4);
- ST4x2_UB(in2, output + 12, 4);
-}
diff --git a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c b/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c
deleted file mode 100644
index 531ae090a..000000000
--- a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
- uint8_t *frm2_ptr, int32_t filt_sth,
- int32_t filt_wgt, uint32_t *acc,
- uint16_t *cnt) {
- uint32_t row;
- uint64_t f0, f1, f2, f3;
- v16i8 frm2, frm1 = { 0 };
- v16i8 frm4, frm3 = { 0 };
- v16u8 frm_r, frm_l;
- v8i16 frm2_r, frm2_l;
- v8i16 diff0, diff1, mod0_h, mod1_h;
- v4i32 cnst3, cnst16, filt_wt, strength;
- v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
- v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
- v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
- v4i32 acc0, acc1, acc2, acc3;
- v8i16 cnt0, cnt1;
-
- filt_wt = __msa_fill_w(filt_wgt);
- strength = __msa_fill_w(filt_sth);
- cnst3 = __msa_ldi_w(3);
- cnst16 = __msa_ldi_w(16);
-
- for (row = 2; row--;) {
- LD4(frm1_ptr, stride, f0, f1, f2, f3);
- frm1_ptr += (4 * stride);
-
- LD_SB2(frm2_ptr, 16, frm2, frm4);
- frm2_ptr += 32;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- INSERT_D2_SB(f0, f1, frm1);
- INSERT_D2_SB(f2, f3, frm3);
- ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- UNPCK_UB_SH(frm2, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
- UNPCK_UB_SH(frm4, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
- }
-}
-
-static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
- uint8_t *frm2_ptr,
- int32_t filt_sth, int32_t filt_wgt,
- uint32_t *acc, uint16_t *cnt) {
- uint32_t row;
- v16i8 frm1, frm2, frm3, frm4;
- v16u8 frm_r, frm_l;
- v16i8 zero = { 0 };
- v8u16 frm2_r, frm2_l;
- v8i16 diff0, diff1, mod0_h, mod1_h;
- v4i32 cnst3, cnst16, filt_wt, strength;
- v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
- v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
- v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
- v4i32 acc0, acc1, acc2, acc3;
- v8i16 cnt0, cnt1;
-
- filt_wt = __msa_fill_w(filt_wgt);
- strength = __msa_fill_w(filt_sth);
- cnst3 = __msa_ldi_w(3);
- cnst16 = __msa_ldi_w(16);
-
- for (row = 8; row--;) {
- LD_SB2(frm1_ptr, stride, frm1, frm3);
- frm1_ptr += stride;
-
- LD_SB2(frm2_ptr, 16, frm2, frm4);
- frm2_ptr += 16;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- frm1_ptr += stride;
- frm2_ptr += 16;
- }
-}
-
-void av1_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
- uint8_t *frame2_ptr, uint32_t blk_w,
- uint32_t blk_h, int32_t strength,
- int32_t filt_wgt, uint32_t *accu,
- uint16_t *cnt) {
- if (8 == (blk_w * blk_h)) {
- temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
- filt_wgt, accu, cnt);
- } else if (16 == (blk_w * blk_h)) {
- temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
- filt_wgt, accu, cnt);
- } else {
- av1_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
- strength, filt_wgt, accu, cnt);
- }
-}