summaryrefslogtreecommitdiffstats
path: root/third_party/aom/av1/encoder/mips/msa/error_msa.c
diff options
context:
space:
mode:
authortrav90 <travawine@palemoon.org>2018-10-15 21:45:30 -0500
committertrav90 <travawine@palemoon.org>2018-10-15 21:45:30 -0500
commit68569dee1416593955c1570d638b3d9250b33012 (patch)
treed960f017cd7eba3f125b7e8a813789ee2e076310 /third_party/aom/av1/encoder/mips/msa/error_msa.c
parent07c17b6b98ed32fcecff15c083ab0fd878de3cf0 (diff)
downloadUXP-68569dee1416593955c1570d638b3d9250b33012.tar
UXP-68569dee1416593955c1570d638b3d9250b33012.tar.gz
UXP-68569dee1416593955c1570d638b3d9250b33012.tar.lz
UXP-68569dee1416593955c1570d638b3d9250b33012.tar.xz
UXP-68569dee1416593955c1570d638b3d9250b33012.zip
Import aom library
This is the reference implementation for the Alliance for Open Media's av1 video code. The commit used was 4d668d7feb1f8abd809d1bca0418570a7f142a36.
Diffstat (limited to 'third_party/aom/av1/encoder/mips/msa/error_msa.c')
-rw-r--r--third_party/aom/av1/encoder/mips/msa/error_msa.c108
1 files changed, 108 insertions, 0 deletions
diff --git a/third_party/aom/av1/encoder/mips/msa/error_msa.c b/third_party/aom/av1/encoder/mips/msa/error_msa.c
new file mode 100644
index 000000000..8d13af7ad
--- /dev/null
+++ b/third_party/aom/av1/encoder/mips/msa/error_msa.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./av1_rtcd.h"
+#include "aom_dsp/mips/macros_msa.h"
+
+#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
+ static int64_t block_error_##BSize##size_msa( \
+ const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
+ int64_t err = 0; \
+ uint32_t loop_cnt; \
+ v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
+ v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
+ v2i64 sq_coeff_r, sq_coeff_l; \
+ v2i64 err0, err_dup0, err1, err_dup1; \
+ \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
+ sq_coeff_l); \
+ DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ \
+ for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ } \
+ \
+ err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
+ err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
+ sq_coeff_r += err_dup0; \
+ sq_coeff_l += err_dup1; \
+ *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
+ *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
+ \
+ err_dup0 = __msa_splati_d(err0, 1); \
+ err_dup1 = __msa_splati_d(err1, 1); \
+ err0 += err_dup0; \
+ err1 += err_dup1; \
+ err = __msa_copy_s_d(err0, 0); \
+ err += __msa_copy_s_d(err1, 0); \
+ \
+ return err; \
+ }
+
+/* clang-format off */
+BLOCK_ERROR_BLOCKSIZE_MSA(16)
+BLOCK_ERROR_BLOCKSIZE_MSA(64)
+BLOCK_ERROR_BLOCKSIZE_MSA(256)
+BLOCK_ERROR_BLOCKSIZE_MSA(1024)
+/* clang-format on */
+
+int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
+ const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
+ int64_t *ssz) {
+ int64_t err;
+ const int16_t *coeff = (const int16_t *)coeff_ptr;
+ const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
+
+ switch (blk_size) {
+ case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
+ case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
+ case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
+ case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
+ default:
+ err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
+ break;
+ }
+
+ return err;
+}