diff options
Diffstat (limited to 'third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c')
-rw-r--r-- | third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c | 248 |
1 files changed, 0 insertions, 248 deletions
diff --git a/third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c b/third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c deleted file mode 100644 index f7f116f4d..000000000 --- a/third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <string.h> -#include "aom_dsp/mips/macros_msa.h" - -static void copy_width8_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, int32_t height) { - int32_t cnt; - uint64_t out0, out1, out2, out3, out4, out5, out6, out7; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - - if (0 == height % 12) { - for (cnt = (height / 12); cnt--;) { - LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); - src += (8 * src_stride); - - out0 = __msa_copy_u_d((v2i64)src0, 0); - out1 = __msa_copy_u_d((v2i64)src1, 0); - out2 = __msa_copy_u_d((v2i64)src2, 0); - out3 = __msa_copy_u_d((v2i64)src3, 0); - out4 = __msa_copy_u_d((v2i64)src4, 0); - out5 = __msa_copy_u_d((v2i64)src5, 0); - out6 = __msa_copy_u_d((v2i64)src6, 0); - out7 = __msa_copy_u_d((v2i64)src7, 0); - - SD4(out0, out1, out2, out3, dst, dst_stride); - dst += (4 * dst_stride); - SD4(out4, out5, out6, out7, dst, dst_stride); - dst += (4 * dst_stride); - - LD_UB4(src, src_stride, src0, src1, src2, src3); - src += (4 * src_stride); - - out0 = __msa_copy_u_d((v2i64)src0, 0); - out1 = __msa_copy_u_d((v2i64)src1, 0); - out2 = __msa_copy_u_d((v2i64)src2, 0); - out3 = __msa_copy_u_d((v2i64)src3, 0); - SD4(out0, out1, out2, out3, dst, dst_stride); - dst += (4 * dst_stride); - } - } else if (0 == height % 8) { - for (cnt = height >> 3; cnt--;) { - LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); - src += (8 * src_stride); - - out0 = __msa_copy_u_d((v2i64)src0, 0); - out1 = __msa_copy_u_d((v2i64)src1, 0); - out2 = __msa_copy_u_d((v2i64)src2, 0); - out3 = __msa_copy_u_d((v2i64)src3, 0); - out4 = __msa_copy_u_d((v2i64)src4, 0); - out5 = __msa_copy_u_d((v2i64)src5, 0); - out6 = __msa_copy_u_d((v2i64)src6, 0); - out7 = __msa_copy_u_d((v2i64)src7, 0); - - SD4(out0, out1, out2, out3, dst, dst_stride); - dst += (4 * dst_stride); - SD4(out4, out5, out6, out7, dst, dst_stride); - dst += (4 * dst_stride); - } - } else if (0 == height % 4) { - for (cnt = (height / 4); cnt--;) { - LD_UB4(src, src_stride, src0, src1, src2, src3); - src += (4 * src_stride); - out0 = __msa_copy_u_d((v2i64)src0, 0); - out1 = __msa_copy_u_d((v2i64)src1, 0); - out2 = __msa_copy_u_d((v2i64)src2, 0); - out3 = __msa_copy_u_d((v2i64)src3, 0); - - SD4(out0, out1, out2, out3, dst, dst_stride); - dst += (4 * dst_stride); - } - } else if (0 == height % 2) { - for (cnt = (height / 2); cnt--;) { - LD_UB2(src, src_stride, src0, src1); - src += (2 * src_stride); - out0 = __msa_copy_u_d((v2i64)src0, 0); - out1 = __msa_copy_u_d((v2i64)src1, 0); - - SD(out0, dst); - dst += dst_stride; - SD(out1, dst); - dst += dst_stride; - } - } -} - -static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, - int32_t height, int32_t width) { - int32_t cnt, loop_cnt; - const uint8_t *src_tmp; - uint8_t *dst_tmp; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - - for (cnt = (width >> 4); cnt--;) { - src_tmp = src; - dst_tmp = dst; - - for (loop_cnt = (height >> 3); loop_cnt--;) { - LD_UB8(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6, - src7); - src_tmp += (8 * src_stride); - - ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst_tmp, - dst_stride); - dst_tmp += (8 * dst_stride); - } - - src += 16; - dst += 16; - } -} - -static void copy_width16_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, int32_t height) { - int32_t cnt; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - - if (0 == height % 12) { - for (cnt = (height / 12); cnt--;) { - LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); - src += (8 * src_stride); - ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride); - dst += (8 * dst_stride); - - LD_UB4(src, src_stride, src0, src1, src2, src3); - src += (4 * src_stride); - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - dst += (4 * dst_stride); - } - } else if (0 == height % 8) { - copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); - } else if (0 == height % 4) { - for (cnt = (height >> 2); cnt--;) { - LD_UB4(src, src_stride, src0, src1, src2, src3); - src += (4 * src_stride); - - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - dst += (4 * dst_stride); - } - } -} - -static void copy_width32_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, int32_t height) { - int32_t cnt; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - - if (0 == height % 12) { - for (cnt = (height / 12); cnt--;) { - LD_UB4(src, src_stride, src0, src1, src2, src3); - LD_UB4(src + 16, src_stride, src4, src5, src6, src7); - src += (4 * src_stride); - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); - dst += (4 * dst_stride); - - LD_UB4(src, src_stride, src0, src1, src2, src3); - LD_UB4(src + 16, src_stride, src4, src5, src6, src7); - src += (4 * src_stride); - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); - dst += (4 * dst_stride); - - LD_UB4(src, src_stride, src0, src1, src2, src3); - LD_UB4(src + 16, src_stride, src4, src5, src6, src7); - src += (4 * src_stride); - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); - dst += (4 * dst_stride); - } - } else if (0 == height % 8) { - copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32); - } else if (0 == height % 4) { - for (cnt = (height >> 2); cnt--;) { - LD_UB4(src, src_stride, src0, src1, src2, src3); - LD_UB4(src + 16, src_stride, src4, src5, src6, src7); - src += (4 * src_stride); - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); - dst += (4 * dst_stride); - } - } -} - -static void copy_width64_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, int32_t height) { - copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64); -} - -void aom_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int32_t filter_x_stride, - const int16_t *filter_y, int32_t filter_y_stride, - int32_t w, int32_t h) { - (void)filter_x; - (void)filter_y; - (void)filter_x_stride; - (void)filter_y_stride; - - switch (w) { - case 4: { - uint32_t cnt, tmp; - /* 1 word storage */ - for (cnt = h; cnt--;) { - tmp = LW(src); - SW(tmp, dst); - src += src_stride; - dst += dst_stride; - } - break; - } - case 8: { - copy_width8_msa(src, src_stride, dst, dst_stride, h); - break; - } - case 16: { - copy_width16_msa(src, src_stride, dst, dst_stride, h); - break; - } - case 32: { - copy_width32_msa(src, src_stride, dst, dst_stride, h); - break; - } - case 64: { - copy_width64_msa(src, src_stride, dst, dst_stride, h); - break; - } - default: { - uint32_t cnt; - for (cnt = h; cnt--;) { - memcpy(dst, src, w); - src += src_stride; - dst += dst_stride; - } - break; - } - } -} |