summary refs log tree commit diff stats
path: root/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c')
-rw-r--r-- third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c 26
1 files changed, 20 insertions, 6 deletions
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
index ff4a0a0fe..195f0f570 100644
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -32,6 +32,7 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int do_average = conv_params->do_average;
const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
/* Horizontal filter */
@@ -185,9 +186,15 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
// Accumulate values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ if (do_average) {
+ _mm_storeu_si128(p + 0,
+ _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
+ _mm_storeu_si128(p + 1,
+ _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ } else {
+ _mm_storeu_si128(p + 0, res_lo_round);
+ _mm_storeu_si128(p + 1, res_hi_round);
+ }
}
}
}
@@ -204,6 +211,7 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
int im_h = h + filter_params_y->taps - 1;
int im_stride = MAX_SB_SIZE;
int i, j;
+ const int do_average = conv_params->do_average;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
@@ -362,9 +370,15 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
// Accumulate values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ if (do_average) {
+ _mm_storeu_si128(p + 0,
+ _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
+ _mm_storeu_si128(p + 1,
+ _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ } else {
+ _mm_storeu_si128(p + 0, res_lo_round);
+ _mm_storeu_si128(p + 1, res_hi_round);
+ }
}
}
}