summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/aom_dsp/loopfilter.c
diff options
context:
space:
mode:
author: trav90 <travawine@palemoon.org> 2018-10-17 05:59:08 -0500
committer: trav90 <travawine@palemoon.org> 2018-10-17 05:59:08 -0500
commit: df9477dfa60ebb5d31bc142e58ce46535c17abce (patch)
tree: c4fdd5d1b09d08c0514f208246260fc87372cb56 /third_party/aom/aom_dsp/loopfilter.c
parent: 0cc51bc106250988cc3b89cb5d743a5af52cd35a (diff)
download: UXP-df9477dfa60ebb5d31bc142e58ce46535c17abce.tar
UXP-df9477dfa60ebb5d31bc142e58ce46535c17abce.tar.gz
UXP-df9477dfa60ebb5d31bc142e58ce46535c17abce.tar.lz
UXP-df9477dfa60ebb5d31bc142e58ce46535c17abce.tar.xz
UXP-df9477dfa60ebb5d31bc142e58ce46535c17abce.zip
Update aom to slightly newer commit ID
Diffstat (limited to 'third_party/aom/aom_dsp/loopfilter.c')
-rw-r--r-- third_party/aom/aom_dsp/loopfilter.c 110
1 file changed, 90 insertions, 20 deletions
diff --git a/third_party/aom/aom_dsp/loopfilter.c b/third_party/aom/aom_dsp/loopfilter.c
index e2e839219..7ea1e6b89 100644
--- a/third_party/aom/aom_dsp/loopfilter.c
+++ b/third_party/aom/aom_dsp/loopfilter.c
@@ -149,10 +149,15 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -179,10 +184,15 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
@@ -206,7 +216,7 @@ void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
aom_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
-static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
+static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
uint8_t *op3, uint8_t *op2, uint8_t *op1,
uint8_t *op0, uint8_t *oq0, uint8_t *oq1,
uint8_t *oq2, uint8_t *oq3) {
@@ -229,10 +239,15 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -256,8 +271,13 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask =
@@ -278,8 +298,8 @@ void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
}
#if PARALLEL_DEBLOCKING_11_TAP
-static INLINE void filter12(int8_t mask, uint8_t thresh, uint8_t flat,
- uint8_t flat2, uint8_t *op5, uint8_t *op4,
+static INLINE void filter12(int8_t mask, uint8_t thresh, int8_t flat,
+ int8_t flat2, uint8_t *op5, uint8_t *op4,
uint8_t *op3, uint8_t *op2, uint8_t *op1,
uint8_t *op0, uint8_t *oq0, uint8_t *oq1,
uint8_t *oq2, uint8_t *oq3, uint8_t *oq4,
@@ -308,8 +328,8 @@ static INLINE void filter12(int8_t mask, uint8_t thresh, uint8_t flat,
#endif
#if PARALLEL_DEBLOCKING_9_TAP
-static INLINE void filter10(int8_t mask, uint8_t thresh, uint8_t flat,
- uint8_t flat2, uint8_t *op4, uint8_t *op3,
+static INLINE void filter10(int8_t mask, uint8_t thresh, int8_t flat,
+ int8_t flat2, uint8_t *op4, uint8_t *op3,
uint8_t *op2, uint8_t *op1, uint8_t *op0,
uint8_t *oq0, uint8_t *oq1, uint8_t *oq2,
uint8_t *oq3, uint8_t *oq4) {
@@ -332,8 +352,8 @@ static INLINE void filter10(int8_t mask, uint8_t thresh, uint8_t flat,
}
#endif
-static INLINE void filter16(int8_t mask, uint8_t thresh, uint8_t flat,
- uint8_t flat2, uint8_t *op7, uint8_t *op6,
+static INLINE void filter16(int8_t mask, uint8_t thresh, int8_t flat,
+ int8_t flat2, uint8_t *op7, uint8_t *op6,
uint8_t *op5, uint8_t *op4, uint8_t *op3,
uint8_t *op2, uint8_t *op1, uint8_t *op0,
uint8_t *oq0, uint8_t *oq1, uint8_t *oq2,
@@ -390,10 +410,15 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh, int count) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int step = 4;
+#else
+ int step = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < step * count; ++i) {
const uint8_t p7 = s[-8 * p], p6 = s[-7 * p], p5 = s[-6 * p],
p4 = s[-5 * p], p3 = s[-4 * p], p2 = s[-3 * p],
p1 = s[-2 * p], p0 = s[-p];
@@ -436,7 +461,11 @@ void aom_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
+#else
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
+#endif
}
static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
@@ -478,7 +507,11 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
+#else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
+#endif
}
void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
@@ -596,10 +629,15 @@ void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int bd) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint16_t p3 = s[-4 * p];
const uint16_t p2 = s[-3 * p];
@@ -636,10 +674,15 @@ void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
@@ -665,7 +708,7 @@ void aom_highbd_lpf_vertical_4_dual_c(
bd);
}
-static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
+static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, int8_t flat,
uint16_t *op3, uint16_t *op2, uint16_t *op1,
uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
uint16_t *oq2, uint16_t *oq3, int bd) {
@@ -689,10 +732,15 @@ void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -718,8 +766,13 @@ void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < count; ++i) {
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask =
@@ -741,8 +794,8 @@ void aom_highbd_lpf_vertical_8_dual_c(
bd);
}
-static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, uint8_t flat,
- uint8_t flat2, uint16_t *op7, uint16_t *op6,
+static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, int8_t flat,
+ int8_t flat2, uint16_t *op7, uint16_t *op6,
uint16_t *op5, uint16_t *op4, uint16_t *op3,
uint16_t *op2, uint16_t *op1, uint16_t *op0,
uint16_t *oq0, uint16_t *oq1, uint16_t *oq2,
@@ -813,10 +866,15 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
const uint8_t *thresh, int count,
int bd) {
int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int step = 4;
+#else
+ int step = 8;
+#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < step * count; ++i) {
const uint16_t p3 = s[-4 * p];
const uint16_t p2 = s[-3 * p];
const uint16_t p1 = s[-2 * p];
@@ -852,7 +910,11 @@ void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh, int bd) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
+#else
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
+#endif
}
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
@@ -888,13 +950,21 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
void aom_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4, bd);
+#else
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
+#endif
}
void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh, int bd) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
+#else
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd);
+#endif
}
#endif // CONFIG_HIGHBITDEPTH