summaryrefslogtreecommitdiffstats
path: root/media
diff options
context:
space:
mode:
Diffstat (limited to 'media')
-rw-r--r--media/libcubeb/src/cubeb.c6
-rw-r--r--media/libcubeb/src/cubeb_sndio.c2
-rw-r--r--media/libcubeb/src/cubeb_sun.c504
-rw-r--r--media/libcubeb/src/moz.build8
-rw-r--r--media/libcubeb/tests/moz.build2
-rw-r--r--media/libpng/moz.build3
-rw-r--r--media/libstagefright/moz.build2
-rw-r--r--media/libwebp/dsp/alpha_processing_neon.c191
-rw-r--r--media/libwebp/dsp/filters_neon.c329
-rw-r--r--media/libwebp/dsp/moz.build6
-rw-r--r--media/libwebp/dsp/yuv_neon.c288
-rw-r--r--media/libwebp/update.sh3
-rw-r--r--media/mtransport/third_party/nICEr/src/ice/ice_component.c7
13 files changed, 1346 insertions, 5 deletions
diff --git a/media/libcubeb/src/cubeb.c b/media/libcubeb/src/cubeb.c
index eb22a9b94..a239319a4 100644
--- a/media/libcubeb/src/cubeb.c
+++ b/media/libcubeb/src/cubeb.c
@@ -54,6 +54,9 @@ int audiotrack_init(cubeb ** context, char const * context_name);
#if defined(USE_KAI)
int kai_init(cubeb ** context, char const * context_name);
#endif
+#if defined(USE_SUN)
+int sunaudio_init(cubeb ** context, char const * context_name);
+#endif
static int
@@ -141,6 +144,9 @@ cubeb_init(cubeb ** context, char const * context_name)
#if defined(USE_KAI)
kai_init,
#endif
+#if defined(USE_SUN)
+ sunaudio_init,
+#endif
};
int i;
diff --git a/media/libcubeb/src/cubeb_sndio.c b/media/libcubeb/src/cubeb_sndio.c
index 793789765..c7ac18446 100644
--- a/media/libcubeb/src/cubeb_sndio.c
+++ b/media/libcubeb/src/cubeb_sndio.c
@@ -245,7 +245,7 @@ sndio_stream_init(cubeb * context,
s->data_cb = data_callback;
s->state_cb = state_callback;
s->arg = user_ptr;
- s->mtx = PTHREAD_MUTEX_INITIALIZER;
+ s->mtx = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
s->rdpos = s->wrpos = 0;
if (output_stream_params->format == CUBEB_SAMPLE_FLOAT32LE) {
s->conv = 1;
diff --git a/media/libcubeb/src/cubeb_sun.c b/media/libcubeb/src/cubeb_sun.c
new file mode 100644
index 000000000..b768bca56
--- /dev/null
+++ b/media/libcubeb/src/cubeb_sun.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2013, 2017 Ginn Chen <ginnchen@gmail.com>
+ *
+ * This program is made available under an ISC-style license. See the
+ * accompanying file LICENSE for details.
+ */
+#include <poll.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/audio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/stropts.h>
+#include "cubeb/cubeb.h"
+#include "cubeb-internal.h"
+
+/* Macros copied from audio_oss.h */
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 4Front Technologies 1996-2008.
+ *
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#define OSSIOCPARM_MASK 0x1fff /* parameters must be < 8192 bytes */
+#define OSSIOC_VOID 0x00000000 /* no parameters */
+#define OSSIOC_OUT 0x20000000 /* copy out parameters */
+#define OSSIOC_IN 0x40000000 /* copy in parameters */
+#define OSSIOC_INOUT (OSSIOC_IN|OSSIOC_OUT)
+#define OSSIOC_SZ(t) ((sizeof (t) & OSSIOCPARM_MASK) << 16)
+#define __OSSIO(x, y) ((int)(OSSIOC_VOID|(x<<8)|y))
+#define __OSSIOR(x, y, t) ((int)(OSSIOC_OUT|OSSIOC_SZ(t)|(x<<8)|y))
+#define __OSSIOWR(x, y, t) ((int)(OSSIOC_INOUT|OSSIOC_SZ(t)|(x<<8)|y))
+#define SNDCTL_DSP_SPEED __OSSIOWR('P', 2, int)
+#define SNDCTL_DSP_CHANNELS __OSSIOWR('P', 6, int)
+#define SNDCTL_DSP_SETFMT __OSSIOWR('P', 5, int) /* Selects ONE fmt */
+#define SNDCTL_DSP_GETODELAY __OSSIOR('P', 23, int)
+#define SNDCTL_DSP_HALT_OUTPUT __OSSIO('P', 34)
+#define AFMT_S16_LE 0x00000010
+#define AFMT_S16_BE 0x00000020
+
+#if defined(WORDS_BIGENDIAN) || defined(__BIG_ENDIAN__)
+#define AFMT_S16_NE AFMT_S16_BE
+#else
+#define AFMT_S16_NE AFMT_S16_LE
+#endif
+
+#define DEFAULT_AUDIO_DEVICE "/dev/audio"
+#define DEFAULT_DSP_DEVICE "/dev/dsp"
+
+#define BUF_SIZE_MS 10
+
+#if defined(CUBEB_SUNAUDIO_DEBUG)
+#define DPR(...) fprintf(stderr, __VA_ARGS__);
+#else
+#define DPR(...) do {} while(0)
+#endif
+
+static struct cubeb_ops const sunaudio_ops;
+
+struct cubeb {
+ struct cubeb_ops const * ops;
+};
+
+struct cubeb_stream {
+ cubeb * context;
+ pthread_t th; /* to run real-time audio i/o */
+ pthread_mutex_t mutex; /* protects fd and frm_played */
+ int fd; /* link us to sunaudio */
+ int active; /* cubec_start() called */
+ int conv; /* need float->s16 conversion */
+ int using_oss;
+ unsigned char *buf; /* data is prepared here */
+ unsigned int rate;
+ unsigned int n_channles;
+ unsigned int bytes_per_ch;
+ unsigned int n_frm;
+ unsigned int buffer_size;
+ int64_t frm_played;
+ cubeb_data_callback data_cb; /* cb to preapare data */
+ cubeb_state_callback state_cb; /* cb to notify about state changes */
+ void *arg; /* user arg to {data,state}_cb */
+};
+
+static void
+float_to_s16(void *ptr, long nsamp)
+{
+ int16_t *dst = ptr;
+ float *src = ptr;
+
+ while (nsamp-- > 0)
+ *(dst++) = *(src++) * 32767;
+}
+
+static void *
+sunaudio_mainloop(void *arg)
+{
+ struct cubeb_stream *s = arg;
+ int state;
+
+ DPR("sunaudio_mainloop()\n");
+
+ s->state_cb(s, s->arg, CUBEB_STATE_STARTED);
+
+ pthread_mutex_lock(&s->mutex);
+ DPR("sunaudio_mainloop(), started\n");
+
+ for (;;) {
+ if (!s->active) {
+ DPR("sunaudio_mainloop() stopped\n");
+ state = CUBEB_STATE_STOPPED;
+ break;
+ }
+
+ if (!s->using_oss) {
+ audio_info_t info;
+ ioctl(s->fd, AUDIO_GETINFO, &info);
+ if (s->frm_played > info.play.samples + 3 * s->n_frm) {
+ pthread_mutex_unlock(&s->mutex);
+ struct timespec ts = {0, 10000}; // 10 ms
+ nanosleep(&ts, NULL);
+ pthread_mutex_lock(&s->mutex);
+ continue;
+ }
+ }
+
+ pthread_mutex_unlock(&s->mutex);
+ unsigned int got = s->data_cb(s, s->arg, NULL, s->buf, s->n_frm);
+ DPR("sunaudio_mainloop() ask %d got %d\n", s->n_frm, got);
+ pthread_mutex_lock(&s->mutex);
+
+ if (got < 0) {
+ DPR("sunaudio_mainloop() cb err\n");
+ state = CUBEB_STATE_ERROR;
+ break;
+ }
+
+ if (s->conv) {
+ float_to_s16(s->buf, got * s->n_channles);
+ }
+
+ unsigned int avail = got * 2 * s->n_channles; // coverted to s16
+ unsigned int pos = 0;
+
+ while (avail > 0 && s->active) {
+ int written = write(s->fd, s->buf + pos, avail);
+ if (written == -1) {
+ if (errno != EINTR && errno != EWOULDBLOCK) {
+ DPR("sunaudio_mainloop() write err\n");
+ state = CUBEB_STATE_ERROR;
+ break;
+ }
+ pthread_mutex_unlock(&s->mutex);
+ struct timespec ts = {0, 10000}; // 10 ms
+ nanosleep(&ts, NULL);
+ pthread_mutex_lock(&s->mutex);
+ } else {
+ pos += written;
+ DPR("sunaudio_mainloop() write %d pos %d\n", written, pos);
+ s->frm_played += written / 2 / s->n_channles;
+ avail -= written;
+ }
+ }
+
+ if ((got < s->n_frm)) {
+ DPR("sunaudio_mainloop() drained\n");
+ state = CUBEB_STATE_DRAINED;
+ break;
+ }
+ }
+
+ pthread_mutex_unlock(&s->mutex);
+ s->state_cb(s, s->arg, state);
+
+ return NULL;
+}
+
+/*static*/ int
+sunaudio_init(cubeb **context, char const *context_name)
+{
+ DPR("sunaudio_init(%s)\n", context_name);
+ *context = malloc(sizeof(*context));
+ (*context)->ops = &sunaudio_ops;
+ (void)context_name;
+ return CUBEB_OK;
+}
+
+static char const *
+sunaudio_get_backend_id(cubeb *context)
+{
+ return "sunaudio";
+}
+
+static void
+sunaudio_destroy(cubeb *context)
+{
+ DPR("sunaudio_destroy()\n");
+ free(context);
+}
+
+static int
+sunaudio_stream_init(cubeb *context,
+ cubeb_stream **stream,
+ char const *stream_name,
+ cubeb_devid input_device,
+ cubeb_stream_params * input_stream_params,
+ cubeb_devid output_device,
+ cubeb_stream_params * output_stream_params,
+ unsigned int latency,
+ cubeb_data_callback data_callback,
+ cubeb_state_callback state_callback,
+ void *user_ptr)
+{
+ struct cubeb_stream *s;
+ DPR("sunaudio_stream_init(%s)\n", stream_name);
+ size_t size;
+
+ s = malloc(sizeof(struct cubeb_stream));
+ if (s == NULL)
+ return CUBEB_ERROR;
+ s->context = context;
+
+ // If UTAUDIODEV is set, use it with Sun Audio interface
+ char * sa_device_name = getenv("UTAUDIODEV");
+ char * dsp_device_name = NULL;
+ if (!sa_device_name) {
+ dsp_device_name = getenv("AUDIODSP");
+ if (!dsp_device_name) {
+ dsp_device_name = DEFAULT_DSP_DEVICE;
+ }
+ sa_device_name = getenv("AUDIODEV");
+ if (!sa_device_name) {
+ sa_device_name = DEFAULT_AUDIO_DEVICE;
+ }
+ }
+
+ s->using_oss = 0;
+ // Try to use OSS if available
+ if (dsp_device_name) {
+ s->fd = open(dsp_device_name, O_WRONLY | O_NONBLOCK);
+ if (s->fd >= 0) {
+ s->using_oss = 1;
+ }
+ }
+
+ // Try Sun Audio
+ if (!s->using_oss) {
+ s->fd = open(sa_device_name, O_WRONLY | O_NONBLOCK);
+ }
+
+ if (s->fd < 0) {
+ free(s);
+ DPR("sunaudio_stream_init(), open() failed\n");
+ return CUBEB_ERROR;
+ }
+
+ if (s->using_oss) {
+ if (ioctl(s->fd, SNDCTL_DSP_SPEED, &output_stream_params->rate) < 0) {
+ DPR("ioctl SNDCTL_DSP_SPEED failed.\n");
+ close(s->fd);
+ free(s);
+ return CUBEB_ERROR_INVALID_FORMAT;
+ }
+
+ if (ioctl(s->fd, SNDCTL_DSP_CHANNELS, &output_stream_params->channels) < 0) {
+ DPR("ioctl SNDCTL_DSP_CHANNELS failed.\n");
+ close(s->fd);
+ free(s);
+ return CUBEB_ERROR_INVALID_FORMAT;
+ }
+
+ int format = AFMT_S16_NE;
+ if (ioctl(s->fd, SNDCTL_DSP_SETFMT, &format) < 0) {
+ DPR("ioctl SNDCTL_DSP_SETFMT failed.\n");
+ close(s->fd);
+ free(s);
+ return CUBEB_ERROR_INVALID_FORMAT;
+ }
+ } else {
+ audio_info_t audio_info;
+ AUDIO_INITINFO(&audio_info)
+ audio_info.play.sample_rate = output_stream_params->rate;
+ audio_info.play.channels = output_stream_params->channels;
+ audio_info.play.encoding = AUDIO_ENCODING_LINEAR;
+ audio_info.play.precision = 16;
+ if (ioctl(s->fd, AUDIO_SETINFO, &audio_info) == -1) {
+ DPR("ioctl AUDIO_SETINFO failed.\n");
+ close(s->fd);
+ free(s);
+ return CUBEB_ERROR_INVALID_FORMAT;
+ }
+ }
+
+ s->conv = 0;
+ switch (output_stream_params->format) {
+ case CUBEB_SAMPLE_S16NE:
+ s->bytes_per_ch = 2;
+ break;
+ case CUBEB_SAMPLE_FLOAT32NE:
+ s->bytes_per_ch = 4;
+ s->conv = 1;
+ break;
+ default:
+ DPR("sunaudio_stream_init() unsupported format\n");
+ close(s->fd);
+ free(s);
+ return CUBEB_ERROR_INVALID_FORMAT;
+ }
+
+ s->active = 0;
+ s->rate = output_stream_params->rate;
+ s->n_channles = output_stream_params->channels;
+ s->data_cb = data_callback;
+ s->state_cb = state_callback;
+ s->arg = user_ptr;
+ if (pthread_mutex_init(&s->mutex, NULL) != 0) {
+ free(s);
+ return CUBEB_ERROR;
+ }
+ s->frm_played = 0;
+ s->n_frm = s->rate * BUF_SIZE_MS / 1000;
+ s->buffer_size = s->bytes_per_ch * s->n_channles * s->n_frm;
+ s->buf = malloc(s->buffer_size);
+ if (s->buf == NULL) {
+ close(s->fd);
+ free(s);
+ return CUBEB_ERROR;
+ }
+
+ *stream = s;
+ DPR("sunaudio_stream_init() end, ok\n");
+ return CUBEB_OK;
+}
+
+static void
+sunaudio_stream_destroy(cubeb_stream *s)
+{
+ DPR("sunaudio_stream_destroy()\n");
+ if (s->fd > 0) {
+ // Flush buffer
+ if (s->using_oss) {
+ ioctl(s->fd, SNDCTL_DSP_HALT_OUTPUT);
+ } else {
+ ioctl(s->fd, I_FLUSH);
+ }
+ close(s->fd);
+ }
+ free(s->buf);
+ free(s);
+}
+
+static int
+sunaudio_stream_start(cubeb_stream *s)
+{
+ int err;
+
+ DPR("sunaudio_stream_start()\n");
+ s->active = 1;
+ err = pthread_create(&s->th, NULL, sunaudio_mainloop, s);
+ if (err) {
+ s->active = 0;
+ return CUBEB_ERROR;
+ }
+ return CUBEB_OK;
+}
+
+static int
+sunaudio_stream_stop(cubeb_stream *s)
+{
+ void *dummy;
+
+ DPR("sunaudio_stream_stop()\n");
+ if (s->active) {
+ s->active = 0;
+ pthread_join(s->th, &dummy);
+ }
+ return CUBEB_OK;
+}
+
+static int
+sunaudio_stream_get_position(cubeb_stream *s, uint64_t *p)
+{
+ int rv = CUBEB_OK;
+ pthread_mutex_lock(&s->mutex);
+ if (s->active && s->fd > 0) {
+ if (s->using_oss) {
+ int delay;
+ ioctl(s->fd, SNDCTL_DSP_GETODELAY, &delay);
+ int64_t t = s->frm_played - delay / s->n_channles / 2;
+ if (t < 0) {
+ *p = 0;
+ } else {
+ *p = t;
+ }
+ } else {
+ audio_info_t info;
+ ioctl(s->fd, AUDIO_GETINFO, &info);
+ *p = info.play.samples;
+ }
+ DPR("sunaudio_stream_get_position() %lld\n", *p);
+ } else {
+ rv = CUBEB_ERROR;
+ }
+ pthread_mutex_unlock(&s->mutex);
+ return rv;
+}
+
+static int
+sunaudio_get_max_channel_count(cubeb * ctx, uint32_t * max_channels)
+{
+ if (!ctx || !max_channels)
+ return CUBEB_ERROR;
+
+ *max_channels = 2;
+
+ return CUBEB_OK;
+}
+
+static int
+sunaudio_get_preferred_sample_rate(cubeb * ctx, uint32_t * rate)
+{
+ if (!ctx || !rate)
+ return CUBEB_ERROR;
+
+ // XXX Not yet implemented.
+ *rate = 44100;
+
+ return CUBEB_OK;
+}
+
+static int
+sunaudio_get_min_latency(cubeb * ctx, cubeb_stream_params params, uint32_t * latency_ms)
+{
+ if (!ctx || !latency_ms)
+ return CUBEB_ERROR;
+
+ // XXX Not yet implemented.
+ *latency_ms = 20;
+
+ return CUBEB_OK;
+}
+
+static int
+sunaudio_stream_get_latency(cubeb_stream * s, uint32_t * latency)
+{
+ if (!s || !latency)
+ return CUBEB_ERROR;
+
+ int rv = CUBEB_OK;
+ pthread_mutex_lock(&s->mutex);
+ if (s->active && s->fd > 0) {
+ if (s->using_oss) {
+ int delay;
+ ioctl(s->fd, SNDCTL_DSP_GETODELAY, &delay);
+ *latency = delay / s->n_channles / 2 / s->rate;
+ } else {
+ audio_info_t info;
+ ioctl(s->fd, AUDIO_GETINFO, &info);
+ *latency = (s->frm_played - info.play.samples) / s->rate;
+ }
+ DPR("sunaudio_stream_get_position() %lld\n", *p);
+ } else {
+ rv = CUBEB_ERROR;
+ }
+ pthread_mutex_unlock(&s->mutex);
+ return rv;
+}
+
+static struct cubeb_ops const sunaudio_ops = {
+ .init = sunaudio_init,
+ .get_backend_id = sunaudio_get_backend_id,
+ .destroy = sunaudio_destroy,
+ .get_preferred_sample_rate = sunaudio_get_preferred_sample_rate,
+ .stream_init = sunaudio_stream_init,
+ .stream_destroy = sunaudio_stream_destroy,
+ .stream_start = sunaudio_stream_start,
+ .stream_stop = sunaudio_stream_stop,
+ .stream_get_position = sunaudio_stream_get_position,
+ .get_max_channel_count = sunaudio_get_max_channel_count,
+ .get_min_latency = sunaudio_get_min_latency,
+ .stream_get_latency = sunaudio_stream_get_latency
+};
diff --git a/media/libcubeb/src/moz.build b/media/libcubeb/src/moz.build
index 2ca3a2f54..772aa6d39 100644
--- a/media/libcubeb/src/moz.build
+++ b/media/libcubeb/src/moz.build
@@ -39,12 +39,18 @@ if CONFIG['MOZ_JACK']:
]
DEFINES['USE_JACK'] = True
-if CONFIG['OS_ARCH'] == 'OpenBSD':
+if CONFIG['MOZ_SNDIO']:
SOURCES += [
'cubeb_sndio.c',
]
DEFINES['USE_SNDIO'] = True
+if CONFIG['OS_ARCH'] == 'SunOS':
+ SOURCES += [
+ 'cubeb_sun.c',
+ ]
+ DEFINES['USE_SUN'] = True
+
if CONFIG['OS_TARGET'] == 'Darwin':
SOURCES += [
'cubeb_audiounit.cpp',
diff --git a/media/libcubeb/tests/moz.build b/media/libcubeb/tests/moz.build
index 1b17c7b1c..ca63a4d8f 100644
--- a/media/libcubeb/tests/moz.build
+++ b/media/libcubeb/tests/moz.build
@@ -68,7 +68,7 @@ elif CONFIG['MOZ_WIDGET_TOOLKIT'] == 'uikit':
'-framework CoreFoundation',
'-framework AudioToolbox',
]
-elif CONFIG['OS_TARGET'] == 'OpenBSD':
+elif CONFIG['MOZ_SNDIO']:
OS_LIBS += [
'sndio',
]
diff --git a/media/libpng/moz.build b/media/libpng/moz.build
index 9146a8d5a..f2538484b 100644
--- a/media/libpng/moz.build
+++ b/media/libpng/moz.build
@@ -51,3 +51,6 @@ FINAL_LIBRARY = 'gkmedias'
# We allow warnings for third-party code that can be updated from upstream.
ALLOW_COMPILER_WARNINGS = True
+
+if CONFIG['GNU_CC']:
+ CFLAGS += ['-std=c89']
diff --git a/media/libstagefright/moz.build b/media/libstagefright/moz.build
index 5a8c9521a..87e162112 100644
--- a/media/libstagefright/moz.build
+++ b/media/libstagefright/moz.build
@@ -7,7 +7,7 @@
DEFINES['ANDROID_SMP'] = 0
DEFINES['LOG_NDEBUG'] = 1
-if CONFIG['OS_TARGET'] != 'WINNT':
+if CONFIG['OS_TARGET'] != 'WINNT' and CONFIG['OS_TARGET'] != 'SunOS':
DEFINES['_GLIBCXX_OS_DEFINES'] = True
if CONFIG['OS_TARGET'] == 'WINNT':
diff --git a/media/libwebp/dsp/alpha_processing_neon.c b/media/libwebp/dsp/alpha_processing_neon.c
new file mode 100644
index 000000000..53dfce2b3
--- /dev/null
+++ b/media/libwebp/dsp/alpha_processing_neon.c
@@ -0,0 +1,191 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel, NEON version.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "../dsp/dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include "../dsp/neon.h"
+
+//------------------------------------------------------------------------------
+
+#define MULTIPLIER(a) ((a) * 0x8081)
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
+
+#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do { \
+ const uint8x8_t alpha = (V).val[(ALPHA)]; \
+ const uint16x8_t r1 = vmull_u8((V).val[1], alpha); \
+ const uint16x8_t g1 = vmull_u8((V).val[2], alpha); \
+ const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
+ /* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */ \
+ const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8); \
+ const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8); \
+ const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8); \
+ const uint16x8_t r3 = vaddq_u16(r2, kOne); \
+ const uint16x8_t g3 = vaddq_u16(g2, kOne); \
+ const uint16x8_t b3 = vaddq_u16(b2, kOne); \
+ (V).val[1] = vshrn_n_u16(r3, 8); \
+ (V).val[2] = vshrn_n_u16(g3, 8); \
+ (V).val[(OTHER)] = vshrn_n_u16(b3, 8); \
+} while (0)
+
+static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
+ int w, int h, int stride) {
+ const uint16x8_t kOne = vdupq_n_u16(1u);
+ while (h-- > 0) {
+ uint32_t* const rgbx = (uint32_t*)rgba;
+ int i = 0;
+ if (alpha_first) {
+ for (; i + 8 <= w; i += 8) {
+ // load aaaa...|rrrr...|gggg...|bbbb...
+ uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
+ MULTIPLY_BY_ALPHA(RGBX, 0, 3);
+ vst4_u8((uint8_t*)(rgbx + i), RGBX);
+ }
+ } else {
+ for (; i + 8 <= w; i += 8) {
+ uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
+ MULTIPLY_BY_ALPHA(RGBX, 3, 0);
+ vst4_u8((uint8_t*)(rgbx + i), RGBX);
+ }
+ }
+ // Finish with left-overs.
+ for (; i < w; ++i) {
+ uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
+ const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
+ const uint32_t a = alpha[4 * i];
+ if (a != 0xff) {
+ const uint32_t mult = MULTIPLIER(a);
+ rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
+ rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
+ rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
+ }
+ }
+ rgba += stride;
+ }
+}
+#undef MULTIPLY_BY_ALPHA
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+//------------------------------------------------------------------------------
+
+static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
+ int width, int height,
+ uint8_t* dst, int dst_stride) {
+ uint32_t alpha_mask = 0xffffffffu;
+ uint8x8_t mask8 = vdup_n_u8(0xff);
+ uint32_t tmp[2];
+ int i, j;
+ for (j = 0; j < height; ++j) {
+ // We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
+ // mode). So we must be sure dst[4*i + 8 - 1] is writable for the store.
+ // Hence the test with 'width - 1' instead of just 'width'.
+ for (i = 0; i + 8 <= width - 1; i += 8) {
+ uint8x8x4_t rgbX = vld4_u8((const uint8_t*)(dst + 4 * i));
+ const uint8x8_t alphas = vld1_u8(alpha + i);
+ rgbX.val[0] = alphas;
+ vst4_u8((uint8_t*)(dst + 4 * i), rgbX);
+ mask8 = vand_u8(mask8, alphas);
+ }
+ for (; i < width; ++i) {
+ const uint32_t alpha_value = alpha[i];
+ dst[4 * i] = alpha_value;
+ alpha_mask &= alpha_value;
+ }
+ alpha += alpha_stride;
+ dst += dst_stride;
+ }
+ vst1_u8((uint8_t*)tmp, mask8);
+ alpha_mask &= tmp[0];
+ alpha_mask &= tmp[1];
+ return (alpha_mask != 0xffffffffu);
+}
+
+static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
+ int width, int height,
+ uint32_t* dst, int dst_stride) {
+ int i, j;
+ uint8x8x4_t greens; // leave A/R/B channels zero'd.
+ greens.val[0] = vdup_n_u8(0);
+ greens.val[2] = vdup_n_u8(0);
+ greens.val[3] = vdup_n_u8(0);
+ for (j = 0; j < height; ++j) {
+ for (i = 0; i + 8 <= width; i += 8) {
+ greens.val[1] = vld1_u8(alpha + i);
+ vst4_u8((uint8_t*)(dst + i), greens);
+ }
+ for (; i < width; ++i) dst[i] = alpha[i] << 8;
+ alpha += alpha_stride;
+ dst += dst_stride;
+ }
+}
+
+static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
+ int width, int height,
+ uint8_t* alpha, int alpha_stride) {
+ uint32_t alpha_mask = 0xffffffffu;
+ uint8x8_t mask8 = vdup_n_u8(0xff);
+ uint32_t tmp[2];
+ int i, j;
+ for (j = 0; j < height; ++j) {
+ // We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
+ // mode). So we must be sure dst[4*i + 8 - 1] is writable for the store.
+ // Hence the test with 'width - 1' instead of just 'width'.
+ for (i = 0; i + 8 <= width - 1; i += 8) {
+ const uint8x8x4_t rgbX = vld4_u8((const uint8_t*)(argb + 4 * i));
+ const uint8x8_t alphas = rgbX.val[0];
+ vst1_u8((uint8_t*)(alpha + i), alphas);
+ mask8 = vand_u8(mask8, alphas);
+ }
+ for (; i < width; ++i) {
+ alpha[i] = argb[4 * i];
+ alpha_mask &= alpha[i];
+ }
+ argb += argb_stride;
+ alpha += alpha_stride;
+ }
+ vst1_u8((uint8_t*)tmp, mask8);
+ alpha_mask &= tmp[0];
+ alpha_mask &= tmp[1];
+ return (alpha_mask == 0xffffffffu);
+}
+
+static void ExtractGreen_NEON(const uint32_t* argb,
+ uint8_t* alpha, int size) {
+ int i;
+ for (i = 0; i + 16 <= size; i += 16) {
+ const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i));
+ const uint8x16_t greens = rgbX.val[1];
+ vst1q_u8(alpha + i, greens);
+ }
+ for (; i < size; ++i) alpha[i] = (argb[i] >> 8) & 0xff;
+}
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitAlphaProcessingNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingNEON(void) {
+ WebPApplyAlphaMultiply = ApplyAlphaMultiply_NEON;
+ WebPDispatchAlpha = DispatchAlpha_NEON;
+ WebPDispatchAlphaToGreen = DispatchAlphaToGreen_NEON;
+ WebPExtractAlpha = ExtractAlpha_NEON;
+ WebPExtractGreen = ExtractGreen_NEON;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/media/libwebp/dsp/filters_neon.c b/media/libwebp/dsp/filters_neon.c
new file mode 100644
index 000000000..4788118c9
--- /dev/null
+++ b/media/libwebp/dsp/filters_neon.c
@@ -0,0 +1,329 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON variant of alpha filters
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "../dsp/dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+#include "../dsp/neon.h"
+
+//------------------------------------------------------------------------------
+// Helpful macros.
+
+# define SANITY_CHECK(in, out) \
+ assert(in != NULL); \
+ assert(out != NULL); \
+ assert(width > 0); \
+ assert(height > 0); \
+ assert(stride >= width); \
+ assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
+ (void)height; // Silence unused warning.
+
+// load eight u8 and widen to s16
+#define U8_TO_S16(A) vreinterpretq_s16_u16(vmovl_u8(A))
+#define LOAD_U8_TO_S16(A) U8_TO_S16(vld1_u8(A))
+
+// shift left or right by N byte, inserting zeros
+#define SHIFT_RIGHT_N_Q(A, N) vextq_u8((A), zero, (N))
+#define SHIFT_LEFT_N_Q(A, N) vextq_u8(zero, (A), (16 - (N)) % 16)
+
+// rotate left by N bytes
+#define ROTATE_LEFT_N(A, N) vext_u8((A), (A), (N))
+// rotate right by N bytes
+#define ROTATE_RIGHT_N(A, N) vext_u8((A), (A), (8 - (N)) % 8)
+
+static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
+ uint8_t* dst, int length) {
+ int i;
+ assert(length >= 0);
+ for (i = 0; i + 16 <= length; i += 16) {
+ const uint8x16_t A = vld1q_u8(&src[i]);
+ const uint8x16_t B = vld1q_u8(&pred[i]);
+ const uint8x16_t C = vsubq_u8(A, B);
+ vst1q_u8(&dst[i], C);
+ }
+ for (; i < length; ++i) dst[i] = src[i] - pred[i];
+}
+
+// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
+static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {
+ PredictLine_NEON(src, src - 1, dst, length);
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
+ int width, int height,
+ int stride,
+ int row, int num_rows,
+ uint8_t* out) {
+ const size_t start_offset = row * stride;
+ const int last_row = row + num_rows;
+ SANITY_CHECK(in, out);
+ in += start_offset;
+ out += start_offset;
+
+ if (row == 0) {
+ // Leftmost pixel is the same as input for topmost scanline.
+ out[0] = in[0];
+ PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+ row = 1;
+ in += stride;
+ out += stride;
+ }
+
+ // Filter line-by-line.
+ while (row < last_row) {
+ // Leftmost pixel is predicted from above.
+ out[0] = in[0] - in[-stride];
+ PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+ ++row;
+ in += stride;
+ out += stride;
+ }
+}
+
+static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
+ int stride, uint8_t* filtered_data) {
+ DoHorizontalFilter_NEON(data, width, height, stride, 0, height,
+ filtered_data);
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
+ int width, int height, int stride,
+ int row, int num_rows,
+ uint8_t* out) {
+ const size_t start_offset = row * stride;
+ const int last_row = row + num_rows;
+ SANITY_CHECK(in, out);
+ in += start_offset;
+ out += start_offset;
+
+ if (row == 0) {
+ // Very first top-left pixel is copied.
+ out[0] = in[0];
+ // Rest of top scan-line is left-predicted.
+ PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+ row = 1;
+ in += stride;
+ out += stride;
+ }
+
+ // Filter line-by-line.
+ while (row < last_row) {
+ PredictLine_NEON(in, in - stride, out, width);
+ ++row;
+ in += stride;
+ out += stride;
+ }
+}
+
+static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
+ int stride, uint8_t* filtered_data) {
+ DoVerticalFilter_NEON(data, width, height, stride, 0, height,
+ filtered_data);
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
+ const int g = a + b - c;
+ return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
+}
+
+static void GradientPredictDirect_NEON(const uint8_t* const row,
+ const uint8_t* const top,
+ uint8_t* const out, int length) {
+ int i;
+ for (i = 0; i + 8 <= length; i += 8) {
+ const uint8x8_t A = vld1_u8(&row[i - 1]);
+ const uint8x8_t B = vld1_u8(&top[i + 0]);
+ const int16x8_t C = vreinterpretq_s16_u16(vaddl_u8(A, B));
+ const int16x8_t D = LOAD_U8_TO_S16(&top[i - 1]);
+ const uint8x8_t E = vqmovun_s16(vsubq_s16(C, D));
+ const uint8x8_t F = vld1_u8(&row[i + 0]);
+ vst1_u8(&out[i], vsub_u8(F, E));
+ }
+ for (; i < length; ++i) {
+ out[i] = row[i] - GradientPredictor_C(row[i - 1], top[i], top[i - 1]);
+ }
+}
+
+static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
+ int width, int height,
+ int stride,
+ int row, int num_rows,
+ uint8_t* out) {
+ const size_t start_offset = row * stride;
+ const int last_row = row + num_rows;
+ SANITY_CHECK(in, out);
+ in += start_offset;
+ out += start_offset;
+
+ // left prediction for top scan-line
+ if (row == 0) {
+ out[0] = in[0];
+ PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+ row = 1;
+ in += stride;
+ out += stride;
+ }
+
+ // Filter line-by-line.
+ while (row < last_row) {
+ out[0] = in[0] - in[-stride];
+ GradientPredictDirect_NEON(in + 1, in + 1 - stride, out + 1, width - 1);
+ ++row;
+ in += stride;
+ out += stride;
+ }
+}
+
+static void GradientFilter_NEON(const uint8_t* data, int width, int height,
+ int stride, uint8_t* filtered_data) {
+ DoGradientFilter_NEON(data, width, height, stride, 0, height,
+ filtered_data);
+}
+
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+// Inverse transforms
+
+static void HorizontalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
+ uint8_t* out, int width) {
+ int i;
+ const uint8x16_t zero = vdupq_n_u8(0);
+ uint8x16_t last;
+ out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
+ if (width <= 1) return;
+ last = vsetq_lane_u8(out[0], zero, 0);
+ for (i = 1; i + 16 <= width; i += 16) {
+ const uint8x16_t A0 = vld1q_u8(&in[i]);
+ const uint8x16_t A1 = vaddq_u8(A0, last);
+ const uint8x16_t A2 = SHIFT_LEFT_N_Q(A1, 1);
+ const uint8x16_t A3 = vaddq_u8(A1, A2);
+ const uint8x16_t A4 = SHIFT_LEFT_N_Q(A3, 2);
+ const uint8x16_t A5 = vaddq_u8(A3, A4);
+ const uint8x16_t A6 = SHIFT_LEFT_N_Q(A5, 4);
+ const uint8x16_t A7 = vaddq_u8(A5, A6);
+ const uint8x16_t A8 = SHIFT_LEFT_N_Q(A7, 8);
+ const uint8x16_t A9 = vaddq_u8(A7, A8);
+ vst1q_u8(&out[i], A9);
+ last = SHIFT_RIGHT_N_Q(A9, 15);
+ }
+ for (; i < width; ++i) out[i] = in[i] + out[i - 1];
+}
+
+static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
+ uint8_t* out, int width) {
+ if (prev == NULL) {
+ HorizontalUnfilter_NEON(NULL, in, out, width);
+ } else {
+ int i;
+ assert(width >= 0);
+ for (i = 0; i + 16 <= width; i += 16) {
+ const uint8x16_t A = vld1q_u8(&in[i]);
+ const uint8x16_t B = vld1q_u8(&prev[i]);
+ const uint8x16_t C = vaddq_u8(A, B);
+ vst1q_u8(&out[i], C);
+ }
+ for (; i < width; ++i) out[i] = in[i] + prev[i];
+ }
+}
+
+// GradientUnfilter_NEON is correct but slower than the C-version,
+// at least on ARM64. For armv7, it's a wash.
+// So best is to disable it for now, but keep the idea around...
+#if !defined(USE_GRADIENT_UNFILTER)
+#define USE_GRADIENT_UNFILTER 0 // ALTERNATE_CODE
+#endif
+
+#if (USE_GRADIENT_UNFILTER == 1)
+#define GRAD_PROCESS_LANE(L) do { \
+ const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \
+ const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1)); \
+ const uint8x8_t delta = vqmovun_s16(tmp2); \
+ pred = vadd_u8(D, delta); \
+ out = vext_u8(out, ROTATE_LEFT_N(pred, (L)), 1); \
+} while (0)
+
+static void GradientPredictInverse_NEON(const uint8_t* const in,
+ const uint8_t* const top,
+ uint8_t* const row, int length) {
+ if (length > 0) {
+ int i;
+ uint8x8_t pred = vdup_n_u8(row[-1]); // left sample
+ uint8x8_t out = vdup_n_u8(0);
+ for (i = 0; i + 8 <= length; i += 8) {
+ const int16x8_t B = LOAD_U8_TO_S16(&top[i + 0]);
+ const int16x8_t C = LOAD_U8_TO_S16(&top[i - 1]);
+ const int16x8_t BC = vsubq_s16(B, C); // unclipped gradient basis B - C
+ const uint8x8_t D = vld1_u8(&in[i]); // base input
+ GRAD_PROCESS_LANE(0);
+ GRAD_PROCESS_LANE(1);
+ GRAD_PROCESS_LANE(2);
+ GRAD_PROCESS_LANE(3);
+ GRAD_PROCESS_LANE(4);
+ GRAD_PROCESS_LANE(5);
+ GRAD_PROCESS_LANE(6);
+ GRAD_PROCESS_LANE(7);
+ vst1_u8(&row[i], out);
+ }
+ for (; i < length; ++i) {
+ row[i] = in[i] + GradientPredictor_C(row[i - 1], top[i], top[i - 1]);
+ }
+ }
+}
+#undef GRAD_PROCESS_LANE
+
+static void GradientUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
+ uint8_t* out, int width) {
+ if (prev == NULL) {
+ HorizontalUnfilter_NEON(NULL, in, out, width);
+ } else {
+ out[0] = in[0] + prev[0]; // predict from above
+ GradientPredictInverse_NEON(in + 1, prev + 1, out + 1, width - 1);
+ }
+}
+
+#endif // USE_GRADIENT_UNFILTER
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8FiltersInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
+ WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
+ WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
+#if (USE_GRADIENT_UNFILTER == 1)
+ WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
+#endif
+
+ WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_NEON;
+ WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_NEON;
+ WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_NEON;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8FiltersInitNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/media/libwebp/dsp/moz.build b/media/libwebp/dsp/moz.build
index fa6df9e9e..f3c2bdd0b 100644
--- a/media/libwebp/dsp/moz.build
+++ b/media/libwebp/dsp/moz.build
@@ -9,6 +9,7 @@ with Files('**'):
SOURCES += [
'alpha_processing.c',
+ 'alpha_processing_neon.c',
'alpha_processing_sse2.c',
'alpha_processing_sse41.c',
'dec.c',
@@ -17,6 +18,7 @@ SOURCES += [
'dec_sse2.c',
'dec_sse41.c',
'filters.c',
+ 'filters_neon.c',
'filters_sse2.c',
'lossless.c',
'lossless_neon.c',
@@ -29,15 +31,19 @@ SOURCES += [
'upsampling_sse2.c',
'upsampling_sse41.c',
'yuv.c',
+ 'yuv_neon.c',
'yuv_sse2.c',
'yuv_sse41.c',
]
if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
+ SOURCES['alpha_processing_neon.c'].flags += CONFIG['NEON_FLAGS']
SOURCES['dec_neon.c'].flags += CONFIG['NEON_FLAGS']
+ SOURCES['filters_neon.c'].flags += CONFIG['NEON_FLAGS']
SOURCES['lossless_neon.c'].flags += CONFIG['NEON_FLAGS']
SOURCES['rescaler_neon.c'].flags += CONFIG['NEON_FLAGS']
SOURCES['upsampling_neon.c'].flags += CONFIG['NEON_FLAGS']
+ SOURCES['yuv_neon.c'].flags += CONFIG['NEON_FLAGS']
elif CONFIG['INTEL_ARCHITECTURE']:
SOURCES['alpha_processing_sse2.c'].flags += CONFIG['SSE2_FLAGS']
SOURCES['alpha_processing_sse41.c'].flags += CONFIG['SSE2_FLAGS']
diff --git a/media/libwebp/dsp/yuv_neon.c b/media/libwebp/dsp/yuv_neon.c
new file mode 100644
index 000000000..81f00fe5a
--- /dev/null
+++ b/media/libwebp/dsp/yuv_neon.c
@@ -0,0 +1,288 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "../dsp/yuv.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "../dsp/neon.h"
+
+//-----------------------------------------------------------------------------
+
+static uint8x8_t ConvertRGBToY_NEON(const uint8x8_t R,
+ const uint8x8_t G,
+ const uint8x8_t B) {
+ const uint16x8_t r = vmovl_u8(R);
+ const uint16x8_t g = vmovl_u8(G);
+ const uint16x8_t b = vmovl_u8(B);
+ const uint16x4_t r_lo = vget_low_u16(r);
+ const uint16x4_t r_hi = vget_high_u16(r);
+ const uint16x4_t g_lo = vget_low_u16(g);
+ const uint16x4_t g_hi = vget_high_u16(g);
+ const uint16x4_t b_lo = vget_low_u16(b);
+ const uint16x4_t b_hi = vget_high_u16(b);
+ const uint32x4_t tmp0_lo = vmull_n_u16( r_lo, 16839u);
+ const uint32x4_t tmp0_hi = vmull_n_u16( r_hi, 16839u);
+ const uint32x4_t tmp1_lo = vmlal_n_u16(tmp0_lo, g_lo, 33059u);
+ const uint32x4_t tmp1_hi = vmlal_n_u16(tmp0_hi, g_hi, 33059u);
+ const uint32x4_t tmp2_lo = vmlal_n_u16(tmp1_lo, b_lo, 6420u);
+ const uint32x4_t tmp2_hi = vmlal_n_u16(tmp1_hi, b_hi, 6420u);
+ const uint16x8_t Y1 = vcombine_u16(vrshrn_n_u32(tmp2_lo, 16),
+ vrshrn_n_u32(tmp2_hi, 16));
+ const uint16x8_t Y2 = vaddq_u16(Y1, vdupq_n_u16(16));
+ return vqmovn_u16(Y2);
+}
+
+static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
+ int i;
+ for (i = 0; i + 8 <= width; i += 8, rgb += 3 * 8) {
+ const uint8x8x3_t RGB = vld3_u8(rgb);
+ const uint8x8_t Y = ConvertRGBToY_NEON(RGB.val[0], RGB.val[1], RGB.val[2]);
+ vst1_u8(y + i, Y);
+ }
+ for (; i < width; ++i, rgb += 3) { // left-over
+ y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
+ }
+}
+
+static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
+ int i;
+ for (i = 0; i + 8 <= width; i += 8, bgr += 3 * 8) {
+ const uint8x8x3_t BGR = vld3_u8(bgr);
+ const uint8x8_t Y = ConvertRGBToY_NEON(BGR.val[2], BGR.val[1], BGR.val[0]);
+ vst1_u8(y + i, Y);
+ }
+ for (; i < width; ++i, bgr += 3) { // left-over
+ y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
+ }
+}
+
+static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
+ int i;
+ for (i = 0; i + 8 <= width; i += 8) {
+ const uint8x8x4_t RGB = vld4_u8((const uint8_t*)&argb[i]);
+ const uint8x8_t Y = ConvertRGBToY_NEON(RGB.val[2], RGB.val[1], RGB.val[0]);
+ vst1_u8(y + i, Y);
+ }
+ for (; i < width; ++i) { // left-over
+ const uint32_t p = argb[i];
+ y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff,
+ YUV_HALF);
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+// computes: DST_s16 = [(C0 * r + C1 * g + C2 * b) >> 16] + CST
+#define MULTIPLY_16b_PREAMBLE(r, g, b) \
+ const int16x4_t r_lo = vreinterpret_s16_u16(vget_low_u16(r)); \
+ const int16x4_t r_hi = vreinterpret_s16_u16(vget_high_u16(r)); \
+ const int16x4_t g_lo = vreinterpret_s16_u16(vget_low_u16(g)); \
+ const int16x4_t g_hi = vreinterpret_s16_u16(vget_high_u16(g)); \
+ const int16x4_t b_lo = vreinterpret_s16_u16(vget_low_u16(b)); \
+ const int16x4_t b_hi = vreinterpret_s16_u16(vget_high_u16(b))
+
+#define MULTIPLY_16b(C0, C1, C2, CST, DST_s16) do { \
+ const int32x4_t tmp0_lo = vmull_n_s16( r_lo, C0); \
+ const int32x4_t tmp0_hi = vmull_n_s16( r_hi, C0); \
+ const int32x4_t tmp1_lo = vmlal_n_s16(tmp0_lo, g_lo, C1); \
+ const int32x4_t tmp1_hi = vmlal_n_s16(tmp0_hi, g_hi, C1); \
+ const int32x4_t tmp2_lo = vmlal_n_s16(tmp1_lo, b_lo, C2); \
+ const int32x4_t tmp2_hi = vmlal_n_s16(tmp1_hi, b_hi, C2); \
+ const int16x8_t tmp3 = vcombine_s16(vshrn_n_s32(tmp2_lo, 16), \
+ vshrn_n_s32(tmp2_hi, 16)); \
+ DST_s16 = vaddq_s16(tmp3, vdupq_n_s16(CST)); \
+} while (0)
+
+// This needs to be a macro, since (128 << SHIFT) needs to be an immediate.
+#define CONVERT_RGB_TO_UV(r, g, b, SHIFT, U_DST, V_DST) do { \
+ MULTIPLY_16b_PREAMBLE(r, g, b); \
+ MULTIPLY_16b(-9719, -19081, 28800, 128 << SHIFT, U_DST); \
+ MULTIPLY_16b(28800, -24116, -4684, 128 << SHIFT, V_DST); \
+} while (0)
+
+static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
+ uint8_t* u, uint8_t* v, int width) {
+ int i;
+ for (i = 0; i + 8 <= width; i += 8, rgb += 4 * 8) {
+ const uint16x8x4_t RGB = vld4q_u16((const uint16_t*)rgb);
+ int16x8_t U, V;
+ CONVERT_RGB_TO_UV(RGB.val[0], RGB.val[1], RGB.val[2], 2, U, V);
+ vst1_u8(u + i, vqrshrun_n_s16(U, 2));
+ vst1_u8(v + i, vqrshrun_n_s16(V, 2));
+ }
+ for (; i < width; i += 1, rgb += 4) {
+ const int r = rgb[0], g = rgb[1], b = rgb[2];
+ u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
+ v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
+ }
+}
+
+static void ConvertARGBToUV_NEON(const uint32_t* argb, uint8_t* u, uint8_t* v,
+ int src_width, int do_store) {
+ int i;
+ for (i = 0; i + 16 <= src_width; i += 16, u += 8, v += 8) {
+ const uint8x16x4_t RGB = vld4q_u8((const uint8_t*)&argb[i]);
+ const uint16x8_t R = vpaddlq_u8(RGB.val[2]); // pair-wise adds
+ const uint16x8_t G = vpaddlq_u8(RGB.val[1]);
+ const uint16x8_t B = vpaddlq_u8(RGB.val[0]);
+ int16x8_t U_tmp, V_tmp;
+ CONVERT_RGB_TO_UV(R, G, B, 1, U_tmp, V_tmp);
+ {
+ const uint8x8_t U = vqrshrun_n_s16(U_tmp, 1);
+ const uint8x8_t V = vqrshrun_n_s16(V_tmp, 1);
+ if (do_store) {
+ vst1_u8(u, U);
+ vst1_u8(v, V);
+ } else {
+ const uint8x8_t prev_u = vld1_u8(u);
+ const uint8x8_t prev_v = vld1_u8(v);
+ vst1_u8(u, vrhadd_u8(U, prev_u));
+ vst1_u8(v, vrhadd_u8(V, prev_v));
+ }
+ }
+ }
+ if (i < src_width) { // left-over
+ WebPConvertARGBToUV_C(argb + i, u, v, src_width - i, do_store);
+ }
+}
+
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitConvertARGBToYUVNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
+ WebPConvertRGB24ToY = ConvertRGB24ToY_NEON;
+ WebPConvertBGR24ToY = ConvertBGR24ToY_NEON;
+ WebPConvertARGBToY = ConvertARGBToY_NEON;
+ WebPConvertARGBToUV = ConvertARGBToUV_NEON;
+ WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
+}
+
+//------------------------------------------------------------------------------
+
+#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
+static uint16_t clip_y_NEON(int v) {
+ return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+}
+
+static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
+ uint16_t* dst, int len) {
+ int i;
+ const int16x8_t zero = vdupq_n_s16(0);
+ const int16x8_t max = vdupq_n_s16(MAX_Y);
+ uint64x2_t sum = vdupq_n_u64(0);
+ uint64_t diff;
+
+ for (i = 0; i + 8 <= len; i += 8) {
+ const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
+ const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
+ const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
+ const int16x8_t D = vsubq_s16(A, B); // diff_y
+ const int16x8_t F = vaddq_s16(C, D); // new_y
+ const uint16x8_t H =
+ vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
+ const int16x8_t I = vabsq_s16(D); // abs(diff_y)
+ vst1q_u16(dst + i, H);
+ sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
+ }
+ diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
+ for (; i < len; ++i) {
+ const int diff_y = ref[i] - src[i];
+ const int new_y = (int)(dst[i]) + diff_y;
+ dst[i] = clip_y_NEON(new_y);
+ diff += (uint64_t)(abs(diff_y));
+ }
+ return diff;
+}
+
+static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
+ int16_t* dst, int len) {
+ int i;
+ for (i = 0; i + 8 <= len; i += 8) {
+ const int16x8_t A = vld1q_s16(ref + i);
+ const int16x8_t B = vld1q_s16(src + i);
+ const int16x8_t C = vld1q_s16(dst + i);
+ const int16x8_t D = vsubq_s16(A, B); // diff_uv
+ const int16x8_t E = vaddq_s16(C, D); // new_uv
+ vst1q_s16(dst + i, E);
+ }
+ for (; i < len; ++i) {
+ const int diff_uv = ref[i] - src[i];
+ dst[i] += diff_uv;
+ }
+}
+
+static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
+ const uint16_t* best_y, uint16_t* out) {
+ int i;
+ const int16x8_t max = vdupq_n_s16(MAX_Y);
+ const int16x8_t zero = vdupq_n_s16(0);
+ for (i = 0; i + 8 <= len; i += 8) {
+ const int16x8_t a0 = vld1q_s16(A + i + 0);
+ const int16x8_t a1 = vld1q_s16(A + i + 1);
+ const int16x8_t b0 = vld1q_s16(B + i + 0);
+ const int16x8_t b1 = vld1q_s16(B + i + 1);
+ const int16x8_t a0b1 = vaddq_s16(a0, b1);
+ const int16x8_t a1b0 = vaddq_s16(a1, b0);
+ const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1
+ const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1)
+ const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0)
+ const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
+ const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
+ const int16x8_t d0 = vaddq_s16(c1, a0);
+ const int16x8_t d1 = vaddq_s16(c0, a1);
+ const int16x8_t e0 = vrshrq_n_s16(d0, 1);
+ const int16x8_t e1 = vrshrq_n_s16(d1, 1);
+ const int16x8x2_t f = vzipq_s16(e0, e1);
+ const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
+ const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
+ const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
+ const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
+ const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
+ const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
+ vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
+ vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
+ }
+ for (; i < len; ++i) {
+ const int a0b1 = A[i + 0] + B[i + 1];
+ const int a1b0 = A[i + 1] + B[i + 0];
+ const int a0a1b0b1 = a0b1 + a1b0 + 8;
+ const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+ const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+ out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
+ out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
+ }
+}
+#undef MAX_Y
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitSharpYUVNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
+ WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
+ WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
+ WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
+WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/media/libwebp/update.sh b/media/libwebp/update.sh
index 652993004..4fff43d69 100644
--- a/media/libwebp/update.sh
+++ b/media/libwebp/update.sh
@@ -38,6 +38,7 @@ cp $1/src/demux/demux.c demux
mkdir -p dsp
cp $1/src/dsp/*.h dsp
cp $1/src/dsp/alpha_processing.c dsp
+cp $1/src/dsp/alpha_processing_neon.c dsp
cp $1/src/dsp/alpha_processing_sse2.c dsp
cp $1/src/dsp/alpha_processing_sse41.c dsp
cp $1/src/dsp/dec.c dsp
@@ -46,6 +47,7 @@ cp $1/src/dsp/dec_neon.c dsp
cp $1/src/dsp/dec_sse2.c dsp
cp $1/src/dsp/dec_sse41.c dsp
cp $1/src/dsp/filters.c dsp
+cp $1/src/dsp/filters_neon.c dsp
cp $1/src/dsp/filters_sse2.c dsp
cp $1/src/dsp/lossless.c dsp
cp $1/src/dsp/lossless_neon.c dsp
@@ -58,6 +60,7 @@ cp $1/src/dsp/upsampling_neon.c dsp
cp $1/src/dsp/upsampling_sse2.c dsp
cp $1/src/dsp/upsampling_sse41.c dsp
cp $1/src/dsp/yuv.c dsp
+cp $1/src/dsp/yuv_neon.c dsp
cp $1/src/dsp/yuv_sse2.c dsp
cp $1/src/dsp/yuv_sse41.c dsp
diff --git a/media/mtransport/third_party/nICEr/src/ice/ice_component.c b/media/mtransport/third_party/nICEr/src/ice/ice_component.c
index 2be25efca..11b4fcbc1 100644
--- a/media/mtransport/third_party/nICEr/src/ice/ice_component.c
+++ b/media/mtransport/third_party/nICEr/src/ice/ice_component.c
@@ -909,7 +909,6 @@ static int nr_ice_component_process_incoming_check(nr_ice_component *comp, nr_tr
nr_ice_candidate_pair_set_state(pair->pctx,pair,NR_ICE_PAIR_STATE_FROZEN);
if(r=nr_ice_component_insert_pair(comp,pair)) {
*error=(r==R_NO_MEMORY)?500:400;
- nr_ice_candidate_pair_destroy(&pair);
ABORT(r);
}
@@ -1615,6 +1614,7 @@ int nr_ice_component_finalize(nr_ice_component *lcomp, nr_ice_component *rcomp)
int nr_ice_component_insert_pair(nr_ice_component *pcomp, nr_ice_cand_pair *pair)
{
int r,_status;
+ int pair_inserted=0;
/* Pairs for peer reflexive are marked SUCCEEDED immediately */
if (pair->state != NR_ICE_PAIR_STATE_FROZEN &&
@@ -1626,6 +1626,8 @@ int nr_ice_component_insert_pair(nr_ice_component *pcomp, nr_ice_cand_pair *pair
if(r=nr_ice_candidate_pair_insert(&pair->remote->stream->check_list,pair))
ABORT(r);
+ pair_inserted=1;
+
/* Make sure the check timer is running, if the stream was previously
* started. We will not start streams just because a pair was created,
* unless it is the first pair to be created across all streams. */
@@ -1642,6 +1644,9 @@ int nr_ice_component_insert_pair(nr_ice_component *pcomp, nr_ice_cand_pair *pair
_status=0;
abort:
+ if (_status && !pair_inserted) {
+ nr_ice_candidate_pair_destroy(&pair);
+ }
return(_status);
}