summaryrefslogtreecommitdiffstats
path: root/dom/media/webaudio/AudioNodeEngineNEON.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/webaudio/AudioNodeEngineNEON.cpp')
-rw-r--r--dom/media/webaudio/AudioNodeEngineNEON.cpp318
1 files changed, 318 insertions, 0 deletions
diff --git a/dom/media/webaudio/AudioNodeEngineNEON.cpp b/dom/media/webaudio/AudioNodeEngineNEON.cpp
new file mode 100644
index 000000000..079a1cc8b
--- /dev/null
+++ b/dom/media/webaudio/AudioNodeEngineNEON.cpp
@@ -0,0 +1,318 @@
+/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* this source code form is subject to the terms of the mozilla public
+ * license, v. 2.0. if a copy of the mpl was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioNodeEngineNEON.h"
+#include <arm_neon.h>
+
+//#ifdef DEBUG
+#if 0 // see bug 921099
+ #define ASSERT_ALIGNED(ptr) \
+ MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr, \
+ #ptr " has to be aligned 16-bytes aligned.");
+#else
+ #define ASSERT_ALIGNED(ptr)
+#endif
+
+#define ADDRESS_OF(array, index) ((float32_t*)&array[index])
+
+namespace mozilla {
+void AudioBufferAddWithScale_NEON(const float* aInput,
+ float aScale,
+ float* aOutput,
+ uint32_t aSize)
+{
+ ASSERT_ALIGNED(aInput);
+ ASSERT_ALIGNED(aOutput);
+
+ float32x4_t vin0, vin1, vin2, vin3;
+ float32x4_t vout0, vout1, vout2, vout3;
+ float32x4_t vscale = vmovq_n_f32(aScale);
+
+ uint32_t dif = aSize % 16;
+ aSize -= dif;
+ unsigned i = 0;
+ for (; i < aSize; i+=16) {
+ vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
+ vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
+ vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
+ vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
+
+ vout0 = vld1q_f32(ADDRESS_OF(aOutput, i));
+ vout1 = vld1q_f32(ADDRESS_OF(aOutput, i+4));
+ vout2 = vld1q_f32(ADDRESS_OF(aOutput, i+8));
+ vout3 = vld1q_f32(ADDRESS_OF(aOutput, i+12));
+
+ vout0 = vmlaq_f32(vout0, vin0, vscale);
+ vout1 = vmlaq_f32(vout1, vin1, vscale);
+ vout2 = vmlaq_f32(vout2, vin2, vscale);
+ vout3 = vmlaq_f32(vout3, vin3, vscale);
+
+ vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
+ vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
+ vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
+ vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
+ }
+
+ for (unsigned j = 0; j < dif; ++i, ++j) {
+ aOutput[i] += aInput[i]*aScale;
+ }
+}
+void
+AudioBlockCopyChannelWithScale_NEON(const float* aInput,
+ float aScale,
+ float* aOutput)
+{
+ ASSERT_ALIGNED(aInput);
+ ASSERT_ALIGNED(aOutput);
+
+ float32x4_t vin0, vin1, vin2, vin3;
+ float32x4_t vout0, vout1, vout2, vout3;
+ float32x4_t vscale = vmovq_n_f32(aScale);
+
+ for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
+ vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
+ vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
+ vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
+ vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
+
+ vout0 = vmulq_f32(vin0, vscale);
+ vout1 = vmulq_f32(vin1, vscale);
+ vout2 = vmulq_f32(vin2, vscale);
+ vout3 = vmulq_f32(vin3, vscale);
+
+ vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
+ vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
+ vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
+ vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
+ }
+}
+
+void
+AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE],
+ const float aScale[WEBAUDIO_BLOCK_SIZE],
+ float aOutput[WEBAUDIO_BLOCK_SIZE])
+{
+ ASSERT_ALIGNED(aInput);
+ ASSERT_ALIGNED(aScale);
+ ASSERT_ALIGNED(aOutput);
+
+ float32x4_t vin0, vin1, vin2, vin3;
+ float32x4_t vout0, vout1, vout2, vout3;
+ float32x4_t vscale0, vscale1, vscale2, vscale3;
+
+ for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
+ vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
+ vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
+ vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
+ vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
+
+ vscale0 = vld1q_f32(ADDRESS_OF(aScale, i));
+ vscale1 = vld1q_f32(ADDRESS_OF(aScale, i+4));
+ vscale2 = vld1q_f32(ADDRESS_OF(aScale, i+8));
+ vscale3 = vld1q_f32(ADDRESS_OF(aScale, i+12));
+
+ vout0 = vmulq_f32(vin0, vscale0);
+ vout1 = vmulq_f32(vin1, vscale1);
+ vout2 = vmulq_f32(vin2, vscale2);
+ vout3 = vmulq_f32(vin3, vscale3);
+
+ vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
+ vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
+ vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
+ vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
+ }
+}
+
+void
+AudioBufferInPlaceScale_NEON(float* aBlock,
+ float aScale,
+ uint32_t aSize)
+{
+ ASSERT_ALIGNED(aBlock);
+
+ float32x4_t vin0, vin1, vin2, vin3;
+ float32x4_t vout0, vout1, vout2, vout3;
+ float32x4_t vscale = vmovq_n_f32(aScale);
+
+ uint32_t dif = aSize % 16;
+ uint32_t vectorSize = aSize - dif;
+ uint32_t i = 0;
+ for (; i < vectorSize; i+=16) {
+ vin0 = vld1q_f32(ADDRESS_OF(aBlock, i));
+ vin1 = vld1q_f32(ADDRESS_OF(aBlock, i+4));
+ vin2 = vld1q_f32(ADDRESS_OF(aBlock, i+8));
+ vin3 = vld1q_f32(ADDRESS_OF(aBlock, i+12));
+
+ vout0 = vmulq_f32(vin0, vscale);
+ vout1 = vmulq_f32(vin1, vscale);
+ vout2 = vmulq_f32(vin2, vscale);
+ vout3 = vmulq_f32(vin3, vscale);
+
+ vst1q_f32(ADDRESS_OF(aBlock, i), vout0);
+ vst1q_f32(ADDRESS_OF(aBlock, i+4), vout1);
+ vst1q_f32(ADDRESS_OF(aBlock, i+8), vout2);
+ vst1q_f32(ADDRESS_OF(aBlock, i+12), vout3);
+ }
+
+ for (unsigned j = 0; j < dif; ++i, ++j) {
+ aBlock[i] *= aScale;
+ }
+}
+
+void
+AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
+ const float aInputR[WEBAUDIO_BLOCK_SIZE],
+ float aGainL, float aGainR, bool aIsOnTheLeft,
+ float aOutputL[WEBAUDIO_BLOCK_SIZE],
+ float aOutputR[WEBAUDIO_BLOCK_SIZE])
+{
+ ASSERT_ALIGNED(aInputL);
+ ASSERT_ALIGNED(aInputR);
+ ASSERT_ALIGNED(aOutputL);
+ ASSERT_ALIGNED(aOutputR);
+
+ float32x4_t vinL0, vinL1;
+ float32x4_t vinR0, vinR1;
+ float32x4_t voutL0, voutL1;
+ float32x4_t voutR0, voutR1;
+ float32x4_t vscaleL = vmovq_n_f32(aGainL);
+ float32x4_t vscaleR = vmovq_n_f32(aGainR);
+
+ if (aIsOnTheLeft) {
+ for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
+ vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
+ vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
+
+ vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
+ vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
+
+ voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL);
+ voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL);
+
+ vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
+ vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
+
+ voutR0 = vmulq_f32(vinR0, vscaleR);
+ voutR1 = vmulq_f32(vinR1, vscaleR);
+
+ vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
+ vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
+ }
+ } else {
+ for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
+ vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
+ vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
+
+ vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
+ vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
+
+ voutL0 = vmulq_f32(vinL0, vscaleL);
+ voutL1 = vmulq_f32(vinL1, vscaleL);
+
+ vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
+ vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
+
+ voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR);
+ voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR);
+
+ vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
+ vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
+ }
+ }
+}
+
+void
+AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
+ const float aInputR[WEBAUDIO_BLOCK_SIZE],
+ float aGainL[WEBAUDIO_BLOCK_SIZE],
+ float aGainR[WEBAUDIO_BLOCK_SIZE],
+ const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
+ float aOutputL[WEBAUDIO_BLOCK_SIZE],
+ float aOutputR[WEBAUDIO_BLOCK_SIZE])
+{
+ ASSERT_ALIGNED(aInputL);
+ ASSERT_ALIGNED(aInputR);
+ ASSERT_ALIGNED(aGainL);
+ ASSERT_ALIGNED(aGainR);
+ ASSERT_ALIGNED(aIsOnTheLeft);
+ ASSERT_ALIGNED(aOutputL);
+ ASSERT_ALIGNED(aOutputR);
+
+ float32x4_t vinL0, vinL1;
+ float32x4_t vinR0, vinR1;
+ float32x4_t voutL0, voutL1;
+ float32x4_t voutR0, voutR1;
+ float32x4_t vscaleL0, vscaleL1;
+ float32x4_t vscaleR0, vscaleR1;
+ float32x4_t onleft0, onleft1, notonleft0, notonleft1;
+
+ float32x4_t zero = {0, 0, 0, 0};
+ uint8x8_t isOnTheLeft;
+
+ for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
+ vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
+ vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
+
+ vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
+ vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
+
+ vscaleL0 = vld1q_f32(ADDRESS_OF(aGainL, i));
+ vscaleL1 = vld1q_f32(ADDRESS_OF(aGainL, i+4));
+
+ vscaleR0 = vld1q_f32(ADDRESS_OF(aGainR, i));
+ vscaleR1 = vld1q_f32(ADDRESS_OF(aGainR, i+4));
+
+ // Load output with boolean "on the left" values. This assumes that
+ // bools are stored as a single byte.
+ isOnTheLeft = vld1_u8((uint8_t *)&aIsOnTheLeft[i]);
+ voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 0), voutL0, 0);
+ voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 1), voutL0, 1);
+ voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 2), voutL0, 2);
+ voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 3), voutL0, 3);
+ voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 4), voutL1, 0);
+ voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 5), voutL1, 1);
+ voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 6), voutL1, 2);
+ voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 7), voutL1, 3);
+
+ // Convert the boolean values into masks by setting all bits to 1
+ // if true.
+ voutL0 = (float32x4_t)vcgtq_f32(voutL0, zero);
+ voutL1 = (float32x4_t)vcgtq_f32(voutL1, zero);
+
+ // The right output masks are the same as the left masks
+ voutR0 = voutL0;
+ voutR1 = voutL1;
+
+ // Calculate left channel assuming isOnTheLeft
+ onleft0 = vmlaq_f32(vinL0, vinR0, vscaleL0);
+ onleft1 = vmlaq_f32(vinL1, vinR1, vscaleL0);
+
+ // Calculate left channel assuming not isOnTheLeft
+ notonleft0 = vmulq_f32(vinL0, vscaleL0);
+ notonleft1 = vmulq_f32(vinL1, vscaleL1);
+
+ // Write results using previously stored masks
+ voutL0 = vbslq_f32((uint32x4_t)voutL0, onleft0, notonleft0);
+ voutL1 = vbslq_f32((uint32x4_t)voutL1, onleft1, notonleft1);
+
+ // Calculate right channel assuming isOnTheLeft
+ onleft0 = vmulq_f32(vinR0, vscaleR0);
+ onleft1 = vmulq_f32(vinR1, vscaleR1);
+
+ // Calculate right channel assuming not isOnTheLeft
+ notonleft0 = vmlaq_f32(vinR0, vinL0, vscaleR0);
+ notonleft1 = vmlaq_f32(vinR1, vinL1, vscaleR1);
+
+ // Write results using previously stored masks
+ voutR0 = vbslq_f32((uint32x4_t)voutR0, onleft0, notonleft0);
+ voutR1 = vbslq_f32((uint32x4_t)voutR1, onleft1, notonleft1);
+
+ vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
+ vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
+ vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
+ vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
+ }
+}
+}