summaryrefslogtreecommitdiffstats
path: root/third_party/aom/aom_dsp/simd/v64_intrinsics.h
blob: afc55428d85ad33d8e9d09d4f368ff62861c0e98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_

#include <stdio.h>
#include <stdlib.h>

#include "aom_dsp/simd/v64_intrinsics_c.h"

/* Fallback to plain, unoptimised C. */

/* In this plain-C fallback the public vector type v64 is just another name
 * for c_v64 (presumably declared in aom_dsp/simd/v64_intrinsics_c.h, the only
 * project header included here -- confirm). */
typedef c_v64 v64;

/* Conversions between scalars and v64.
 *
 * Every wrapper in this group hands its arguments, unchanged and in the same
 * order, to the c_-prefixed routine of the same name and returns whatever
 * that routine produces. */
SIMD_INLINE uint32_t v64_low_u32(v64 a) {
  const uint32_t value = c_v64_low_u32(a);
  return value;
}

SIMD_INLINE uint32_t v64_high_u32(v64 a) {
  const uint32_t value = c_v64_high_u32(a);
  return value;
}

SIMD_INLINE int32_t v64_low_s32(v64 a) {
  const int32_t value = c_v64_low_s32(a);
  return value;
}

SIMD_INLINE int32_t v64_high_s32(v64 a) {
  const int32_t value = c_v64_high_s32(a);
  return value;
}

SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
  const v64 vec = c_v64_from_32(x, y);
  return vec;
}

SIMD_INLINE v64 v64_from_64(uint64_t x) {
  const v64 vec = c_v64_from_64(x);
  return vec;
}

SIMD_INLINE uint64_t v64_u64(v64 x) {
  const uint64_t value = c_v64_u64(x);
  return value;
}

SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
  const v64 vec = c_v64_from_16(a, b, c, d);
  return vec;
}

/* 32-bit scalar loads and stores.
 *
 * Each wrapper forwards the pointer (and, for stores, the value) untouched to
 * the matching c_u32_* routine. Whatever alignment requirement the "aligned"
 * variants impose on p is decided by that routine, not here. */

/* Load: returns the result of c_u32_load_unaligned(p). */
SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
  const uint32_t loaded = c_u32_load_unaligned(p);
  return loaded;
}

/* Load: returns the result of c_u32_load_aligned(p). */
SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
  const uint32_t loaded = c_u32_load_aligned(p);
  return loaded;
}

/* Store: delegates to c_u32_store_unaligned(p, a); nothing is returned. */
SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
  c_u32_store_unaligned(p, a);
}

/* Store: delegates to c_u32_store_aligned(p, a); nothing is returned. */
SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
  c_u32_store_aligned(p, a);
}

/* v64 loads and stores.
 *
 * Each wrapper forwards the pointer (and, for stores, the vector) untouched
 * to the matching c_v64_* routine. Whatever alignment requirement the
 * "aligned" variants impose on p is decided by that routine, not here. */

/* Load: returns the result of c_v64_load_unaligned(p). */
SIMD_INLINE v64 v64_load_unaligned(const void *p) {
  const v64 loaded = c_v64_load_unaligned(p);
  return loaded;
}

/* Load: returns the result of c_v64_load_aligned(p). */
SIMD_INLINE v64 v64_load_aligned(const void *p) {
  const v64 loaded = c_v64_load_aligned(p);
  return loaded;
}

/* Store: delegates to c_v64_store_unaligned(p, a); nothing is returned. */
SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
  c_v64_store_unaligned(p, a);
}

/* Store: delegates to c_v64_store_aligned(p, a); nothing is returned. */
SIMD_INLINE void v64_store_aligned(void *p, v64 a) {
  c_v64_store_aligned(p, a);
}

/* Returns c_v64_align(a, b, c); all three arguments are forwarded unchanged
 * and in the same order. */
SIMD_INLINE v64 v64_align(v64 a, v64 b, unsigned int c) {
  const v64 aligned = c_v64_align(a, b, c);
  return aligned;
}

/* Returns the value produced by c_v64_zero().
 *
 * The parameter list is spelled (void) rather than () so that the definition
 * is a true prototype: before C23 an empty list leaves the arguments
 * unchecked, so a stray argument at a call site would compile silently, and
 * -Wstrict-prototypes flags the old form. */
SIMD_INLINE v64 v64_zero(void) { return c_v64_zero(); }
/* v64_dup_* wrappers: each passes the scalar x straight to the matching
 * c_v64_dup_* routine and returns the vector that routine builds. */
SIMD_INLINE v64 v64_dup_8(uint8_t x) {
  const v64 vec = c_v64_dup_8(x);
  return vec;
}

SIMD_INLINE v64 v64_dup_16(uint16_t x) {
  const v64 vec = c_v64_dup_16(x);
  return vec;
}

SIMD_INLINE v64 v64_dup_32(uint32_t x) {
  const v64 vec = c_v64_dup_32(x);
  return vec;
}

/* add / sub / abs family.
 *
 * Each wrapper forwards its operand(s), unchanged and in the order (a, b), to
 * the c_-prefixed routine of the same name and returns that routine's result.
 * The arithmetic itself lives entirely in the c_v64_* implementation. */
SIMD_INLINE v64 v64_add_8(v64 a, v64 b) {
  const v64 out = c_v64_add_8(a, b);
  return out;
}

SIMD_INLINE v64 v64_add_16(v64 a, v64 b) {
  const v64 out = c_v64_add_16(a, b);
  return out;
}

SIMD_INLINE v64 v64_sadd_u8(v64 a, v64 b) {
  const v64 out = c_v64_sadd_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_sadd_s8(v64 a, v64 b) {
  const v64 out = c_v64_sadd_s8(a, b);
  return out;
}

SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) {
  const v64 out = c_v64_sadd_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_add_32(v64 a, v64 b) {
  const v64 out = c_v64_add_32(a, b);
  return out;
}

SIMD_INLINE v64 v64_sub_8(v64 a, v64 b) {
  const v64 out = c_v64_sub_8(a, b);
  return out;
}

SIMD_INLINE v64 v64_ssub_u8(v64 a, v64 b) {
  const v64 out = c_v64_ssub_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_ssub_s8(v64 a, v64 b) {
  const v64 out = c_v64_ssub_s8(a, b);
  return out;
}

SIMD_INLINE v64 v64_sub_16(v64 a, v64 b) {
  const v64 out = c_v64_sub_16(a, b);
  return out;
}

SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) {
  const v64 out = c_v64_ssub_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) {
  const v64 out = c_v64_ssub_u16(a, b);
  return out;
}

SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) {
  const v64 out = c_v64_sub_32(a, b);
  return out;
}

SIMD_INLINE v64 v64_abs_s16(v64 a) {
  const v64 out = c_v64_abs_s16(a);
  return out;
}

SIMD_INLINE v64 v64_abs_s8(v64 a) {
  const v64 out = c_v64_abs_s8(a);
  return out;
}

/* zip / unzip family.
 *
 * Each wrapper forwards (a, b) in that order to the c_-prefixed routine of
 * the same name and returns its result; the lane rearrangement is defined by
 * the c_v64_* implementation. */
SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) {
  const v64 out = c_v64_ziplo_8(a, b);
  return out;
}

SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) {
  const v64 out = c_v64_ziphi_8(a, b);
  return out;
}

SIMD_INLINE v64 v64_ziplo_16(v64 a, v64 b) {
  const v64 out = c_v64_ziplo_16(a, b);
  return out;
}

SIMD_INLINE v64 v64_ziphi_16(v64 a, v64 b) {
  const v64 out = c_v64_ziphi_16(a, b);
  return out;
}

SIMD_INLINE v64 v64_ziplo_32(v64 a, v64 b) {
  const v64 out = c_v64_ziplo_32(a, b);
  return out;
}

SIMD_INLINE v64 v64_ziphi_32(v64 a, v64 b) {
  const v64 out = c_v64_ziphi_32(a, b);
  return out;
}

SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
  const v64 out = c_v64_unziplo_8(a, b);
  return out;
}

SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
  const v64 out = c_v64_unziphi_8(a, b);
  return out;
}

SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) {
  const v64 out = c_v64_unziplo_16(a, b);
  return out;
}

SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
  const v64 out = c_v64_unziphi_16(a, b);
  return out;
}
/* unpack / pack family.
 *
 * Each wrapper forwards its operand(s), unchanged and in order, to the
 * c_-prefixed routine of the same name and returns its result; widening,
 * narrowing and any saturation are defined by the c_v64_* implementation. */
SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) {
  const v64 out = c_v64_unpacklo_u8_s16(a);
  return out;
}

SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) {
  const v64 out = c_v64_unpackhi_u8_s16(a);
  return out;
}

SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) {
  const v64 out = c_v64_unpacklo_s8_s16(a);
  return out;
}

SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) {
  const v64 out = c_v64_unpackhi_s8_s16(a);
  return out;
}

SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
  const v64 out = c_v64_pack_s32_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
  const v64 out = c_v64_pack_s32_u16(a, b);
  return out;
}

SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
  const v64 out = c_v64_pack_s16_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
  const v64 out = c_v64_pack_s16_s8(a, b);
  return out;
}

SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 a) {
  const v64 out = c_v64_unpacklo_u16_s32(a);
  return out;
}

SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 a) {
  const v64 out = c_v64_unpacklo_s16_s32(a);
  return out;
}

SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 a) {
  const v64 out = c_v64_unpackhi_u16_s32(a);
  return out;
}

SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 a) {
  const v64 out = c_v64_unpackhi_s16_s32(a);
  return out;
}
/* Returns c_v64_shuffle_8(a, pattern); both arguments are forwarded unchanged
 * and in the same order. */
SIMD_INLINE v64 v64_shuffle_8(v64 a, v64 pattern) {
  const v64 shuffled = c_v64_shuffle_8(a, pattern);
  return shuffled;
}

/* Accumulator type threaded through v64_sad_u8_init() / v64_sad_u8() /
 * v64_sad_u8_sum(); in this fallback it is a plain uint32_t. */
typedef uint32_t sad64_internal;

/* Returns the starting accumulator produced by c_v64_sad_u8_init().
 *
 * The parameter list is spelled (void) rather than () so that the definition
 * is a true prototype: before C23 an empty list leaves the arguments
 * unchecked, and -Wstrict-prototypes flags the old form. */
SIMD_INLINE sad64_internal v64_sad_u8_init(void) { return c_v64_sad_u8_init(); }
/* Accumulation step: forwards the running accumulator s and the vectors
 * (a, b) to c_v64_sad_u8() and returns the accumulator it hands back. */
SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
  const sad64_internal updated = c_v64_sad_u8(s, a, b);
  return updated;
}

/* Final step: returns c_v64_sad_u8_sum(s) for the accumulator s. */
SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) {
  const uint32_t total = c_v64_sad_u8_sum(s);
  return total;
}
/* Accumulator type threaded through v64_ssd_u8_init() / v64_ssd_u8() /
 * v64_ssd_u8_sum(); in this fallback it is a plain uint32_t. */
typedef uint32_t ssd64_internal;

/* Returns the starting accumulator produced by c_v64_ssd_u8_init().
 *
 * The parameter list is spelled (void) rather than () so that the definition
 * is a true prototype: before C23 an empty list leaves the arguments
 * unchecked, and -Wstrict-prototypes flags the old form. */
SIMD_INLINE ssd64_internal v64_ssd_u8_init(void) { return c_v64_ssd_u8_init(); }
/* Accumulation step: forwards the running accumulator s and the vectors
 * (a, b) to c_v64_ssd_u8() and returns the accumulator it hands back. */
SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
  const ssd64_internal updated = c_v64_ssd_u8(s, a, b);
  return updated;
}

/* Final step: returns c_v64_ssd_u8_sum(s) for the accumulator s. */
SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) {
  const uint32_t total = c_v64_ssd_u8_sum(s);
  return total;
}
/* dotp / hadd family: vector(s) in, 64-bit scalar out.
 *
 * Each wrapper forwards its operand(s), unchanged and in order, to the
 * c_-prefixed routine of the same name and returns that routine's scalar. */
SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) {
  const int64_t scalar = c_v64_dotp_su8(a, b);
  return scalar;
}

SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) {
  const int64_t scalar = c_v64_dotp_s16(a, b);
  return scalar;
}

SIMD_INLINE uint64_t v64_hadd_u8(v64 a) {
  const uint64_t scalar = c_v64_hadd_u8(a);
  return scalar;
}

SIMD_INLINE int64_t v64_hadd_s16(v64 a) {
  const int64_t scalar = c_v64_hadd_s16(a);
  return scalar;
}

/* or / xor / and / andn family.
 *
 * Each wrapper forwards (a, b) in that order to the c_-prefixed routine of
 * the same name and returns its result. Operand order is preserved exactly,
 * which matters for v64_andn because c_v64_andn decides which operand is
 * complemented. */
SIMD_INLINE v64 v64_or(v64 a, v64 b) {
  const v64 out = c_v64_or(a, b);
  return out;
}

SIMD_INLINE v64 v64_xor(v64 a, v64 b) {
  const v64 out = c_v64_xor(a, b);
  return out;
}

SIMD_INLINE v64 v64_and(v64 a, v64 b) {
  const v64 out = c_v64_and(a, b);
  return out;
}

SIMD_INLINE v64 v64_andn(v64 a, v64 b) {
  const v64 out = c_v64_andn(a, b);
  return out;
}

/* mullo / mulhi / madd family.
 *
 * Each wrapper forwards (a, b) in that order to the c_-prefixed routine of
 * the same name and returns its result. */
SIMD_INLINE v64 v64_mullo_s16(v64 a, v64 b) {
  const v64 out = c_v64_mullo_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_mulhi_s16(v64 a, v64 b) {
  const v64 out = c_v64_mulhi_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_mullo_s32(v64 a, v64 b) {
  const v64 out = c_v64_mullo_s32(a, b);
  return out;
}

SIMD_INLINE v64 v64_madd_s16(v64 a, v64 b) {
  const v64 out = c_v64_madd_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_madd_us8(v64 a, v64 b) {
  const v64 out = c_v64_madd_us8(a, b);
  return out;
}

/* avg / rdavg / min / max family.
 *
 * Each wrapper forwards (a, b) in that order to the c_-prefixed routine of
 * the same name and returns its result; rounding behaviour of the averaging
 * variants is defined by the c_v64_* implementation. */
SIMD_INLINE v64 v64_avg_u8(v64 a, v64 b) {
  const v64 out = c_v64_avg_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) {
  const v64 out = c_v64_rdavg_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_rdavg_u16(v64 a, v64 b) {
  const v64 out = c_v64_rdavg_u16(a, b);
  return out;
}

SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) {
  const v64 out = c_v64_avg_u16(a, b);
  return out;
}

SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) {
  const v64 out = c_v64_min_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_max_u8(v64 a, v64 b) {
  const v64 out = c_v64_max_u8(a, b);
  return out;
}

SIMD_INLINE v64 v64_min_s8(v64 a, v64 b) {
  const v64 out = c_v64_min_s8(a, b);
  return out;
}

SIMD_INLINE v64 v64_max_s8(v64 a, v64 b) {
  const v64 out = c_v64_max_s8(a, b);
  return out;
}

SIMD_INLINE v64 v64_min_s16(v64 a, v64 b) {
  const v64 out = c_v64_min_s16(a, b);
  return out;
}

SIMD_INLINE v64 v64_max_s16(v64 a, v64 b) {
  const v64 out = c_v64_max_s16(a, b);
  return out;
}

/* cmpgt / cmplt / cmpeq family.
 *
 * Each wrapper forwards (a, b) in that order to the c_-prefixed routine of
 * the same name and returns the vector that routine produces; the encoding
 * of the comparison result is defined by the c_v64_* implementation. */
SIMD_INLINE v64 v64_cmpgt_s8(v64 a, v64 b) {
  const v64 mask = c_v64_cmpgt_s8(a, b);
  return mask;
}

SIMD_INLINE v64 v64_cmplt_s8(v64 a, v64 b) {
  const v64 mask = c_v64_cmplt_s8(a, b);
  return mask;
}

SIMD_INLINE v64 v64_cmpeq_8(v64 a, v64 b) {
  const v64 mask = c_v64_cmpeq_8(a, b);
  return mask;
}

SIMD_INLINE v64 v64_cmpgt_s16(v64 a, v64 b) {
  const v64 mask = c_v64_cmpgt_s16(a, b);
  return mask;
}

SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) {
  const v64 mask = c_v64_cmplt_s16(a, b);
  return mask;
}

SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) {
  const v64 mask = c_v64_cmpeq_16(a, b);
  return mask;
}

/* shl / shr family taking a shift count n.
 *
 * Each wrapper forwards the vector a and the count n, unchanged, to the
 * c_-prefixed routine of the same name and returns its result. No range check
 * on n is done here; any limit on n is enforced (or not) by the c_v64_*
 * implementation. */
SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shl_8(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_u8(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_s8(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shl_16(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_u16(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_s16(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shl_32(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_u32(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_s32(a, n);
  return shifted;
}
/* "_n_" shift family.
 *
 * Each wrapper forwards the vector a and its second argument (n or c),
 * unchanged, to the c_-prefixed routine of the same name and returns its
 * result. No range check on the count is done here; any limit is enforced
 * (or not) by the c_v64_* implementation. */
SIMD_INLINE v64 v64_shr_n_byte(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shr_n_byte(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shl_n_byte(v64 a, unsigned int n) {
  const v64 shifted = c_v64_shl_n_byte(a, n);
  return shifted;
}

SIMD_INLINE v64 v64_shl_n_8(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shl_n_8(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shr_n_u8(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shr_n_u8(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shr_n_s8(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shr_n_s8(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shl_n_16(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shl_n_16(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shr_n_u16(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shr_n_u16(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shr_n_s16(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shr_n_s16(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shl_n_32(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shl_n_32(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shr_n_u32(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shr_n_u32(a, c);
  return shifted;
}

SIMD_INLINE v64 v64_shr_n_s32(v64 a, unsigned int c) {
  const v64 shifted = c_v64_shr_n_s32(a, c);
  return shifted;
}

#endif  // AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_