1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
// JIT test: lane-wise complex arithmetic built from SIMD.Int32x4 operations,
// run often enough for the JIT tiers configured below to kick in.
// NOTE(review): simd.js presumably provides the assertEqX4 / simdToArray
// helpers used at the end of this file -- confirm against the test library.
load(libdir + 'simd.js');
// Nothing to test when the engine does not expose the SIMD global.
if (typeof SIMD === "undefined")
quit();
// Set the warm-up thresholds of both JIT tiers below the iteration count
// (`max`) used by the loop at the end of this file.
setJitCompilerOption("baseline.warmup.trigger", 10);
setJitCompilerOption("ion.warmup.trigger", 90);
var max = 100; // Keep the warm-up counter high enough (above the triggers
// set above) for inlining of the functions below to be considered.
// Despite the "f4" name, the vector type under test is Int32x4.
var f4 = SIMD.Int32x4; // :TODO: Support Float32x4 arith.
// Cache the lane-wise arithmetic operations used by the c4* helpers.
var f4add = f4.add;
var f4sub = f4.sub;
var f4mul = f4.mul;
/**
 * Lane-wise complex multiplication of two packed complex values.
 *
 * Both operands are objects of the form { re, im } whose fields are vectors
 * accepted by f4mul / f4add / f4sub. Neither operand is modified.
 *
 * @param z1 left operand, { re, im }
 * @param z2 right operand, { re, im }
 * @returns a new { re, im } object with
 *          re = re1 * re2 - im1 * im2 and im = re1 * im2 + im1 * re2.
 */
function c4mul(z1, z2) {
    var leftRe = z1.re;
    var leftIm = z1.im;
    var rightRe = z2.re;
    var rightIm = z2.im;

    // (a + bi)(c + di) = (ac - bd) + (ad + bc)i, evaluated per lane.
    var realPart = f4sub(f4mul(leftRe, rightRe), f4mul(leftIm, rightIm));
    var imagPart = f4add(f4mul(leftRe, rightIm), f4mul(leftIm, rightRe));

    return { re: realPart, im: imagPart };
}
/**
 * Lane-wise complex conjugate of a packed complex value.
 *
 * Despite the "inv" in its name, this negates only the imaginary part
 * (by multiplying it with a vector of -1); the real part is passed through
 * as the very same vector object.
 *
 * @param z packed complex value, { re, im }
 * @returns a new { re, im } object holding the conjugate of z.
 */
function c4inv(z) {
    var realPart = z.re;
    var imagPart = z.im;
    var negativeOnes = f4(-1, -1, -1, -1);
    return { re: realPart, im: f4mul(imagPart, negativeOnes) };
}
/**
 * In-place variant of c4inv: overwrites z.re and z.im with the fields of
 * c4inv(z). Mutates its argument and returns nothing.
 *
 * @param z packed complex value, { re, im }, updated in place
 */
function c4inv_inplace(z) {
    var { re: newRe, im: newIm } = c4inv(z);
    z.re = newRe;
    z.im = newIm;
}
/**
 * Lane-wise squared magnitude of a packed complex value.
 *
 * Multiplies z by c4inv(z) and keeps only the real part of the product;
 * the imaginary part is discarded.
 *
 * @param z packed complex value, { re, im }
 * @returns the `re` vector of c4mul(z, c4inv(z)).
 */
function c4norm(z) {
    var conjugate = c4inv(z);
    return c4mul(z, conjugate).re;
}
/**
 * Multiplies every lane of a packed complex value by the scalar s.
 *
 * The scalar is broadcast into a four-lane vector via f4(s, s, s, s) and
 * applied to both the real and the imaginary part with f4mul.
 *
 * @param z packed complex value, { re, im }; not modified
 * @param s scalar factor
 * @returns a new { re, im } object holding z scaled by s.
 */
function c4scale(z, s) {
    var realPart = z.re;
    var imagPart = z.im;
    var factor = f4(s, s, s, s);
    var scaledRe = f4mul(realPart, factor);
    var scaledIm = f4mul(imagPart, factor);
    return { re: scaledRe, im: scaledIm };
}
// The imaginary unit (0 + 1i) in every lane; multiplying by it with c4mul
// rotates each lane by 90 degrees in the complex plane.
var rotate90 = { re: f4(0, 0, 0, 0), im: f4(1, 1, 1, 1) };
// The four unit values 1, i, -1 and -i, one per lane.
var cardinals = { re: f4(1, 0, -1, 0), im: f4(0, 1, 0, -1) };
/**
 * Workload driven by the warm-up loop: applies four rounds of
 * "rotate by 90 degrees, conjugate on even rounds, scale by 2".
 *
 * The rotations and conjugations cancel out over the four rounds, so the
 * net effect is the input scaled by 2^4 = 16. The caller's object is never
 * mutated: the in-place conjugation only ever touches the fresh object
 * returned by c4mul.
 *
 * @param dots packed complex value, { re, im }
 * @returns a new { re, im } object equal to dots scaled by 16.
 */
function test(dots) {
    var current = dots;
    var round = 0;
    while (round < 4) {
        current = c4mul(rotate90, current);
        // Conjugate on rounds 0 and 2 only; this is what makes the four
        // rotations cancel out instead of accumulating.
        var isEvenRound = round % 2 === 0;
        if (isEvenRound) {
            c4inv_inplace(current);
        }
        current = c4scale(current, 2);
        round++;
    }
    return current;
}
// Sanity check before warming up: every lane of `cardinals` has a squared
// magnitude of 1.
assertEqX4(c4norm(cardinals), simdToArray(f4.splat(1)));
// Expected outcome of test(cardinals): the input scaled by 16.
var cardinals16 = c4scale(cardinals, 16);
// Run the workload `max` times so it crosses the configured JIT warm-up
// triggers, checking the result on every iteration.
for (var i = 0; i < max; i++) {
var res = test(cardinals);
// Scaling a unit-magnitude value by 16 gives a squared magnitude of 16 * 16.
assertEqX4(c4norm(res), simdToArray(f4.splat(16 * 16)));
assertEqX4(res.re, simdToArray(cardinals16.re));
assertEqX4(res.im, simdToArray(cardinals16.im));
}
|