summaryrefslogtreecommitdiffstats
path: root/media/ffvpx/libavutil/x86
diff options
context:
space:
mode:
Diffstat (limited to 'media/ffvpx/libavutil/x86')
-rw-r--r--media/ffvpx/libavutil/x86/asm.h66
-rw-r--r--media/ffvpx/libavutil/x86/cpu.c4
-rw-r--r--media/ffvpx/libavutil/x86/emms.h2
-rw-r--r--media/ffvpx/libavutil/x86/x86util.asm69
4 files changed, 105 insertions, 36 deletions
diff --git a/media/ffvpx/libavutil/x86/asm.h b/media/ffvpx/libavutil/x86/asm.h
index 109b65e54..9bff42d62 100644
--- a/media/ffvpx/libavutil/x86/asm.h
+++ b/media/ffvpx/libavutil/x86/asm.h
@@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
#if ARCH_X86_64
-# define OPSIZE "q"
-# define REG_a "rax"
-# define REG_b "rbx"
-# define REG_c "rcx"
-# define REG_d "rdx"
-# define REG_D "rdi"
-# define REG_S "rsi"
-# define PTR_SIZE "8"
+# define FF_OPSIZE "q"
+# define FF_REG_a "rax"
+# define FF_REG_b "rbx"
+# define FF_REG_c "rcx"
+# define FF_REG_d "rdx"
+# define FF_REG_D "rdi"
+# define FF_REG_S "rsi"
+# define FF_PTR_SIZE "8"
typedef int64_t x86_reg;
-/* REG_SP is defined in Solaris sys headers, so use REG_sp */
-# define REG_sp "rsp"
-# define REG_BP "rbp"
-# define REGBP rbp
-# define REGa rax
-# define REGb rbx
-# define REGc rcx
-# define REGd rdx
-# define REGSP rsp
+/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
+# define FF_REG_sp "rsp"
+# define FF_REG_BP "rbp"
+# define FF_REGBP rbp
+# define FF_REGa rax
+# define FF_REGb rbx
+# define FF_REGc rcx
+# define FF_REGd rdx
+# define FF_REGSP rsp
#elif ARCH_X86_32
-# define OPSIZE "l"
-# define REG_a "eax"
-# define REG_b "ebx"
-# define REG_c "ecx"
-# define REG_d "edx"
-# define REG_D "edi"
-# define REG_S "esi"
-# define PTR_SIZE "4"
+# define FF_OPSIZE "l"
+# define FF_REG_a "eax"
+# define FF_REG_b "ebx"
+# define FF_REG_c "ecx"
+# define FF_REG_d "edx"
+# define FF_REG_D "edi"
+# define FF_REG_S "esi"
+# define FF_PTR_SIZE "4"
typedef int32_t x86_reg;
-# define REG_sp "esp"
-# define REG_BP "ebp"
-# define REGBP ebp
-# define REGa eax
-# define REGb ebx
-# define REGc ecx
-# define REGd edx
-# define REGSP esp
+# define FF_REG_sp "esp"
+# define FF_REG_BP "ebp"
+# define FF_REGBP ebp
+# define FF_REGa eax
+# define FF_REGb ebx
+# define FF_REGc ecx
+# define FF_REGd edx
+# define FF_REGSP esp
#else
typedef int x86_reg;
#endif
diff --git a/media/ffvpx/libavutil/x86/cpu.c b/media/ffvpx/libavutil/x86/cpu.c
index b9f239be4..f3a49c677 100644
--- a/media/ffvpx/libavutil/x86/cpu.c
+++ b/media/ffvpx/libavutil/x86/cpu.c
@@ -41,9 +41,9 @@
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index, eax, ebx, ecx, edx) \
__asm__ volatile ( \
- "mov %%"REG_b", %%"REG_S" \n\t" \
+ "mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
"cpuid \n\t" \
- "xchg %%"REG_b", %%"REG_S \
+ "xchg %%"FF_REG_b", %%"FF_REG_S \
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
: "0" (index), "2"(0))
diff --git a/media/ffvpx/libavutil/x86/emms.h b/media/ffvpx/libavutil/x86/emms.h
index 6fda6e276..42c18e295 100644
--- a/media/ffvpx/libavutil/x86/emms.h
+++ b/media/ffvpx/libavutil/x86/emms.h
@@ -31,6 +31,8 @@ void avpriv_emms_yasm(void);
* Empty mmx state.
* this must be called between any dsp function and float/double code.
* for example sin(); dsp->idct_put(); emms_c(); cos()
+ * Note, *alloc() and *free() also use float code in some libc implementations
+ * thus this also applies to them or any function using them.
*/
static av_always_inline void emms_c(void)
{
diff --git a/media/ffvpx/libavutil/x86/x86util.asm b/media/ffvpx/libavutil/x86/x86util.asm
index e7493f488..44ed750ae 100644
--- a/media/ffvpx/libavutil/x86/x86util.asm
+++ b/media/ffvpx/libavutil/x86/x86util.asm
@@ -30,7 +30,10 @@
%include "libavutil/x86/x86inc.asm"
%macro SBUTTERFLY 4
-%if avx_enabled == 0
+%ifidn %1, dqqq
+ vperm2i128 m%4, m%2, m%3, q0301
+ vinserti128 m%2, m%2, xm%3, 1
+%elif avx_enabled == 0
mova m%4, m%2
punpckl%1 m%2, m%3
punpckh%1 m%4, m%3
@@ -193,6 +196,70 @@
%endif
%endmacro
+%macro TRANSPOSE16x16W 18-19
+; in: m0..m15, unless %19 in which case m6 is in %17
+; out: m0..m15, unless %19 in which case m4 is in %18
+; spills into %17 and %18
+%if %0 < 19
+ mova %17, m%7
+%endif
+
+ SBUTTERFLY dqqq, %1, %9, %7
+ SBUTTERFLY dqqq, %2, %10, %7
+ SBUTTERFLY dqqq, %3, %11, %7
+ SBUTTERFLY dqqq, %4, %12, %7
+ SBUTTERFLY dqqq, %5, %13, %7
+ SBUTTERFLY dqqq, %6, %14, %7
+ mova %18, m%14
+ mova m%7, %17
+ SBUTTERFLY dqqq, %7, %15, %14
+ SBUTTERFLY dqqq, %8, %16, %14
+
+ SBUTTERFLY wd, %1, %2, %14
+ SBUTTERFLY wd, %3, %4, %14
+ SBUTTERFLY wd, %5, %6, %14
+ SBUTTERFLY wd, %7, %8, %14
+ SBUTTERFLY wd, %9, %10, %14
+ SBUTTERFLY wd, %11, %12, %14
+ mova %17, m%12
+ mova m%14, %18
+ SBUTTERFLY wd, %13, %14, %12
+ SBUTTERFLY wd, %15, %16, %12
+
+ SBUTTERFLY dq, %1, %3, %12
+ SBUTTERFLY dq, %2, %4, %12
+ SBUTTERFLY dq, %5, %7, %12
+ SBUTTERFLY dq, %6, %8, %12
+ SBUTTERFLY dq, %9, %11, %12
+ mova %18, m%11
+ mova m%12, %17
+ SBUTTERFLY dq, %10, %12, %11
+ SBUTTERFLY dq, %13, %15, %11
+ SBUTTERFLY dq, %14, %16, %11
+
+ SBUTTERFLY qdq, %1, %5, %11
+ SBUTTERFLY qdq, %2, %6, %11
+ SBUTTERFLY qdq, %3, %7, %11
+ SBUTTERFLY qdq, %4, %8, %11
+
+ SWAP %2, %5
+ SWAP %4, %7
+
+ SBUTTERFLY qdq, %9, %13, %11
+ SBUTTERFLY qdq, %10, %14, %11
+ mova m%11, %18
+ mova %18, m%5
+ SBUTTERFLY qdq, %11, %15, %5
+ SBUTTERFLY qdq, %12, %16, %5
+
+%if %0 < 19
+ mova m%5, %18
+%endif
+
+ SWAP %10, %13
+ SWAP %12, %15
+%endmacro
+
; PABSW macro assumes %1 != %2, while ABS1/2 macros work in-place
%macro PABSW 2
%if cpuflag(ssse3)