diff options
Diffstat (limited to 'media/ffvpx/libavutil/x86')
-rw-r--r-- | media/ffvpx/libavutil/x86/asm.h | 66 | ||||
-rw-r--r-- | media/ffvpx/libavutil/x86/cpu.c | 4 | ||||
-rw-r--r-- | media/ffvpx/libavutil/x86/emms.h | 2 | ||||
-rw-r--r-- | media/ffvpx/libavutil/x86/x86util.asm | 69 |
4 files changed, 105 insertions, 36 deletions
diff --git a/media/ffvpx/libavutil/x86/asm.h b/media/ffvpx/libavutil/x86/asm.h index 109b65e54..9bff42d62 100644 --- a/media/ffvpx/libavutil/x86/asm.h +++ b/media/ffvpx/libavutil/x86/asm.h @@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg; typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg; #if ARCH_X86_64 -# define OPSIZE "q" -# define REG_a "rax" -# define REG_b "rbx" -# define REG_c "rcx" -# define REG_d "rdx" -# define REG_D "rdi" -# define REG_S "rsi" -# define PTR_SIZE "8" +# define FF_OPSIZE "q" +# define FF_REG_a "rax" +# define FF_REG_b "rbx" +# define FF_REG_c "rcx" +# define FF_REG_d "rdx" +# define FF_REG_D "rdi" +# define FF_REG_S "rsi" +# define FF_PTR_SIZE "8" typedef int64_t x86_reg; -/* REG_SP is defined in Solaris sys headers, so use REG_sp */ -# define REG_sp "rsp" -# define REG_BP "rbp" -# define REGBP rbp -# define REGa rax -# define REGb rbx -# define REGc rcx -# define REGd rdx -# define REGSP rsp +/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */ +# define FF_REG_sp "rsp" +# define FF_REG_BP "rbp" +# define FF_REGBP rbp +# define FF_REGa rax +# define FF_REGb rbx +# define FF_REGc rcx +# define FF_REGd rdx +# define FF_REGSP rsp #elif ARCH_X86_32 -# define OPSIZE "l" -# define REG_a "eax" -# define REG_b "ebx" -# define REG_c "ecx" -# define REG_d "edx" -# define REG_D "edi" -# define REG_S "esi" -# define PTR_SIZE "4" +# define FF_OPSIZE "l" +# define FF_REG_a "eax" +# define FF_REG_b "ebx" +# define FF_REG_c "ecx" +# define FF_REG_d "edx" +# define FF_REG_D "edi" +# define FF_REG_S "esi" +# define FF_PTR_SIZE "4" typedef int32_t x86_reg; -# define REG_sp "esp" -# define REG_BP "ebp" -# define REGBP ebp -# define REGa eax -# define REGb ebx -# define REGc ecx -# define REGd edx -# define REGSP esp +# define FF_REG_sp "esp" +# define FF_REG_BP "ebp" +# define FF_REGBP ebp +# define FF_REGa eax +# define FF_REGb ebx +# define FF_REGc ecx +# define FF_REGd edx +# define FF_REGSP esp #else typedef int x86_reg; #endif diff --git a/media/ffvpx/libavutil/x86/cpu.c b/media/ffvpx/libavutil/x86/cpu.c index b9f239be4..f3a49c677 100644 --- a/media/ffvpx/libavutil/x86/cpu.c +++ b/media/ffvpx/libavutil/x86/cpu.c @@ -41,9 +41,9 @@ /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ #define cpuid(index, eax, ebx, ecx, edx) \ __asm__ volatile ( \ - "mov %%"REG_b", %%"REG_S" \n\t" \ + "mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \ "cpuid \n\t" \ - "xchg %%"REG_b", %%"REG_S \ + "xchg %%"FF_REG_b", %%"FF_REG_S \ : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \ : "0" (index), "2"(0)) diff --git a/media/ffvpx/libavutil/x86/emms.h b/media/ffvpx/libavutil/x86/emms.h index 6fda6e276..42c18e295 100644 --- a/media/ffvpx/libavutil/x86/emms.h +++ b/media/ffvpx/libavutil/x86/emms.h @@ -31,6 +31,8 @@ void avpriv_emms_yasm(void); * Empty mmx state. * this must be called between any dsp function and float/double code. * for example sin(); dsp->idct_put(); emms_c(); cos() + * Note, *alloc() and *free() also use float code in some libc implementations + * thus this also applies to them or any function using them. */ static av_always_inline void emms_c(void) { diff --git a/media/ffvpx/libavutil/x86/x86util.asm b/media/ffvpx/libavutil/x86/x86util.asm index e7493f488..44ed750ae 100644 --- a/media/ffvpx/libavutil/x86/x86util.asm +++ b/media/ffvpx/libavutil/x86/x86util.asm @@ -30,7 +30,10 @@ %include "libavutil/x86/x86inc.asm" %macro SBUTTERFLY 4 -%if avx_enabled == 0 +%ifidn %1, dqqq + vperm2i128 m%4, m%2, m%3, q0301 + vinserti128 m%2, m%2, xm%3, 1 +%elif avx_enabled == 0 mova m%4, m%2 punpckl%1 m%2, m%3 punpckh%1 m%4, m%3 @@ -193,6 +196,70 @@ %endif %endmacro +%macro TRANSPOSE16x16W 18-19 +; in: m0..m15, unless %19 in which case m6 is in %17 +; out: m0..m15, unless %19 in which case m4 is in %18 +; spills into %17 and %18 +%if %0 < 19 + mova %17, m%7 +%endif + + SBUTTERFLY dqqq, %1, %9, %7 + SBUTTERFLY dqqq, %2, %10, %7 + SBUTTERFLY dqqq, %3, %11, %7 + SBUTTERFLY dqqq, %4, %12, %7 + SBUTTERFLY dqqq, %5, %13, %7 + SBUTTERFLY dqqq, %6, %14, %7 + mova %18, m%14 + mova m%7, %17 + SBUTTERFLY dqqq, %7, %15, %14 + SBUTTERFLY dqqq, %8, %16, %14 + + SBUTTERFLY wd, %1, %2, %14 + SBUTTERFLY wd, %3, %4, %14 + SBUTTERFLY wd, %5, %6, %14 + SBUTTERFLY wd, %7, %8, %14 + SBUTTERFLY wd, %9, %10, %14 + SBUTTERFLY wd, %11, %12, %14 + mova %17, m%12 + mova m%14, %18 + SBUTTERFLY wd, %13, %14, %12 + SBUTTERFLY wd, %15, %16, %12 + + SBUTTERFLY dq, %1, %3, %12 + SBUTTERFLY dq, %2, %4, %12 + SBUTTERFLY dq, %5, %7, %12 + SBUTTERFLY dq, %6, %8, %12 + SBUTTERFLY dq, %9, %11, %12 + mova %18, m%11 + mova m%12, %17 + SBUTTERFLY dq, %10, %12, %11 + SBUTTERFLY dq, %13, %15, %11 + SBUTTERFLY dq, %14, %16, %11 + + SBUTTERFLY qdq, %1, %5, %11 + SBUTTERFLY qdq, %2, %6, %11 + SBUTTERFLY qdq, %3, %7, %11 + SBUTTERFLY qdq, %4, %8, %11 + + SWAP %2, %5 + SWAP %4, %7 + + SBUTTERFLY qdq, %9, %13, %11 + SBUTTERFLY qdq, %10, %14, %11 + mova m%11, %18 + mova %18, m%5 + SBUTTERFLY qdq, %11, %15, %5 + SBUTTERFLY qdq, %12, %16, %5 + +%if %0 < 19 + mova m%5, %18 +%endif + + SWAP %10, %13 + SWAP %12, %15 +%endmacro + ; PABSW macro assumes %1 != %2, while ABS1/2 macros work in-place %macro PABSW 2 %if cpuflag(ssse3) |