summaryrefslogtreecommitdiff
path: root/vp8/encoder/x86/quantize_sse2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/encoder/x86/quantize_sse2.asm')
-rw-r--r--vp8/encoder/x86/quantize_sse2.asm147
1 files changed, 3 insertions, 144 deletions
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index fe9464b3d..b41768ce0 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -27,7 +27,7 @@ sym(vp8_regular_quantize_b_sse2):
push rdi
push rsi
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
push rdi
push rsi
%endif
@@ -46,7 +46,7 @@ sym(vp8_regular_quantize_b_sse2):
mov rdi, arg(0) ; BLOCK *b
mov rsi, arg(1) ; BLOCKD *d
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
mov rdi, rcx ; BLOCK *b
mov rsi, rdx ; BLOCKD *d
%else
@@ -226,7 +226,7 @@ ZIGZAG_LOOP 15
pop rsi
pop rdi
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
pop rsi
pop rdi
%endif
@@ -236,147 +236,6 @@ ZIGZAG_LOOP 15
pop rbp
ret
-; void vp8_fast_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp8_fast_quantize_b_sse2) PRIVATE
-sym(vp8_fast_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- push rdi
- push rsi
- %else
- ; these registers are used for passing arguments
- %endif
-%endif
-
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp8_block_coeff]
- mov rcx, [rdi + vp8_block_round]
- mov rdx, [rdi + vp8_block_quant_fast]
-
- ; z = coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; dup z so we can save sz
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; x = abs(z) = (z ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; x += round
- paddw xmm1, [rcx]
- paddw xmm5, [rcx + 16]
-
- mov rax, [rsi + vp8_blockd_qcoeff]
- mov rcx, [rsi + vp8_blockd_dequant]
- mov rdi, [rsi + vp8_blockd_dqcoeff]
-
- ; y = x * quant >> 16
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- ; x = (y ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; qcoeff = x
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- ; x * dequant
- movdqa xmm2, xmm1
- movdqa xmm3, xmm5
- pmullw xmm2, [rcx]
- pmullw xmm3, [rcx + 16]
-
- ; dqcoeff = x * dequant
- movdqa [rdi], xmm2
- movdqa [rdi + 16], xmm3
-
- pxor xmm4, xmm4 ;clear all bits
- pcmpeqw xmm1, xmm4
- pcmpeqw xmm5, xmm4
-
- pcmpeqw xmm4, xmm4 ;set all bits
- pxor xmm1, xmm4
- pxor xmm5, xmm4
-
- pand xmm1, [GLOBAL(inv_zig_zag)]
- pand xmm5, [GLOBAL(inv_zig_zag + 16)]
-
- pmaxsw xmm1, xmm5
-
- mov rcx, [rsi + vp8_blockd_eob]
-
- ; now down to 8
- pshufd xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; only 4 left
- pshuflw xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; okay, just 2!
- pshuflw xmm5, xmm1, 00000001b
-
- pmaxsw xmm1, xmm5
-
- movd eax, xmm1
- and eax, 0xff
-
- mov BYTE PTR [rcx], al ; store eob
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
SECTION_RODATA
align 16
inv_zig_zag: