diff options
Diffstat (limited to 'vp8/encoder/x86/quantize_sse2.asm')
-rw-r--r-- | vp8/encoder/x86/quantize_sse2.asm | 147 |
1 files changed, 3 insertions, 144 deletions
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm index fe9464b3d..b41768ce0 100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -27,7 +27,7 @@ sym(vp8_regular_quantize_b_sse2): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %endif @@ -46,7 +46,7 @@ sym(vp8_regular_quantize_b_sse2): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -226,7 +226,7 @@ ZIGZAG_LOOP 15 pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif @@ -236,147 +236,6 @@ ZIGZAG_LOOP 15 pop rbp ret -; void vp8_fast_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp8_fast_quantize_b_sse2) PRIVATE -sym(vp8_fast_quantize_b_sse2): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %else - ; these registers are used for passing arguments - %endif -%endif - - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp8_block_coeff] - mov rcx, [rdi + vp8_block_round] - mov rdx, [rdi + vp8_block_quant_fast] - - ; z = coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; dup z so we can save sz - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - ; x = abs(z) = (z ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; x += round - paddw xmm1, [rcx] - paddw xmm5, [rcx + 16] - - mov rax, [rsi + vp8_blockd_qcoeff] - mov rcx, [rsi + vp8_blockd_dequant] - mov rdi, [rsi + vp8_blockd_dqcoeff] - - ; y = x * quant >> 16 - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - ; x = (y ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; qcoeff = x - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - ; x * dequant - movdqa xmm2, xmm1 - movdqa xmm3, xmm5 - pmullw xmm2, [rcx] - pmullw xmm3, [rcx + 16] - - ; dqcoeff = x * dequant - movdqa [rdi], xmm2 - movdqa [rdi + 16], xmm3 - - pxor xmm4, xmm4 ;clear all bits - pcmpeqw xmm1, xmm4 - pcmpeqw xmm5, xmm4 - - pcmpeqw xmm4, xmm4 ;set all bits - pxor xmm1, xmm4 - pxor xmm5, xmm4 - - pand xmm1, [GLOBAL(inv_zig_zag)] - pand xmm5, [GLOBAL(inv_zig_zag + 16)] - - pmaxsw xmm1, xmm5 - - mov rcx, [rsi + vp8_blockd_eob] - - ; now down to 8 - pshufd xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; only 4 left - pshuflw xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; okay, just 2! - pshuflw xmm5, xmm1, 00000001b - - pmaxsw xmm1, xmm5 - - movd eax, xmm1 - and eax, 0xff - - mov BYTE PTR [rcx], al ; store eob - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - SECTION_RODATA align 16 inv_zig_zag: |