diff options
author | John Koleszar <jkoleszar@google.com> | 2010-11-05 12:30:33 -0400 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2010-11-05 12:30:33 -0400 |
commit | 7a590c902b9a77d9792d3a2497d28302eb0e0834 (patch) | |
tree | b1f735eee5d5a6fbc633b11eecf90dc47f8d7e42 /vp8/common/x86/idctllm_mmx.asm | |
parent | f4020e2338a1786b1db0f67075ceb7d9c01be6a3 (diff) | |
parent | 5551ef0ef4fd3271330fa5a2fbdfe70d4d2a1d2e (diff) | |
download | libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.tar libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.tar.gz libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.tar.bz2 libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.zip |
Merge remote branch 'origin/master' into experimental
Conflicts:
configure
ivfenc.c
vp8/common/alloccommon.c
vp8/common/onyxc_int.h
vp8/vp8_cx_iface.c
Diffstat (limited to 'vp8/common/x86/idctllm_mmx.asm')
-rw-r--r-- | vp8/common/x86/idctllm_mmx.asm | 91 |
1 files changed, 59 insertions, 32 deletions
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm index 2751c6934..43735bc4b 100644 --- a/vp8/common/x86/idctllm_mmx.asm +++ b/vp8/common/x86/idctllm_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -57,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL] ; + pmulhw mm5, [GLOBAL(x_s1sqr2)] ; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL] ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -69,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -112,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL] ; + pmulhw mm5, [GLOBAL(x_s1sqr2)] ; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL] ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -124,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, [fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 @@ -195,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx): mov rax, arg(0) ;input movd mm0, [rax] - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] mov rdx, arg(1) ;output psraw mm0, 3 @@ -219,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx): pop rbp ret -;void dc_only_idct_mmx(short input_dc, short *output, int pitch) -global sym(vp8_dc_only_idct_mmx) -sym(vp8_dc_only_idct_mmx): +;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) +global sym(vp8_dc_only_idct_add_mmx) +sym(vp8_dc_only_idct_add_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 + SHADOW_ARGS_TO_STACK 5 GET_GOT rbx + push rsi + push rdi ; end prolog - movd mm0, arg(0) ;input_dc + mov rsi, arg(1) ;s -- prediction + mov rdi, arg(2) ;d -- destination + movsxd rax, dword ptr arg(4) ;stride + movsxd rdx, dword ptr arg(3) ;pitch + pxor mm0, mm0 - paddw mm0, [fours GLOBAL] - mov rdx, arg(1) ;output + movd mm5, arg(0) ;input_dc - psraw mm0, 3 - movsxd rax, dword ptr arg(2) ;pitch + paddw mm5, [GLOBAL(fours)] - punpcklwd mm0, mm0 - punpckldq mm0, mm0 + psraw mm5, 3 - movq [rdx], mm0 - movq [rdx+rax], mm0 + punpcklwd mm5, mm5 + punpckldq mm5, mm5 - movq [rdx+rax*2], mm0 - add rdx, rax + movd mm1, [rsi] + punpcklbw mm1, mm0 + paddsw mm1, mm5 + packuswb mm1, mm0 ; pack and unpack to saturate + movd [rdi], mm1 - movq [rdx+rax*2], mm0 + movd mm2, [rsi+rdx] + punpcklbw mm2, mm0 + paddsw mm2, mm5 + packuswb mm2, mm0 ; pack and unpack to saturate + movd [rdi+rax], mm2 + + movd mm3, [rsi+2*rdx] + punpcklbw mm3, mm0 + paddsw mm3, mm5 + packuswb mm3, mm0 ; pack and unpack to saturate + movd [rdi+2*rax], mm3 + + add rdi, rax + add rsi, rdx + movd mm4, [rsi+2*rdx] + punpcklbw mm4, mm0 + paddsw mm4, mm5 + packuswb mm4, mm0 ; pack and unpack to saturate + movd [rdi+2*rax], mm4 ; begin epilog + pop rdi + pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp |