summary refs log tree commit diff
path: root/vp8/common/x86/idctllm_mmx.asm
diff options
context:
space:
mode:
author John Koleszar <jkoleszar@google.com> 2010-11-05 12:30:33 -0400
committer John Koleszar <jkoleszar@google.com> 2010-11-05 12:30:33 -0400
commit 7a590c902b9a77d9792d3a2497d28302eb0e0834 (patch)
tree b1f735eee5d5a6fbc633b11eecf90dc47f8d7e42 /vp8/common/x86/idctllm_mmx.asm
parent f4020e2338a1786b1db0f67075ceb7d9c01be6a3 (diff)
parent 5551ef0ef4fd3271330fa5a2fbdfe70d4d2a1d2e (diff)
download libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.tar
libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.tar.gz
libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.tar.bz2
libvpx-7a590c902b9a77d9792d3a2497d28302eb0e0834.zip
Merge remote branch 'origin/master' into experimental
Conflicts: configure ivfenc.c vp8/common/alloccommon.c vp8/common/onyxc_int.h vp8/vp8_cx_iface.c
Diffstat (limited to 'vp8/common/x86/idctllm_mmx.asm')
-rw-r--r-- vp8/common/x86/idctllm_mmx.asm 91
1 file changed, 59 insertions, 32 deletions
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 2751c6934..43735bc4b 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -57,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL] ;
+ pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -69,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
@@ -112,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL] ;
+ pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -124,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
- paddw mm2, [fours GLOBAL]
+ paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
@@ -195,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx):
mov rax, arg(0) ;input
movd mm0, [rax]
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
mov rdx, arg(1) ;output
psraw mm0, 3
@@ -219,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx):
pop rbp
ret
-;void dc_only_idct_mmx(short input_dc, short *output, int pitch)
-global sym(vp8_dc_only_idct_mmx)
-sym(vp8_dc_only_idct_mmx):
+;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+global sym(vp8_dc_only_idct_add_mmx)
+sym(vp8_dc_only_idct_add_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 3
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
+ push rsi
+ push rdi
; end prolog
- movd mm0, arg(0) ;input_dc
+ mov rsi, arg(1) ;s -- prediction
+ mov rdi, arg(2) ;d -- destination
+ movsxd rax, dword ptr arg(4) ;stride
+ movsxd rdx, dword ptr arg(3) ;pitch
+ pxor mm0, mm0
- paddw mm0, [fours GLOBAL]
- mov rdx, arg(1) ;output
+ movd mm5, arg(0) ;input_dc
- psraw mm0, 3
- movsxd rax, dword ptr arg(2) ;pitch
+ paddw mm5, [GLOBAL(fours)]
- punpcklwd mm0, mm0
- punpckldq mm0, mm0
+ psraw mm5, 3
- movq [rdx], mm0
- movq [rdx+rax], mm0
+ punpcklwd mm5, mm5
+ punpckldq mm5, mm5
- movq [rdx+rax*2], mm0
- add rdx, rax
+ movd mm1, [rsi]
+ punpcklbw mm1, mm0
+ paddsw mm1, mm5
+ packuswb mm1, mm0 ; pack and unpack to saturate
+ movd [rdi], mm1
- movq [rdx+rax*2], mm0
+ movd mm2, [rsi+rdx]
+ punpcklbw mm2, mm0
+ paddsw mm2, mm5
+ packuswb mm2, mm0 ; pack and unpack to saturate
+ movd [rdi+rax], mm2
+
+ movd mm3, [rsi+2*rdx]
+ punpcklbw mm3, mm0
+ paddsw mm3, mm5
+ packuswb mm3, mm0 ; pack and unpack to saturate
+ movd [rdi+2*rax], mm3
+
+ add rdi, rax
+ add rsi, rdx
+ movd mm4, [rsi+2*rdx]
+ punpcklbw mm4, mm0
+ paddsw mm4, mm5
+ packuswb mm4, mm0 ; pack and unpack to saturate
+ movd [rdi+2*rax], mm4
; begin epilog
+ pop rdi
+ pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp