diff options
| author | Johann <johannkoenig@google.com> | 2011-04-07 13:17:22 -0400 |
|---|---|---|
| committer | Johann <johannkoenig@google.com> | 2011-04-18 16:30:38 -0400 |
| commit | c7cfde42a9ec05b72d15ebaa9a59cefed4cd323a (patch) | |
| tree | 395d38ba42df5e8be5abe33baa028bc937226155 /vp8/encoder/x86/sad_sse2.asm | |
| parent | d889035fe6802b64567c2ed250c1dff0eb377acf (diff) | |
| download | libvpx-c7cfde42a9ec05b72d15ebaa9a59cefed4cd323a.tar libvpx-c7cfde42a9ec05b72d15ebaa9a59cefed4cd323a.tar.gz libvpx-c7cfde42a9ec05b72d15ebaa9a59cefed4cd323a.tar.bz2 libvpx-c7cfde42a9ec05b72d15ebaa9a59cefed4cd323a.zip |
Add save/restore xmm registers in x86 assembly code
Went through the code and fixed it. Verified on Windows.
Where possible, remove dependencies on xmm[67]
Current code relies on pushing rbp to the stack to get 16 byte
alignment. This broke when rbp wasn't pushed
(vp8/encoder/x86/sad_sse3.asm). Work around this by using unaligned
memory accesses. Revisit this and the offsets in
vp8/encoder/x86/sad_sse3.asm in another change to SAVE_XMM.
Change-Id: I5f940994d3ebfd977c3d68446cef20fd78b07877
Diffstat (limited to 'vp8/encoder/x86/sad_sse2.asm')
-rw-r--r-- | vp8/encoder/x86/sad_sse2.asm | 18 |
1 file changed, 10 insertions, 8 deletions
```diff
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index cc6bc3cd9..d9ac3ff4f 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -21,6 +21,7 @@ sym(vp8_sad16x16_wmt):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    SAVE_XMM ; 6
     push        rsi
     push        rdi
     ; end prolog
@@ -34,7 +35,7 @@ sym(vp8_sad16x16_wmt):
     lea         rcx, [rsi+rax*8]

     lea         rcx, [rcx+rax*8]
-    pxor        xmm7, xmm7
+    pxor        xmm6, xmm6

 x16x16sad_wmt_loop:
@@ -52,32 +53,33 @@ x16x16sad_wmt_loop:
     punpcklbw   xmm1, xmm3

     psadbw      xmm0, xmm1
-    movq        xmm6, QWORD PTR [rsi+rax+8]
+    movq        xmm2, QWORD PTR [rsi+rax+8]

     movq        xmm3, QWORD PTR [rdi+rdx+8]
     lea         rsi, [rsi+rax*2]

     lea         rdi, [rdi+rdx*2]
-    punpcklbw   xmm4, xmm6
+    punpcklbw   xmm4, xmm2

     punpcklbw   xmm5, xmm3
     psadbw      xmm4, xmm5

-    paddw       xmm7, xmm0
-    paddw       xmm7, xmm4
+    paddw       xmm6, xmm0
+    paddw       xmm6, xmm4

     cmp         rsi, rcx
     jne         x16x16sad_wmt_loop

-    movq        xmm0, xmm7
-    psrldq      xmm7, 8
+    movq        xmm0, xmm6
+    psrldq      xmm6, 8

-    paddw       xmm0, xmm7
+    paddw       xmm0, xmm6
     movq        rax, xmm0

     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop rbp
     ret
```