summary | refs | log | tree | commit | diff
path: root/vp8
diff options
context:
space:
mode:
author: Yunqing Wang <yunqingwang@google.com> 2011-02-14 14:09:25 -0800
committer: Code Review <code-review@webmproject.org> 2011-02-14 14:09:25 -0800
commit: 7725a7eb56e74ebbdba14d01cfe85a151f81bf1c (patch)
tree: 7e36f0e87db4356bb52e4e46133beb224d5a72c0 /vp8
parent: 27dad21548253f95a880e52fd0f8fb8a398b72f0 (diff)
parent: 2debd5b5f75ab11bb6835b929e468f2873a88277 (diff)
download: libvpx-7725a7eb56e74ebbdba14d01cfe85a151f81bf1c.tar
libvpx-7725a7eb56e74ebbdba14d01cfe85a151f81bf1c.tar.gz
libvpx-7725a7eb56e74ebbdba14d01cfe85a151f81bf1c.tar.bz2
libvpx-7725a7eb56e74ebbdba14d01cfe85a151f81bf1c.zip
Merge "Improve vp8_sad16x16_sse3 function"
Diffstat (limited to 'vp8')
-rw-r--r-- vp8/encoder/x86/sad_sse3.asm 57
1 files changed, 25 insertions, 32 deletions
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 575417516..f0336ab17 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -586,52 +586,45 @@ sym(vp8_sad16x16_sse3):
STACK_FRAME_CREATE_X3
- lea end_ptr, [src_ptr+src_stride*8]
-
- lea end_ptr, [end_ptr+src_stride*8]
- pxor mm7, mm7
+ mov end_ptr, 4
+ pxor xmm7, xmm7
.vp8_sad16x16_sse3_loop:
+ movdqa xmm0, XMMWORD PTR [src_ptr]
+ movdqu xmm1, XMMWORD PTR [ref_ptr]
+ movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
+ movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
- movq ret_var, mm7
- cmp ret_var, max_err
- jg .vp8_sad16x16_early_exit
-
- movq mm0, QWORD PTR [src_ptr]
- movq mm2, QWORD PTR [src_ptr+8]
-
- movq mm1, QWORD PTR [ref_ptr]
- movq mm3, QWORD PTR [ref_ptr+8]
+ lea src_ptr, [src_ptr+src_stride*2]
+ lea ref_ptr, [ref_ptr+ref_stride*2]
- movq mm4, QWORD PTR [src_ptr+src_stride]
- movq mm5, QWORD PTR [ref_ptr+ref_stride]
+ movdqa xmm4, XMMWORD PTR [src_ptr]
+ movdqu xmm5, XMMWORD PTR [ref_ptr]
+ movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
- psadbw mm0, mm1
- psadbw mm2, mm3
+ psadbw xmm0, xmm1
- movq mm1, QWORD PTR [src_ptr+src_stride+8]
- movq mm3, QWORD PTR [ref_ptr+ref_stride+8]
+ movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
- psadbw mm4, mm5
- psadbw mm1, mm3
+ psadbw xmm2, xmm3
+ psadbw xmm4, xmm5
+ psadbw xmm6, xmm1
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
- paddw mm0, mm2
- paddw mm4, mm1
-
- paddw mm7, mm0
- paddw mm7, mm4
+ paddw xmm7, xmm0
+ paddw xmm7, xmm2
+ paddw xmm7, xmm4
+ paddw xmm7, xmm6
- cmp src_ptr, end_ptr
+ sub end_ptr, 1
jne .vp8_sad16x16_sse3_loop
- movq ret_var, mm7
-
-.vp8_sad16x16_early_exit:
-
- mov rax, ret_var
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+ paddw xmm0, xmm7
+ movq rax, xmm0
STACK_FRAME_DESTROY_X3