diff options
author | Ronald S. Bultje <rbultje@google.com> | 2012-11-01 11:09:58 -0700 |
---|---|---|
committer | Ronald S. Bultje <rbultje@google.com> | 2012-11-01 16:31:22 -0700 |
commit | 4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f (patch) | |
tree | 20eef975f1a8c28978d826a354092433b9093588 /vp9/common/x86/recon_mmx.asm | |
parent | 6c280c2299f078a475dc87e7615fdf1a4998cd31 (diff) | |
download | libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.tar libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.tar.gz libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.tar.bz2 libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.zip |
Rename vp8/ codec directory to vp9/.
Change-Id: Ic084c475844b24092a433ab88138cf58af3abbe4
Diffstat (limited to 'vp9/common/x86/recon_mmx.asm')
-rw-r--r-- | vp9/common/x86/recon_mmx.asm | 321 |
1 files changed, 321 insertions, 0 deletions
diff --git a/vp9/common/x86/recon_mmx.asm b/vp9/common/x86/recon_mmx.asm new file mode 100644 index 000000000..20f582dba --- /dev/null +++ b/vp9/common/x86/recon_mmx.asm @@ -0,0 +1,321 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" +;void vp9_recon_b_mmx(unsigned char *s, short *q, unsigned char *d, int stride) +global sym(vp9_recon_b_mmx) +sym(vp9_recon_b_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;s + mov rdi, arg(2) ;d + mov rdx, arg(1) ;q + movsxd rax, dword ptr arg(3) ;stride + pxor mm0, mm0 + + movd mm1, [rsi] + punpcklbw mm1, mm0 + paddsw mm1, [rdx] + packuswb mm1, mm0 ; pack and unpack to saturate + movd [rdi], mm1 + + movd mm2, [rsi+16] + punpcklbw mm2, mm0 + paddsw mm2, [rdx+32] + packuswb mm2, mm0 ; pack and unpack to saturate + movd [rdi+rax], mm2 + + movd mm3, [rsi+32] + punpcklbw mm3, mm0 + paddsw mm3, [rdx+64] + packuswb mm3, mm0 ; pack and unpack to saturate + movd [rdi+2*rax], mm3 + + add rdi, rax + movd mm4, [rsi+48] + punpcklbw mm4, mm0 + paddsw mm4, [rdx+96] + packuswb mm4, mm0 ; pack and unpack to saturate + movd [rdi+2*rax], mm4 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void copy_mem8x8_mmx( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp9_copy_mem8x8_mmx) +sym(vp9_copy_mem8x8_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movq mm0, [rsi] + + movsxd rax, dword ptr arg(1) ;src_stride; + mov rdi, arg(2) ;dst; + + movq mm1, [rsi+rax] + movq mm2, [rsi+rax*2] + + movsxd rcx, dword ptr arg(3) ;dst_stride + lea rsi, [rsi+rax*2] + + movq [rdi], mm0 + add rsi, rax + + movq [rdi+rcx], mm1 + movq [rdi+rcx*2], mm2 + + + lea rdi, [rdi+rcx*2] + movq mm3, [rsi] + + add rdi, rcx + movq mm4, [rsi+rax] + + movq mm5, [rsi+rax*2] + movq [rdi], mm3 + + lea rsi, [rsi+rax*2] + movq [rdi+rcx], mm4 + + movq [rdi+rcx*2], mm5 + lea rdi, [rdi+rcx*2] + + movq mm0, [rsi+rax] + movq mm1, [rsi+rax*2] + + movq [rdi+rcx], mm0 + movq [rdi+rcx*2],mm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void copy_mem8x4_mmx( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp9_copy_mem8x4_mmx) +sym(vp9_copy_mem8x4_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movq mm0, [rsi] + + movsxd rax, dword ptr arg(1) ;src_stride; + mov rdi, arg(2) ;dst; + + movq mm1, [rsi+rax] + movq mm2, [rsi+rax*2] + + movsxd rcx, dword ptr arg(3) ;dst_stride + lea rsi, [rsi+rax*2] + + movq [rdi], mm0 + movq [rdi+rcx], mm1 + + movq [rdi+rcx*2], mm2 + lea rdi, [rdi+rcx*2] + + movq mm3, [rsi+rax] + movq [rdi+rcx], mm3 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void copy_mem16x16_mmx( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp9_copy_mem16x16_mmx) +sym(vp9_copy_mem16x16_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movsxd rax, dword ptr arg(1) ;src_stride; + + mov rdi, arg(2) ;dst; + movsxd rcx, dword ptr arg(3) ;dst_stride + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq [rdi], mm0 + movq [rdi+8], mm3 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret |