summaryrefslogtreecommitdiff
path: root/vp9/common/x86
diff options
context:
space:
mode:
author hkuang <hkuang@google.com> 2015-05-05 17:11:06 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2015-05-05 17:11:07 +0000
commit e47811ef8f045352dfad6c00add79e57e04fc96b (patch)
tree 5b6957a78b5d445fe9721dab34ca6c7aa10c912d /vp9/common/x86
parent af7695344867d2ac955570f18a9aac0502a842a0 (diff)
parent 493a8579f151df871d4b5b2aef2646cd7f7a179a (diff)
downloadlibvpx-e47811ef8f045352dfad6c00add79e57e04fc96b.tar
libvpx-e47811ef8f045352dfad6c00add79e57e04fc96b.tar.gz
libvpx-e47811ef8f045352dfad6c00add79e57e04fc96b.tar.bz2
libvpx-e47811ef8f045352dfad6c00add79e57e04fc96b.zip
Merge "Add some sse2 code for intra prediction."
Diffstat (limited to 'vp9/common/x86')
-rw-r--r-- vp9/common/x86/vp9_intrapred_sse2.asm 183
1 file changed, 183 insertions, 0 deletions
diff --git a/vp9/common/x86/vp9_intrapred_sse2.asm b/vp9/common/x86/vp9_intrapred_sse2.asm
index 69b07f645..f8c8f621a 100644
--- a/vp9/common/x86/vp9_intrapred_sse2.asm
+++ b/vp9/common/x86/vp9_intrapred_sse2.asm
@@ -15,6 +15,10 @@ pw_4: times 8 dw 4
pw_8: times 8 dw 8
pw_16: times 8 dw 16
pw_32: times 8 dw 32
+; 16 bytes of 128: the fill pattern loaded by the dc_128 predictors.
+dc_128: times 16 db 128
+; pw2_N: eight words each holding N/2. The dc_top/dc_left predictors add this
+; as the rounding term before shifting the N-pixel sum right by log2(N).
+pw2_4: times 8 dw 2
+pw2_8: times 8 dw 4
+pw2_16: times 8 dw 8
SECTION .text
@@ -40,6 +44,46 @@ cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
RET
INIT_MMX sse
+; Fills the 4x4 block at dst with the rounded mean of the four bytes at left,
+; (sum + 2) >> 2. The above argument is not read.
+cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  movd                  m0, [leftq]            ; four left bytes, upper half zero
+  pxor                  m1, m1                 ; zero operand for the byte sum
+  psadbw                m0, m1                 ; word 0 = sum of the four bytes
+  paddw                 m0, [GLOBAL(pw2_4)]    ; rounding term (+2)
+  psraw                 m0, 2                  ; divide by 4
+  pshufw                m0, m0, 0x0            ; copy word 0 into every word
+  packuswb              m0, m0                 ; words -> bytes: dc in each byte
+
+  ; Rows 0 and 1, then step dst down two rows for rows 2 and 3.
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+  lea                 dstq, [dstq+strideq*2]
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+
+  RESTORE_GOT
+  RET
+
+; Fills the 4x4 block at dst with the rounded mean of the four bytes at above,
+; (sum + 2) >> 2. The left argument is not read.
+INIT_MMX sse
+cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  movd                  m0, [aboveq]           ; four top bytes, upper half zero
+  pxor                  m1, m1                 ; zero operand for the byte sum
+  psadbw                m0, m1                 ; word 0 = sum of the four bytes
+  paddw                 m0, [GLOBAL(pw2_4)]    ; rounding term (+2)
+  psraw                 m0, 2                  ; divide by 4
+  pshufw                m0, m0, 0x0            ; copy word 0 into every word
+  packuswb              m0, m0                 ; words -> bytes: dc in each byte
+
+  ; Rows 0 and 1, then step dst down two rows for rows 2 and 3.
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+  lea                 dstq, [dstq+strideq*2]
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+
+  RESTORE_GOT
+  RET
+
+INIT_MMX sse
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
@@ -68,6 +112,91 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
+; Fills the 8x8 block at dst with the rounded mean of the eight bytes at above,
+; (sum + 4) >> 3. The left argument is not read.
+INIT_MMX sse
+cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  ; Compute the dc byte pattern while aboveq is still a valid name.
+  movq                  m0, [aboveq]           ; eight top bytes
+  pxor                  m1, m1                 ; zero operand for the byte sum
+  psadbw                m0, m1                 ; word 0 = sum of the eight bytes
+  paddw                 m0, [GLOBAL(pw2_8)]    ; rounding term (+4)
+  psraw                 m0, 3                  ; divide by 8
+  pshufw                m0, m0, 0x0            ; copy word 0 into every word
+  packuswb              m0, m0                 ; words -> bytes: dc in each byte
+
+  ; The third argument slot (previously above) is renamed to hold stride*3.
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+
+  ; Rows 0-3.
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  ; Rows 4-7.
+  lea                 dstq, [dstq+strideq*4]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+
+  RESTORE_GOT
+  RET
+
+; Fills the 8x8 block at dst with the rounded mean of the eight bytes at left,
+; (sum + 4) >> 3. The above argument is not read.
+INIT_MMX sse
+cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  ; Compute the dc byte pattern while leftq is still a valid name.
+  movq                  m0, [leftq]            ; eight left bytes
+  pxor                  m1, m1                 ; zero operand for the byte sum
+  psadbw                m0, m1                 ; word 0 = sum of the eight bytes
+  paddw                 m0, [GLOBAL(pw2_8)]    ; rounding term (+4)
+  psraw                 m0, 3                  ; divide by 8
+  pshufw                m0, m0, 0x0            ; copy word 0 into every word
+  packuswb              m0, m0                 ; words -> bytes: dc in each byte
+
+  ; Only dst and stride are needed from here on; the third slot holds stride*3.
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+
+  ; Rows 0-3.
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  ; Rows 4-7.
+  lea                 dstq, [dstq+strideq*4]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+
+  RESTORE_GOT
+  RET
+
+; Fills the 4x4 block at dst with the byte value 128. Neither above nor left
+; is read.
+INIT_MMX sse
+cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  movd                  m0, [GLOBAL(dc_128)]   ; four bytes of 128
+
+  ; The third argument slot is renamed to hold stride*3 for the last row.
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+
+  ; One 4-byte store per row, rows 0-3.
+  movd    [dstq          ], m0
+  movd    [dstq+strideq  ], m0
+  movd    [dstq+strideq*2], m0
+  movd    [dstq+stride3q ], m0
+  RESTORE_GOT
+  RET
+
+; Fills the 8x8 block at dst with the byte value 128. Neither above nor left
+; is read.
+INIT_MMX sse
+cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  movq                  m0, [GLOBAL(dc_128)]   ; eight bytes of 128
+
+  ; The third argument slot is renamed to hold stride*3.
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+
+  ; Rows 0-3.
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  ; Rows 4-7.
+  lea                 dstq, [dstq+strideq*4]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  RESTORE_GOT
+  RET
+
INIT_XMM sse2
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
@@ -100,6 +229,60 @@ cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
+
+; Fills the 16x16 block at dst with the rounded mean of the sixteen bytes at
+; above, (sum + 8) >> 4. The left argument is not read.
+; aboveq is loaded and dstq is stored with mova, so both are accessed as
+; 16-byte aligned addresses.
+INIT_XMM sse2
+cglobal dc_top_predictor_16x16, 4, 5, 2, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  pxor                  m1, m1                 ; zero operand for the byte sum
+  mova                  m0, [aboveq]           ; sixteen top bytes
+  ; above and left are done with: their slots become stride*3 and the counter.
+  DEFINE_ARGS dst, stride, stride3, lines4
+  lea             stride3q, [strideq*3]
+  mov              lines4d, 4                  ; 4 iterations x 4 rows = 16 rows
+  psadbw                m0, m1                 ; one partial sum per 64-bit half
+  ; Fold the high-half sum onto the low half. m1 is all zero at this point, so
+  ; reusing it as the destination leaves its upper half zero and needs no
+  ; third register.
+  movhlps               m1, m0
+  paddw                 m0, m1                 ; word 0 = sum of all 16 bytes
+  paddw                 m0, [GLOBAL(pw2_16)]   ; rounding term (+8)
+  psraw                 m0, 4                  ; divide by 16
+  pshuflw               m0, m0, 0x0            ; copy word 0 across the low half
+  punpcklqdq            m0, m0                 ; duplicate low half into high half
+  packuswb              m0, m0                 ; words -> bytes: dc in each byte
+.loop:
+  mova    [dstq          ], m0
+  mova    [dstq+strideq  ], m0
+  mova    [dstq+strideq*2], m0
+  mova    [dstq+stride3q ], m0
+  lea                 dstq, [dstq+strideq*4]
+  dec              lines4d
+  jnz .loop
+
+  RESTORE_GOT
+  REP_RET
+
+
+; Fills the 16x16 block at dst with the byte value 128. Neither above nor left
+; is read.
+;
+; In INIT_MMX mode the registers are 8 bytes wide, so every 16-byte row is
+; written as two 8-byte stores (columns 0-7 and 8-15). A single store per row
+; would leave the right half of the block untouched.
+; NOTE(review): an INIT_XMM sse2 version would need one store per row, but it
+; would change the cpu suffix of the exported symbol; confirm against the
+; callers before switching.
+INIT_MMX sse
+cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  ; above and left are unused: their slots become stride*3 and the counter.
+  DEFINE_ARGS dst, stride, stride3, lines4
+  lea             stride3q, [strideq*3]
+  mov              lines4d, 4                  ; 4 iterations x 4 rows = 16 rows
+  movq                  m0, [GLOBAL(dc_128)]   ; eight bytes of 128
+.loop:
+  movq  [dstq            ], m0
+  movq  [dstq+8          ], m0
+  movq  [dstq+strideq    ], m0
+  movq  [dstq+strideq+8  ], m0
+  movq  [dstq+strideq*2  ], m0
+  movq  [dstq+strideq*2+8], m0
+  movq  [dstq+stride3q   ], m0
+  movq  [dstq+stride3q+8 ], m0
+  lea                 dstq, [dstq+strideq*4]
+  dec              lines4d
+  jnz .loop
+  RESTORE_GOT
+  REP_RET
+
+
INIT_XMM sse2
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
GET_GOT goffsetq