summary | refs | log | tree | commit | diff
path: root/vp9/common
diff options
context:
space:
mode:
author Ronald S. Bultje <rbultje@google.com> 2013-07-10 14:52:23 -0700
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-07-10 14:52:23 -0700
commit e6f955251f936e90444f83ffcbc87d3a3d4fe24c (patch)
tree ce5693b685ec8b3d52e3de4e7be12a35a60a9c8d /vp9/common
parent 6a60249071f9bed09d9b0033064d1c1511a1ca13 (diff)
parent 7fd643264a80dcde9c994237b2b39433d9ce96b3 (diff)
download libvpx-e6f955251f936e90444f83ffcbc87d3a3d4fe24c.tar
libvpx-e6f955251f936e90444f83ffcbc87d3a3d4fe24c.tar.gz
libvpx-e6f955251f936e90444f83ffcbc87d3a3d4fe24c.tar.bz2
libvpx-e6f955251f936e90444f83ffcbc87d3a3d4fe24c.zip
Merge "SSSE3 assembly for 4x4/8x8/16x16/32x32 H intra prediction."
Diffstat (limited to 'vp9/common')
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh 8
-rw-r--r-- vp9/common/x86/vp9_intrapred_ssse3.asm 87
2 files changed, 91 insertions, 4 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 36c7afcd0..d861a7a5e 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -64,7 +64,7 @@ prototype void vp9_d63_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_d63_predictor_4x4
prototype void vp9_h_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_4x4
+specialize vp9_h_predictor_4x4 ssse3
prototype void vp9_d117_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d117_predictor_4x4
@@ -103,7 +103,7 @@ prototype void vp9_d63_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_d63_predictor_8x8
prototype void vp9_h_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_8x8
+specialize vp9_h_predictor_8x8 ssse3
prototype void vp9_d117_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d117_predictor_8x8
@@ -142,7 +142,7 @@ prototype void vp9_d63_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
specialize vp9_d63_predictor_16x16
prototype void vp9_h_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_16x16
+specialize vp9_h_predictor_16x16 ssse3
prototype void vp9_d117_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d117_predictor_16x16
@@ -181,7 +181,7 @@ prototype void vp9_d63_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
specialize vp9_d63_predictor_32x32
prototype void vp9_h_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_32x32
+specialize vp9_h_predictor_32x32 ssse3
prototype void vp9_d117_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d117_predictor_32x32
diff --git a/vp9/common/x86/vp9_intrapred_ssse3.asm b/vp9/common/x86/vp9_intrapred_ssse3.asm
new file mode 100644
index 000000000..bc8ed5c1f
--- /dev/null
+++ b/vp9/common/x86/vp9_intrapred_ssse3.asm
@@ -0,0 +1,87 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+INIT_MMX ssse3 ; x86inc setup: m0-m2 below are 64-bit MMX registers, SSSE3 instructions allowed
+cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left ; horizontal intra predictor: each of the 4 dst rows is filled with that row's left-column pixel; the 3rd C argument (yabove_row in the prototype) is never read and its slot is reused as the `line` counter
+ movifnidn leftq, leftmp ; cglobal auto-loads only 2 args (dst, stride); fetch `left` if it is not already in its register
+ add leftq, 4 ; leftq = one past the 4 left pixels, so negative indices walk the column
+ mov lineq, -2 ; 2 passes x 2 rows each; counts up towards zero
+ pxor m0, m0 ; all-zero pshufb control mask: every output byte selects source byte 0
+.loop:
+ movd m1, [leftq+lineq*2 ] ; 4-byte load; byte 0 is the left pixel for the even row of this pair
+ movd m2, [leftq+lineq*2+1] ; byte 0 is the left pixel for the odd row; NOTE(review): on the last pass this 4-byte load covers left[3..6], 3 bytes past the column - confirm the caller's buffer is padded
+ pshufb m1, m0 ; broadcast byte 0 to every byte of m1
+ pshufb m2, m0 ; broadcast byte 0 to every byte of m2
+ movd [dstq ], m1 ; write the 4 pixels of the even row
+ movd [dstq+strideq], m2 ; write the 4 pixels of the odd row
+ lea dstq, [dstq+strideq*2] ; step dst down two rows
+ inc lineq ; ZF is set once both row pairs have been written
+ jnz .loop
+ REP_RET ; x86inc return macro; NOTE(review): no emms is visible after the MMX use - confirm callers reset MMX/x87 state
+
+INIT_MMX ssse3 ; x86inc setup: m0-m2 below are 64-bit MMX registers, SSSE3 instructions allowed
+cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left ; horizontal intra predictor: each of the 8 dst rows is filled with that row's left-column pixel; the 3rd C argument (yabove_row in the prototype) is never read and its slot is reused as the `line` counter
+ movifnidn leftq, leftmp ; cglobal auto-loads only 2 args (dst, stride); fetch `left` if it is not already in its register
+ add leftq, 8 ; leftq = one past the 8 left pixels, so negative indices walk the column
+ mov lineq, -4 ; 4 passes x 2 rows each; counts up towards zero
+ pxor m0, m0 ; all-zero pshufb control mask: every output byte selects source byte 0
+.loop:
+ movd m1, [leftq+lineq*2 ] ; 4-byte load; byte 0 is the left pixel for the even row of this pair
+ movd m2, [leftq+lineq*2+1] ; byte 0 is the left pixel for the odd row; NOTE(review): on the last pass this 4-byte load covers left[7..10], 3 bytes past the column - confirm the caller's buffer is padded
+ pshufb m1, m0 ; broadcast byte 0 to all 8 bytes of m1
+ pshufb m2, m0 ; broadcast byte 0 to all 8 bytes of m2
+ movq [dstq ], m1 ; write the 8 pixels of the even row
+ movq [dstq+strideq], m2 ; write the 8 pixels of the odd row
+ lea dstq, [dstq+strideq*2] ; step dst down two rows
+ inc lineq ; ZF is set once all four row pairs have been written
+ jnz .loop
+ REP_RET ; x86inc return macro; NOTE(review): no emms is visible after the MMX use - confirm callers reset MMX/x87 state
+
+INIT_XMM ssse3 ; x86inc setup: m0-m2 below are 128-bit XMM registers, SSSE3 instructions allowed
+cglobal h_predictor_16x16, 2, 4, 3, dst, stride, line, left ; horizontal intra predictor: each of the 16 dst rows is filled with that row's left-column pixel; the 3rd C argument (yabove_row in the prototype) is never read and its slot is reused as the `line` counter
+ movifnidn leftq, leftmp ; cglobal auto-loads only 2 args (dst, stride); fetch `left` if it is not already in its register
+ add leftq, 16 ; leftq = one past the 16 left pixels, so negative indices walk the column
+ mov lineq, -8 ; 8 passes x 2 rows each; counts up towards zero
+ pxor m0, m0 ; all-zero pshufb control mask: every output byte selects source byte 0
+.loop:
+ movd m1, [leftq+lineq*2 ] ; 4-byte load; byte 0 is the left pixel for the even row of this pair
+ movd m2, [leftq+lineq*2+1] ; byte 0 is the left pixel for the odd row; NOTE(review): on the last pass this 4-byte load covers left[15..18], 3 bytes past the column - confirm the caller's buffer is padded
+ pshufb m1, m0 ; broadcast byte 0 to all 16 bytes of m1
+ pshufb m2, m0 ; broadcast byte 0 to all 16 bytes of m2
+ mova [dstq ], m1 ; write the 16 pixels of the even row; NOTE(review): mova is an aligned store - presumably dst and stride keep every row 16-byte aligned, confirm against callers
+ mova [dstq+strideq], m2 ; write the 16 pixels of the odd row (same alignment assumption)
+ lea dstq, [dstq+strideq*2] ; step dst down two rows
+ inc lineq ; ZF is set once all eight row pairs have been written
+ jnz .loop
+ REP_RET ; x86inc return macro
+
+INIT_XMM ssse3 ; x86inc setup: m0-m2 below are 128-bit XMM registers, SSSE3 instructions allowed
+cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left ; horizontal intra predictor: each of the 32 dst rows is filled with that row's left-column pixel; the 3rd C argument (yabove_row in the prototype) is never read and its slot is reused as the `line` counter
+ movifnidn leftq, leftmp ; cglobal auto-loads only 2 args (dst, stride); fetch `left` if it is not already in its register
+ add leftq, 32 ; leftq = one past the 32 left pixels, so negative indices walk the column
+ mov lineq, -16 ; 16 passes x 2 rows each; counts up towards zero
+ pxor m0, m0 ; all-zero pshufb control mask: every output byte selects source byte 0
+.loop:
+ movd m1, [leftq+lineq*2 ] ; 4-byte load; byte 0 is the left pixel for the even row of this pair
+ movd m2, [leftq+lineq*2+1] ; byte 0 is the left pixel for the odd row; NOTE(review): on the last pass this 4-byte load covers left[31..34], 3 bytes past the column - confirm the caller's buffer is padded
+ pshufb m1, m0 ; broadcast byte 0 to all 16 bytes of m1
+ pshufb m2, m0 ; broadcast byte 0 to all 16 bytes of m2
+ mova [dstq ], m1 ; even row, pixels 0-15; NOTE(review): mova is an aligned store - presumably dst and stride keep every row 16-byte aligned, confirm against callers
+ mova [dstq +16], m1 ; even row, pixels 16-31 (same register written twice to cover 32 bytes)
+ mova [dstq+strideq ], m2 ; odd row, pixels 0-15
+ mova [dstq+strideq+16], m2 ; odd row, pixels 16-31
+ lea dstq, [dstq+strideq*2] ; step dst down two rows
+ inc lineq ; ZF is set once all sixteen row pairs have been written
+ jnz .loop
+ REP_RET ; x86inc return macro