summary | refs | log | tree | commit | diff
path: root/vp9/common/arm
diff options
context:
space:
mode:
author: Johann <johannkoenig@google.com> 2013-08-16 11:28:35 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-08-16 11:28:35 -0700
commit: 65aa89af1a70b82dc6aba8e63fe03a9ae188ae46 (patch)
tree: fd3d60c9eeb45aa976d9d1363b9b20f7cb046232 /vp9/common/arm
parent: bdc785e976c0f75cc8e970cbf434d782dc51b76e (diff)
parent: df0715204cccc9d9652bf43eb6fc164cca6a0fe4 (diff)
download: libvpx-65aa89af1a70b82dc6aba8e63fe03a9ae188ae46.tar
libvpx-65aa89af1a70b82dc6aba8e63fe03a9ae188ae46.tar.gz
libvpx-65aa89af1a70b82dc6aba8e63fe03a9ae188ae46.tar.bz2
libvpx-65aa89af1a70b82dc6aba8e63fe03a9ae188ae46.zip
Merge "Reduce instructions of idct4x4."
Diffstat (limited to 'vp9/common/arm')
-rw-r--r-- vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm | 20
1 files changed, 8 insertions, 12 deletions
diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
index 433032237..adc17fc97 100644
--- a/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
@@ -81,17 +81,15 @@
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
vmull.s16 q15, d17, d22
- vmull.s16 q0, d19, d20
vmull.s16 q1, d17, d20
- vmull.s16 q2, d19, d22
- vsub.s32 q3, q15, q0
- vadd.s32 q8, q1, q2
+ vmlsl.s16 q15, d19, d20
+ vmlal.s16 q1, d19, d22
; dct_const_round_shift
vqrshrn.s32 d26, q13, #14
vqrshrn.s32 d27, q14, #14
- vqrshrn.s32 d29, q3, #14
- vqrshrn.s32 d28, q8, #14
+ vqrshrn.s32 d29, q15, #14
+ vqrshrn.s32 d28, q1, #14
; stage 2
; output[0] = step[0] + step[3];
@@ -132,17 +130,15 @@
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
vmull.s16 q15, d17, d22
- vmull.s16 q0, d19, d20
vmull.s16 q1, d17, d20
- vmull.s16 q2, d19, d22
- vsub.s32 q3, q15, q0
- vadd.s32 q8, q1, q2
+ vmlsl.s16 q15, d19, d20
+ vmlal.s16 q1, d19, d22
; dct_const_round_shift
vqrshrn.s32 d26, q13, #14
vqrshrn.s32 d27, q14, #14
- vqrshrn.s32 d29, q3, #14
- vqrshrn.s32 d28, q8, #14
+ vqrshrn.s32 d29, q15, #14
+ vqrshrn.s32 d28, q1, #14
; stage 2
; output[0] = step[0] + step[3];