summaryrefslogtreecommitdiff
path: root/vp8/common/arm/neon/reconb_neon.asm
diff options
context:
space:
mode:
authorJohn Koleszar <jkoleszar@google.com>2010-05-18 11:58:33 -0400
committerJohn Koleszar <jkoleszar@google.com>2010-05-18 11:58:33 -0400
commit0ea50ce9cb4b65eee6afa1d041fe8beb5abda667 (patch)
tree1f3b9019f28bc56fd3156f96e5a9653a983ee61b /vp8/common/arm/neon/reconb_neon.asm
downloadlibvpx-0ea50ce9cb4b65eee6afa1d041fe8beb5abda667.tar
libvpx-0ea50ce9cb4b65eee6afa1d041fe8beb5abda667.tar.gz
libvpx-0ea50ce9cb4b65eee6afa1d041fe8beb5abda667.tar.bz2
libvpx-0ea50ce9cb4b65eee6afa1d041fe8beb5abda667.zip
Initial WebM release
Diffstat (limited to 'vp8/common/arm/neon/reconb_neon.asm')
-rw-r--r--vp8/common/arm/neon/reconb_neon.asm60
1 files changed, 60 insertions, 0 deletions
diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm
new file mode 100644
index 000000000..16d85a0d5
--- /dev/null
+++ b/vp8/common/arm/neon/reconb_neon.asm
@@ -0,0 +1,60 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_recon_b_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 unsigned char *pred_ptr,
+; r1 short *diff_ptr,
+; r2 unsigned char *dst_ptr,
+; r3 int stride
+
+|vp8_recon_b_neon| PROC
+ mov r12, #16
+
+ vld1.u8 {d28}, [r0], r12 ;load 4 data/line from pred_ptr
+ vld1.16 {q10, q11}, [r1]! ;load data from diff_ptr
+ vld1.u8 {d29}, [r0], r12
+ vld1.16 {q11, q12}, [r1]!
+ vld1.u8 {d30}, [r0], r12
+ vld1.16 {q12, q13}, [r1]!
+ vld1.u8 {d31}, [r0], r12
+ vld1.16 {q13}, [r1]
+
+ vmovl.u8 q0, d28 ;modify Pred data from 8 bits to 16 bits
+ vmovl.u8 q1, d29 ;Pred data in d0, d2, d4, d6
+ vmovl.u8 q2, d30
+ vmovl.u8 q3, d31
+
+ vadd.s16 d0, d0, d20 ;add Diff data and Pred data together
+ vadd.s16 d2, d2, d22
+ vadd.s16 d4, d4, d24
+ vadd.s16 d6, d6, d26
+
+ vqmovun.s16 d0, q0 ;CLAMP() saturation
+ vqmovun.s16 d1, q1
+ vqmovun.s16 d2, q2
+ vqmovun.s16 d3, q3
+ add r1, r2, r3
+
+ vst1.32 {d0[0]}, [r2] ;store result
+ vst1.32 {d1[0]}, [r1], r3
+ add r2, r1, r3
+ vst1.32 {d2[0]}, [r1]
+ vst1.32 {d3[0]}, [r2], r3
+
+ bx lr
+
+ ENDP
+ END