diff options
author | James Yu <james.yu@linaro.org> | 2013-12-17 22:03:09 +0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2014-05-03 04:54:39 -0700 |
commit | feaf766bd04fc2d0a75d322f41785a1ecf076b30 (patch) | |
tree | a67a7b109cdcbe9847bd418579f2a47278d4196c /vp8/common/arm/neon/sad8_neon.asm | |
parent | 4a8336fa9da0e338f22012cb603be314c1414264 (diff) | |
download | libvpx-feaf766bd04fc2d0a75d322f41785a1ecf076b30.tar libvpx-feaf766bd04fc2d0a75d322f41785a1ecf076b30.tar.gz libvpx-feaf766bd04fc2d0a75d322f41785a1ecf076b30.tar.bz2 libvpx-feaf766bd04fc2d0a75d322f41785a1ecf076b30.zip |
VP8 for ARMv8 by using NEON intrinsics 12
Add sad_neon.c
- vp8_sad16x16_neon
- vp8_sad16x8_neon
- vp8_sad8x8_neon
- vp8_sad8x16_neon
- vp8_sad4x4_neon
Change-Id: I08eaae49ec03fb91b394354660a5df0367cea311
Signed-off-by: James Yu <james.yu@linaro.org>
Diffstat (limited to 'vp8/common/arm/neon/sad8_neon.asm')
-rw-r--r-- | vp8/common/arm/neon/sad8_neon.asm | 215 |
1 files changed, 0 insertions, 215 deletions
diff --git a/vp8/common/arm/neon/sad8_neon.asm b/vp8/common/arm/neon/sad8_neon.asm deleted file mode 100644 index 6b849d933..000000000 --- a/vp8/common/arm/neon/sad8_neon.asm +++ /dev/null @@ -1,215 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_sad8x8_neon| - EXPORT |vp8_sad8x16_neon| - EXPORT |vp8_sad4x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; unsigned int vp8_sad8x8_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad8x8_neon| PROC - vpush {d8-d15} - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 q1, q12 - vpaddl.u32 q0, q1 - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - vpop {d8-d15} - bx lr - - ENDP - -;============================ -;unsigned int vp8_sad8x16_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad8x16_neon| PROC - vpush {d8-d15} - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 q1, q12 - vpaddl.u32 q0, q1 - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - vpop {d8-d15} - bx lr - - ENDP - -;=========================== -;unsigned int vp8_sad4x4_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad4x4_neon| PROC - vpush {d8-d15} - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 d1, d24 - vpaddl.u32 d0, d1 - vmov.32 r0, d0[0] - - vpop {d8-d15} - bx lr - - ENDP - - END |