summary | refs | log | tree | commit | diff
path: root/vpx_dsp/arm/idct4x4_add_neon.asm
diff options
context:
space:
mode:
author: James Zern <jzern@google.com> 2016-10-18 12:30:43 -0700
committer: James Zern <jzern@google.com> 2016-10-31 11:21:16 -0700
commit: 3ae25974fd4abfed344216a28d5af92fb62e3cc6 (patch)
tree: 5b1d996e7705955902b95a7a29e80c2b1e9869ad /vpx_dsp/arm/idct4x4_add_neon.asm
parent: ae206924a621f42cac1a252f2695fac43c9b166a (diff)
download: libvpx-3ae25974fd4abfed344216a28d5af92fb62e3cc6.tar
libvpx-3ae25974fd4abfed344216a28d5af92fb62e3cc6.tar.gz
libvpx-3ae25974fd4abfed344216a28d5af92fb62e3cc6.tar.bz2
libvpx-3ae25974fd4abfed344216a28d5af92fb62e3cc6.zip
idct,NEON: add a tran_low_t->s16 load adapter
Enable idct4x4* and idct8x8*, which are compatible with 8-bit decodes in high-bitdepth mode. The adapter narrows the 32-bit input to 16 bits; whether the expansion can be avoided at all in this case remains a TODO. Roughly matches sse2. BUG=webm:1294 Change-Id: I3ea94e5a2070dfd509b5de0c555aab4e1f4da036
Diffstat (limited to 'vpx_dsp/arm/idct4x4_add_neon.asm')
-rw-r--r-- vpx_dsp/arm/idct4x4_add_neon.asm 4
1 files changed, 3 insertions, 1 deletions
diff --git a/vpx_dsp/arm/idct4x4_add_neon.asm b/vpx_dsp/arm/idct4x4_add_neon.asm
index a4ccba993..c7c60c7ca 100644
--- a/vpx_dsp/arm/idct4x4_add_neon.asm
+++ b/vpx_dsp/arm/idct4x4_add_neon.asm
@@ -15,6 +15,8 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
+ INCLUDE vpx_dsp/arm/idct_neon.asm.s
+
AREA Block, CODE, READONLY ; name this block of code
;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
@@ -33,7 +35,7 @@
; So, two passes of a transpose followed by a column transform.
; load the inputs into q8-q9, d16-d19
- vld1.s16 {q8,q9}, [r0]!
+ LOAD_TRAN_LOW_TO_S16 d16, d17, d18, d19, r0
; generate scalar constants
; cospi_8_64 = 15137