diff options
author | Johann <johannkoenig@google.com> | 2017-06-22 18:22:27 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2017-06-28 15:37:44 -0700 |
commit | f310ddc4704e8b1cd5ec72472495ee8c3b13a486 (patch) | |
tree | e25f8f743a7dae92ebe50102711d403c44fe3847 /vpx_dsp/arm | |
parent | 4959dd3eb36308aa9d94ba4feb6305e8b3f9148b (diff) | |
download | libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.tar libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.tar.gz libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.tar.bz2 libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.zip |
partial fdct neon: add 16x16_1
For the 8x8_1, the highbd output fit nicely in the existing function. 12
bit input will overflow this implementation of 16x16_1.
BUG=webm:1424
Change-Id: I2945fe5478b18f996f1a5de80110fa30f3f4e7ec
Diffstat (limited to 'vpx_dsp/arm')
-rw-r--r-- | vpx_dsp/arm/fdct_partial_neon.c | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/vpx_dsp/arm/fdct_partial_neon.c b/vpx_dsp/arm/fdct_partial_neon.c
index 945b96a21..4e1a6dfda 100644
--- a/vpx_dsp/arm/fdct_partial_neon.c
+++ b/vpx_dsp/arm/fdct_partial_neon.c
@@ -59,3 +59,21 @@ void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) {
   output[0] = sum_int16x8(sum);
   output[1] = 0;
 }
+
+void vpx_fdct16x16_1_neon(const int16_t *input, tran_low_t *output,
+                          int stride) {
+  int r;
+  int16x8_t left = vld1q_s16(input);
+  int16x8_t right = vld1q_s16(input + 8);
+  input += stride;
+  for (r = 1; r < 16; ++r) {
+    const int16x8_t a = vld1q_s16(input);
+    const int16x8_t b = vld1q_s16(input + 8);
+    input += stride;
+    left = vaddq_s16(left, a);
+    right = vaddq_s16(right, b);
+  }
+
+  output[0] = (sum_int16x8(left) + sum_int16x8(right)) >> 1;
+  output[1] = 0;
+}
```