summaryrefslogtreecommitdiff
path: root/vpx_dsp/arm
diff options
context:
space:
mode:
authorJohann <johannkoenig@google.com>2017-06-22 18:22:27 -0700
committerJohann <johannkoenig@google.com>2017-06-28 15:37:44 -0700
commitf310ddc4704e8b1cd5ec72472495ee8c3b13a486 (patch)
treee25f8f743a7dae92ebe50102711d403c44fe3847 /vpx_dsp/arm
parent4959dd3eb36308aa9d94ba4feb6305e8b3f9148b (diff)
downloadlibvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.tar
libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.tar.gz
libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.tar.bz2
libvpx-f310ddc4704e8b1cd5ec72472495ee8c3b13a486.zip
partial fdct neon: add 16x16_1
For the 8x8_1, the highbd output fit nicely in the existing function. For the 16x16_1, 12-bit input will overflow this implementation, because all sixteen rows are accumulated in 16-bit lanes before the final reduction. BUG=webm:1424 Change-Id: I2945fe5478b18f996f1a5de80110fa30f3f4e7ec
Diffstat (limited to 'vpx_dsp/arm')
-rw-r--r--vpx_dsp/arm/fdct_partial_neon.c18
1 files changed, 18 insertions, 0 deletions
diff --git a/vpx_dsp/arm/fdct_partial_neon.c b/vpx_dsp/arm/fdct_partial_neon.c
index 945b96a21..4e1a6dfda 100644
--- a/vpx_dsp/arm/fdct_partial_neon.c
+++ b/vpx_dsp/arm/fdct_partial_neon.c
@@ -59,3 +59,21 @@ void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) {
output[0] = sum_int16x8(sum);
output[1] = 0;
}
+
+// Partial 16x16 fdct: writes only output[0], the halved total of the 16x16
+// input block (assuming sum_int16x8 adds all lanes), and zeroes output[1].
+void vpx_fdct16x16_1_neon(const int16_t *input, tran_low_t *output,
+                          int stride) {
+  const int16_t *row = input + stride;
+  int16x8_t sum_lo = vld1q_s16(input);      // row 0, columns 0-7
+  int16x8_t sum_hi = vld1q_s16(input + 8);  // row 0, columns 8-15
+  int rows_left;
+  // Add rows 1-15 lane-wise; int16 lanes wrap, so 12-bit input can overflow.
+  for (rows_left = 15; rows_left > 0; --rows_left) {
+    sum_lo = vaddq_s16(sum_lo, vld1q_s16(row));
+    sum_hi = vaddq_s16(sum_hi, vld1q_s16(row + 8));
+    row += stride;
+  }
+  output[0] = (sum_int16x8(sum_lo) + sum_int16x8(sum_hi)) >> 1;
+  output[1] = 0;
+}