summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frank Galligan <fgalligan@google.com> 2015-01-20 14:38:39 -0800
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2015-01-20 14:38:39 -0800
commit 469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39 (patch)
tree 6aa61284d9e5a6b5fb459ac50bbf62921188bd08
parent 79b88cc2a57a9c27bf12e053204dbc5d0c141ec6 (diff)
parent 6e7e1cf32f85f91ddfcb49a807e598e8ead131fe (diff)
download libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.tar
libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.tar.gz
libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.tar.bz2
libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.zip
Merge "Add Neon intrinsics for vp9_avg_8x8_neon"
-rw-r--r-- test/vp9_avg_test.cc 10
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 2
-rw-r--r-- vp9/encoder/arm/neon/vp9_avg_neon.c 49
-rw-r--r-- vp9/vp9cx.mk 1
4 files changed, 61 insertions, 1 deletions
diff --git a/test/vp9_avg_test.cc b/test/vp9_avg_test.cc
index fa04528a2..252ed4efa 100644
--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -165,4 +165,14 @@ INSTANTIATE_TEST_CASE_P(
#endif
+#if HAVE_NEON  // compiled only when HAVE_NEON is nonzero
+INSTANTIATE_TEST_CASE_P(
+ NEON, AverageTest,  // instantiates AverageTest under the "NEON" prefix
+ ::testing::Values(
+ make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),  // NOTE(review): tuple field meanings are defined by AverageTest, outside this hunk
+ make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),  // same function pointer, different third field
+ make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));  // different first three fields, same function pointer
+
+#endif  // HAVE_NEON
+
} // namespace
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 11df21f07..4e9ec0f56 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1098,7 +1098,7 @@ add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2/;
+specialize qw/vp9_avg_8x8 sse2 neon/;
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2/;
diff --git a/vp9/encoder/arm/neon/vp9_avg_neon.c b/vp9/encoder/arm/neon/vp9_avg_neon.c
new file mode 100644
index 000000000..f505fcb7a
--- /dev/null
+++ b/vp9/encoder/arm/neon/vp9_avg_neon.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+
+#include "vpx/vpx_integer.h"
+
+static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {  // Returns the sum of the eight 16-bit lanes of v_16x8.
+ const uint32x4_t a = vpaddlq_u16(v_16x8);  // add adjacent u16 pairs, widening to four u32 lanes
+ const uint64x2_t b = vpaddlq_u32(a);  // add adjacent u32 pairs, widening to two u64 lanes
+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),  // add the two 64-bit halves, each viewed as a pair of u32
+ vreinterpret_u32_u64(vget_high_u64(b)));
+ return vget_lane_u32(c, 0);  // total is read from lane 0; eight u16 values always fit in 32 bits
+}
+
+unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {  // Rounded mean of the 8x8 byte block at s, whose rows are p bytes apart.
+ uint8x8_t v_s0 = vld1_u8(s);  // row 0: eight bytes
+ const uint8x8_t v_s1 = vld1_u8(s + p);  // row 1
+ uint16x8_t v_sum = vaddl_u8(v_s0, v_s1);  // widening add: per-column sums are kept in u16 lanes
+
+ v_s0 = vld1_u8(s + 2 * p);  // row 2
+ v_sum = vaddw_u8(v_sum, v_s0);  // accumulate the u8 row into the u16 sums
+
+ v_s0 = vld1_u8(s + 3 * p);  // row 3
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 4 * p);  // row 4
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 5 * p);  // row 5
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 6 * p);  // row 6
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 7 * p);  // row 7
+ v_sum = vaddw_u8(v_sum, v_s0);  // each lane now holds the sum of one column (at most 8 * 255)
+
+ return (horizontal_add_u16x8(v_sum) + 32) >> 6;  // total of 64 samples divided by 64, rounded to nearest
+}
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index c75fd8a01..33a1e6735 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -150,6 +150,7 @@ VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
+VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_sad_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c