diff options
author | Frank Galligan <fgalligan@google.com> | 2015-01-20 14:38:39 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-01-20 14:38:39 -0800 |
commit | 469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39 (patch) | |
tree | 6aa61284d9e5a6b5fb459ac50bbf62921188bd08 | |
parent | 79b88cc2a57a9c27bf12e053204dbc5d0c141ec6 (diff) | |
parent | 6e7e1cf32f85f91ddfcb49a807e598e8ead131fe (diff) | |
download | libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.tar libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.tar.gz libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.tar.bz2 libvpx-469ff48d7bdbd2e39ca4c8ec2a530a6e79f85b39.zip |
Merge "Add Neon intrinsics for vp9_avg_8x8_neon"
-rw-r--r-- | test/vp9_avg_test.cc | 10 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 2 | ||||
-rw-r--r-- | vp9/encoder/arm/neon/vp9_avg_neon.c | 49 | ||||
-rw-r--r-- | vp9/vp9cx.mk | 1 |
4 files changed, 61 insertions, 1 deletions
diff --git a/test/vp9_avg_test.cc b/test/vp9_avg_test.cc index fa04528a2..252ed4efa 100644 --- a/test/vp9_avg_test.cc +++ b/test/vp9_avg_test.cc @@ -165,4 +165,14 @@ INSTANTIATE_TEST_CASE_P( #endif +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P( + NEON, AverageTest, + ::testing::Values( + make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon), + make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon), + make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon))); + +#endif + } // namespace diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 11df21f07..4e9ec0f56 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1098,7 +1098,7 @@ add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; specialize qw/vp9_get_mb_ss/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p"; -specialize qw/vp9_avg_8x8 sse2/; +specialize qw/vp9_avg_8x8 sse2 neon/; add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p"; specialize qw/vp9_avg_4x4 sse2/; diff --git a/vp9/encoder/arm/neon/vp9_avg_neon.c b/vp9/encoder/arm/neon/vp9_avg_neon.c new file mode 100644 index 000000000..f505fcb7a --- /dev/null +++ b/vp9/encoder/arm/neon/vp9_avg_neon.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include "./vp9_rtcd.h" +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" + +static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) { + const uint32x4_t a = vpaddlq_u16(v_16x8); + const uint64x2_t b = vpaddlq_u32(a); + const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), + vreinterpret_u32_u64(vget_high_u64(b))); + return vget_lane_u32(c, 0); +} + +unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) { + uint8x8_t v_s0 = vld1_u8(s); + const uint8x8_t v_s1 = vld1_u8(s + p); + uint16x8_t v_sum = vaddl_u8(v_s0, v_s1); + + v_s0 = vld1_u8(s + 2 * p); + v_sum = vaddw_u8(v_sum, v_s0); + + v_s0 = vld1_u8(s + 3 * p); + v_sum = vaddw_u8(v_sum, v_s0); + + v_s0 = vld1_u8(s + 4 * p); + v_sum = vaddw_u8(v_sum, v_s0); + + v_s0 = vld1_u8(s + 5 * p); + v_sum = vaddw_u8(v_sum, v_s0); + + v_s0 = vld1_u8(s + 6 * p); + v_sum = vaddw_u8(v_sum, v_s0); + + v_s0 = vld1_u8(s + 7 * p); + v_sum = vaddw_u8(v_sum, v_s0); + + return (horizontal_add_u16x8(v_sum) + 32) >> 6; +} diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index c75fd8a01..33a1e6735 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -150,6 +150,7 @@ VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c +VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_sad_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c |