From 9ba2efd0340c2e0b9a3ac46ac1c94563552a112b Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Mon, 22 Oct 2012 16:16:04 -0700 Subject: Added sse2 instrinsic version of vp8_sad16x3 3.7% boost in decoder performance for the clip used. Change-Id: I74f28486a9352b472b36e21b5eaf30eff35e9199 --- vp8/common/rtcd_defs.sh | 4 +++- vp8/common/x86/sadmxn_x86.c | 50 +++++++++++++++++++++++++++++++++++++++++++++ vp8/vp8_common.mk | 3 ++- 3 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 vp8/common/x86/sadmxn_x86.c (limited to 'vp8') diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index ea64c9682..90debf5cb 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -177,11 +177,13 @@ vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon # # sad 16x3, 3x16 # +if [ "$CONFIG_NEWBESTREFMV" = "yes" ]; then prototype unsigned int vp8_sad16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" -specialize vp8_sad16x3 +specialize vp8_sad16x3 sse2 prototype unsigned int vp8_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" specialize vp8_sad3x16 +fi # # Encoder functions below this point. diff --git a/vp8/common/x86/sadmxn_x86.c b/vp8/common/x86/sadmxn_x86.c new file mode 100644 index 000000000..5057703bd --- /dev/null +++ b/vp8/common/x86/sadmxn_x86.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
#include <emmintrin.h>  // SSE2

/**
 * Sum of absolute differences (SAD) between a 16-pixel-wide, 3-row source
 * block and a reference block of the same size, using SSE2.
 *
 * @param src_ptr     First byte of the source block.
 * @param src_stride  Distance in bytes between consecutive source rows.
 * @param ref_ptr     First byte of the reference block.
 * @param ref_stride  Distance in bytes between consecutive reference rows.
 * @param max_sad     Unused; kept so the signature matches the other SAD
 *                    implementations selected through the same prototype.
 * @return            Sum of |src - ref| over all 48 byte pairs.
 *
 * Neither pointer needs to be 16-byte aligned: every row is fetched with an
 * unaligned load.
 */
unsigned int vp8_sad16x3_sse2(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    int max_sad) {
  __m128i s0, s1, s2;
  __m128i r0, r1, r2;
  __m128i sad;

  (void)max_sad;

  s0 = _mm_loadu_si128((const __m128i *)(src_ptr + 0 * src_stride));
  s1 = _mm_loadu_si128((const __m128i *)(src_ptr + 1 * src_stride));
  s2 = _mm_loadu_si128((const __m128i *)(src_ptr + 2 * src_stride));

  // Reference rows must advance by ref_stride; stepping them by src_stride
  // fetches the wrong rows whenever the two buffers have different pitches.
  r0 = _mm_loadu_si128((const __m128i *)(ref_ptr + 0 * ref_stride));
  r1 = _mm_loadu_si128((const __m128i *)(ref_ptr + 1 * ref_stride));
  r2 = _mm_loadu_si128((const __m128i *)(ref_ptr + 2 * ref_stride));

  // _mm_sad_epu8 leaves one partial sum in the low 16 bits of each 64-bit
  // half.  Three rows give at most 3 * 8 * 255 = 6120 per half, so 16-bit
  // adds cannot overflow.
  sad = _mm_sad_epu8(s0, r0);
  sad = _mm_add_epi16(sad, _mm_sad_epu8(s1, r1));
  sad = _mm_add_epi16(sad, _mm_sad_epu8(s2, r2));

  // Fold the upper half's partial sum onto the lower half.
  sad = _mm_add_epi16(sad, _mm_srli_si128(sad, 8));

  return (unsigned int)_mm_cvtsi128_si32(sad);
}