summaryrefslogtreecommitdiff
path: root/vp9/vp9_common.mk
diff options
context:
space:
mode:
author: levytamar82 <levytamar82@gmail.com> 2013-11-21 15:49:29 -0700
committer: levytamar82 <levytamar82@gmail.com> 2014-01-09 12:27:51 -0700
commit: 511d218c60b9b6c1ab9383db746815e907af0359 (patch)
tree: a7cbf64477adac2433384293d88d08f27c373fec /vp9/vp9_common.mk
parent: a622ed554f7072268e4c8d0b8f26d2e8865c2b3b (diff)
downloadlibvpx-511d218c60b9b6c1ab9383db746815e907af0359.tar
libvpx-511d218c60b9b6c1ab9383db746815e907af0359.tar.gz
libvpx-511d218c60b9b6c1ab9383db746815e907af0359.tar.bz2
libvpx-511d218c60b9b6c1ab9383db746815e907af0359.zip
SSSE3 convolution optimization
Optimizing all SSSE3 assembly for convolution: 1. vp9_filter_block1d4_h8_sse2 2. vp9_filter_block1d8_h8_sse2 3. vp9_filter_block1d16_h8_sse2 4. vp9_filter_block1d4_v8_sse2 5. vp9_filter_block1d8_v8_sse2 6. vp9_filter_block1d16_v8_sse2 My optimizations include: - processing 2x8 elements in one 128-bit register instead of processing 8 elements in one 128-bit register. - removing unnecessary loads. This optimization gives a user-level gain ranging from 1.6% (720p input) to 2.4% (480p input). This optimization is done only for 64-bit. Change-Id: Icb586dc0c938b56699864fcee6c52fd43b36b969
Diffstat (limited to 'vp9/vp9_common.mk')
-rw-r--r--vp9/vp9_common.mk1
1 files changed, 1 insertions, 0 deletions
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index eefbd1ac9..8a8b63d97 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -77,6 +77,7 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm