5 files changed, 552 insertions, 5 deletions
diff --git a/vp8/common/x86/mask_sse3.asm b/vp8/common/x86/mask_sse3.asm
new file mode 100644
index 000000000..0d90cfa86
--- /dev/null
+++ b/vp8/common/x86/mask_sse3.asm
@@ -0,0 +1,484 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp8_makemask_sse3(
+;    unsigned char *y,
+;    unsigned char *u,
+;    unsigned char *v,
+;    unsigned char *ym,
+;    unsigned char *uvm,  (NOTE(review): never read; the body takes yp from arg(4) - confirm the C prototype)
+;    int yp,
+;    int uvp,
+;    int ys,
+;    int us,
+;    int vs,
+;    int yt,
+;    int ut,
+;    int vt)
+global sym(vp8_makemask_sse3)
+sym(vp8_makemask_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 14
+    push        rsi
+    push        rdi
+    ; end prolog
+    ; NOTE(review): xmm6-xmm11 are used below without being saved; Win64 treats xmm6-xmm15 as callee-saved
+        mov             rsi,        arg(0) ;y
+        mov             rdi,        arg(1) ;u
+        mov             rcx,        arg(2) ;v
+        mov             rax,        arg(3) ;ym
+        movsxd          r9,         dword arg(4) ;yp, held in volatile r9 so callee-saved rbx stays untouched
+        movsxd          rdx,        dword arg(5) ;uvp
+
+        pxor            xmm0,xmm0           ; all-zero pshufb control: broadcasts byte 0
+
+        ;make 16 copies of the center y value
+        movd            xmm1, arg(6)
+        pshufb          xmm1, xmm0
+
+        ; make 16 copies of the center u value
+        movd            xmm2, arg(7)
+        pshufb          xmm2, xmm0
+
+        ; make 16 copies of the center v value
+        movd            xmm3, arg(8)
+        pshufb          xmm3, xmm0
+        unpcklpd        xmm2, xmm3          ; xmm2 = u center (low 8 bytes) | v center (high 8 bytes)
+
+        ;make 16 copies of the y tolerance
+        movd            xmm3, arg(9)
+        pshufb          xmm3, xmm0
+
+        ;make 16 copies of the u tolerance
+        movd            xmm4, arg(10)
+        pshufb          xmm4, xmm0
+
+        ;make 16 copies of the v tolerance
+        movd            xmm5, arg(11)
+        pshufb          xmm5, xmm0
+        unpckhpd        xmm4, xmm5          ; xmm4 = u tolerance (low 8 bytes) | v tolerance (high 8 bytes)
+
+        mov             r8,8                ; 8 iterations, two y rows (and one u/v row) per iteration
+
+NextPairOfRows:
+
+        ;grab the y source values
+        movdqu          xmm0, [rsi]
+
+        ;compute abs difference between source and y target
+        movdqa          xmm6, xmm1
+        movdqa          xmm7, xmm0
+        psubusb         xmm0, xmm1
+        psubusb         xmm6, xmm7
+        por             xmm0, xmm6
+
+        ;row 0 y mask: 0xff where y tolerance > abs difference
+        movdqa          xmm6, xmm3
+        pcmpgtb         xmm6, xmm0          ; NOTE(review): signed compare, abs differences >= 128 also pass
+
+        ;grab the y source values of the second row
+        add             rsi, r9
+        movdqu          xmm0, [rsi]
+
+        ;compute abs difference between source and y target
+        movdqa          xmm11, xmm1
+        movdqa          xmm7, xmm0
+        psubusb         xmm0, xmm1
+        psubusb         xmm11, xmm7
+        por             xmm0, xmm11
+
+        ;row 1 y mask: 0xff where y tolerance > abs difference
+        movdqa          xmm11, xmm3
+        pcmpgtb         xmm11, xmm0
+
+
+        ;grab 8 u and 8 v source values (only the low 8 bytes of each are used)
+        movq            xmm7, [rdi]
+        movq            xmm8, [rcx]
+        unpcklpd        xmm7, xmm8          ; xmm7 = u (low 8 bytes) | v (high 8 bytes)
+
+        ;compute abs difference between source and uv targets
+        movdqa          xmm9, xmm2
+        movdqa          xmm10, xmm7
+        psubusb         xmm7, xmm2
+        psubusb         xmm9, xmm10
+        por             xmm7, xmm9
+
+        ;check whether the number is < tolerance
+        movdqa          xmm0, xmm4
+        pcmpgtb         xmm0, xmm7
+
+        ;double  u and v masks
+        movdqa          xmm8, xmm0
+        punpckhbw       xmm0, xmm0          ; xmm0 = v mask, each byte duplicated to 16 bytes
+        punpcklbw       xmm8, xmm8          ; xmm8 = u mask, each byte duplicated to 16 bytes
+
+        ;mask row 0 and output
+        pand            xmm6, xmm8
+        pand            xmm6, xmm0
+        movdqa          [rax],xmm6          ; aligned store: ym must be 16-byte aligned
+
+        ;mask row 1 and output
+        pand            xmm11, xmm8
+        pand            xmm11, xmm0
+        movdqa          [rax+16],xmm11
+
+
+        ; to the next row or set of rows
+        add             rsi, r9
+        add             rdi, rdx
+        add             rcx, rdx
+        add             rax,32
+        dec r8
+        jnz NextPairOfRows
+
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;GROW_HORIZ (register for result, source register or mem local)
+; result = source | (source shifted left one byte) | (source shifted right one byte)
+; clobbers xmm14 and xmm15; a memory source is loaded with movdqa (16-byte aligned)
+%macro GROW_HORIZ 2
+    movdqa          %1, %2
+    movdqa          xmm14, %1
+    movdqa          xmm15, %1
+    pslldq          xmm14, 1
+    psrldq          xmm15, 1
+    por             %1,xmm14
+    por             %1,xmm15
+%endmacro
+;GROW_VERT (result, center row, above row, below row): result = OR of the three rows
+%macro GROW_VERT 4
+    movdqa          %1,%2
+    por             %1,%3
+    por             %1,%4
+%endmacro
+; GROW_NEXTLINE always ORs xmm0|xmm1|xmm2 into xmm3, whichever register %1 names
+;GROW_NEXTLINE (new line to grow, new source, line to write)
+%macro GROW_NEXTLINE 3
+    GROW_HORIZ %1, %2
+    GROW_VERT xmm3, xmm0, xmm1, xmm2
+    movdqa %3,xmm3
+%endmacro
+
+
+;void vp8_growmaskmb_sse3(
+;    unsigned char *om,
+;    unsigned char *nm)
+global sym(vp8_growmaskmb_sse3)
+sym(vp8_growmaskmb_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 2
+    push        rsi
+    push        rdi
+    ; end prolog
+    ; om/nm are accessed as 16 rows of 16 bytes with movdqa, so both must be 16-byte aligned
+    mov             rsi,        arg(0) ;src
+    mov             rdi,        arg(1) ;rst
+    ; grow rows 0-2 horizontally; xmm0/xmm1/xmm2 then act as a rotating 3-row window
+    GROW_HORIZ xmm0, [rsi]
+    GROW_HORIZ xmm1, [rsi+16]
+    GROW_HORIZ xmm2, [rsi+32]
+    ; output row 1 = rows 0|1|2; output row 0 has no row above it, so it is rows 0|1
+    GROW_VERT xmm3, xmm0, xmm1, xmm2
+    por xmm0,xmm1
+    movdqa [rdi], xmm0
+    movdqa [rdi+16],xmm3
+    ; each step grows the next source row into the oldest window slot and writes the row before it
+    GROW_NEXTLINE xmm0,[rsi+48],[rdi+32]
+    GROW_NEXTLINE xmm1,[rsi+64],[rdi+48]
+    GROW_NEXTLINE xmm2,[rsi+80],[rdi+64]
+    GROW_NEXTLINE xmm0,[rsi+96],[rdi+80]
+    GROW_NEXTLINE xmm1,[rsi+112],[rdi+96]
+    GROW_NEXTLINE xmm2,[rsi+128],[rdi+112]
+    GROW_NEXTLINE xmm0,[rsi+144],[rdi+128]
+    GROW_NEXTLINE xmm1,[rsi+160],[rdi+144]
+    GROW_NEXTLINE xmm2,[rsi+176],[rdi+160]
+    GROW_NEXTLINE xmm0,[rsi+192],[rdi+176]
+    GROW_NEXTLINE xmm1,[rsi+208],[rdi+192]
+    GROW_NEXTLINE xmm2,[rsi+224],[rdi+208]
+    GROW_NEXTLINE xmm0,[rsi+240],[rdi+224]
+    ; output row 15 has no row below it: row 15 (xmm0) | row 14 (xmm2)
+    por xmm0,xmm2
+    movdqa [rdi+240], xmm0
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+
+;unsigned int vp8_sad16x16_masked_wmt(
+;    unsigned char *src_ptr,
+;    int  src_stride,
+;    unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned char *mask)
+global sym(vp8_sad16x16_masked_wmt)
+sym(vp8_sad16x16_masked_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;src_ptr
+    mov             rdi,        arg(2) ;ref_ptr
+
+    mov             r8,         arg(4) ;mask, held in volatile r8 so callee-saved rbx stays untouched
+    movsxd          rax,        dword ptr arg(1) ;src_stride
+    movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+    mov             rcx,        16      ; rows remaining
+
+    pxor            xmm3,       xmm3    ; running SAD accumulator
+
+NextSadRow:
+    movdqu          xmm0,       [rsi]
+    movdqu          xmm1,       [rdi]
+    movdqu          xmm2,       [r8]
+    pand            xmm0,       xmm2    ; bytes where the mask is 0 become 0 in both rows,
+    pand            xmm1,       xmm2    ; so they add nothing to the SAD
+
+    psadbw          xmm0,       xmm1    ; one partial sum per 64-bit half
+    paddw           xmm3,       xmm0
+
+    add             rsi, rax
+    add             rdi, rdx
+    add             r8,   16            ; mask rows are stored 16 bytes apart
+
+    dec rcx
+    jnz NextSadRow
+
+    movdqa          xmm4 ,     xmm3
+    psrldq          xmm4,       8       ; bring the high-half sum down
+    paddw           xmm3,      xmm4     ; low word = total SAD
+    movq            rax,       xmm3
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_sad16x16_unmasked_wmt(
+;    unsigned char *src_ptr,
+;    int  src_stride,
+;    unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned char *mask)
+global sym(vp8_sad16x16_unmasked_wmt)
+sym(vp8_sad16x16_unmasked_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;src_ptr
+    mov             rdi,        arg(2) ;ref_ptr
+
+    mov             r8,         arg(4) ;mask, held in volatile r8 so callee-saved rbx stays untouched
+    movsxd          rax,        dword ptr arg(1) ;src_stride
+    movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+    mov             rcx,        16      ; rows remaining
+
+    pxor            xmm3,       xmm3    ; running SAD accumulator
+
+next_vp8_sad16x16_unmasked_wmt:
+    movdqu          xmm0,       [rsi]
+    movdqu          xmm1,       [rdi]
+    movdqu          xmm2,       [r8]
+    por             xmm0,       xmm2    ; bytes where the mask is 0xff become 0xff in both rows,
+    por             xmm1,       xmm2    ; so they add nothing to the SAD
+
+    psadbw          xmm0,       xmm1    ; one partial sum per 64-bit half
+    paddw           xmm3,       xmm0
+
+    add             rsi, rax
+    add             rdi, rdx
+    add             r8,   16            ; mask rows are stored 16 bytes apart
+
+    dec rcx
+    jnz next_vp8_sad16x16_unmasked_wmt
+
+    movdqa          xmm4 ,     xmm3
+    psrldq          xmm4,       8       ; bring the high-half sum down
+    paddw           xmm3,      xmm4     ; low word = total SAD
+    movq            rax,        xmm3
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_masked_predictor_wmt(
+;    unsigned char *masked,
+;    unsigned char *unmasked,
+;    int  src_stride,
+;    unsigned char *dst_ptr,
+;    int  dst_stride,
+;    unsigned char *mask)
+global sym(vp8_masked_predictor_wmt)
+sym(vp8_masked_predictor_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;masked source
+    mov             rdi,        arg(1) ;unmasked source
+
+    mov             r8,         arg(5) ;mask, held in volatile r8 so callee-saved rbx stays untouched
+    movsxd          rax,        dword ptr arg(2) ;src_stride
+    mov             r11,        arg(3) ; destination
+    movsxd          rdx,        dword ptr arg(4) ;dst_stride
+
+    mov             rcx,        16      ; rows remaining
+
+    ; dst = (masked & mask) | (unmasked & ~mask), one 16-byte row per iteration
+
+next_vp8_masked_predictor_wmt:
+    movdqu          xmm0,       [rsi]
+    movdqu          xmm1,       [rdi]
+    movdqu          xmm2,       [r8]
+
+    pand            xmm0,       xmm2
+    pandn           xmm2,       xmm1    ; xmm2 = ~mask & unmasked
+    por             xmm0,       xmm2
+    movdqu          [r11],      xmm0
+
+    add             r11, rdx
+    add             rsi, rax
+    add             rdi, rax            ; both sources step by src_stride, as in the _uv variant
+    add             r8,   16            ; mask rows are stored 16 bytes apart
+
+    dec rcx
+    jnz next_vp8_masked_predictor_wmt
+    ; NOTE(review): no result is computed; rax still holds src_stride on return
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;unsigned int vp8_masked_predictor_uv_wmt(
+;    unsigned char *masked,
+;    unsigned char *unmasked,
+;    int  src_stride,
+;    unsigned char *dst_ptr,
+;    int  dst_stride,
+;    unsigned char *mask)
+global sym(vp8_masked_predictor_uv_wmt)
+sym(vp8_masked_predictor_uv_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;masked source
+    mov             rdi,        arg(1) ;unmasked source
+
+    mov             r8,         arg(5) ;mask, held in volatile r8 so callee-saved rbx stays untouched
+    movsxd          rax,        dword ptr arg(2) ;src_stride
+    mov             r11,        arg(3) ; destination
+    movsxd          rdx,        dword ptr arg(4) ;dst_stride
+
+    mov             rcx,        8       ; rows remaining
+
+    ; dst = (masked & mask) | (unmasked & ~mask), one 8-byte row per iteration
+
+next_vp8_masked_predictor_uv_wmt:
+    movq            xmm0,       [rsi]
+    movq            xmm1,       [rdi]
+    movq            xmm2,       [r8]
+
+    pand            xmm0,       xmm2
+    pandn           xmm2,       xmm1    ; xmm2 = ~mask & unmasked
+    por             xmm0,       xmm2
+    movq            [r11],      xmm0
+
+    add             r11, rdx
+    add             rsi, rax
+    add             rdi, rax
+    add             r8,   8             ; mask rows are stored 8 bytes apart
+
+    dec rcx
+    jnz next_vp8_masked_predictor_uv_wmt
+    ; NOTE(review): no result is computed; rax still holds src_stride on return
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_uv_from_y_mask(
+;    unsigned char *ymask,
+;    unsigned char *uvmask)
+global sym(vp8_uv_from_y_mask)
+sym(vp8_uv_from_y_mask):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 2
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;src_ptr
+    mov             rdi,        arg(1) ;dst_ptr
+
+    ; NOTE(review): no result is computed and rax is never written here
+    mov             rcx,        8       ; output rows remaining
+
+    ; each iteration keeps every second byte of every second 16-byte y-mask row
+
+next_p8_uv_from_y_mask:
+    movdqu          xmm0,       [rsi]
+    pshufb          xmm0, [GLOBAL(shuf1b)] ; position-independent reference to the shuffle table
+    movq            [rdi],xmm0
+    add             rdi, 8
+    add             rsi,32              ; skip one 16-byte y-mask row
+
+    dec rcx
+    jnz next_p8_uv_from_y_mask
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+    db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0   ; pshufb control: out bytes 0-7 = source bytes 0,2,..,14; out bytes 8-15 = source byte 0
+
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index fcc75a901..cb7b69c08 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/recon.h"
 #include "recon_x86.h"
 #include "vpx_mem/vpx_mem.h"
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 6bca82bfb..39f4f7b88 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -1495,13 +1495,33 @@ k2_k4:
     times 8 db  36,  -11
     times 8 db  12,   -6
 align 16
+%if CONFIG_SIXTEENTH_SUBPEL_UV
 vp8_bilinear_filters_ssse3:
     times 8 db 128, 0
+    times 8 db 120, 8     ; 16-entry table: byte pairs (128-8k, 8k) for k = 0..15, each pair sums to 128
     times 8 db 112, 16
+    times 8 db 104, 24
     times 8 db 96,  32
+    times 8 db 88,  40
     times 8 db 80,  48
+    times 8 db 72,  56
     times 8 db 64,  64
+    times 8 db 56,  72
     times 8 db 48,  80
+    times 8 db 40,  88
     times 8 db 32,  96
+    times 8 db 24,  104
     times 8 db 16,  112
+    times 8 db 8,   120
+%else
+vp8_bilinear_filters_ssse3:
+    times 8 db 128, 0     ; 8-entry table: byte pairs (128-16k, 16k) for k = 0..7, each pair sums to 128
+    times 8 db 112, 16
+    times 8 db 96,  32
+    times 8 db 80,  48
+    times 8 db 64,  64
+    times 8 db 48,  80
+    times 8 db 32,  96
+    times 8 db 16,  112
+%endif
 
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index bce7bc38e..458b3f638 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -9,12 +9,19 @@
  */
 
 
-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/mem.h"
 #include "vp8/common/subpixel.h"
 
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+extern const short vp8_six_tap_mmx[16][6*8];
+extern const short vp8_bilinear_filters_mmx[16][2*8];
+#else
 extern const short vp8_six_tap_mmx[8][6*8];
 extern const short vp8_bilinear_filters_mmx[8][2*8];
+#endif
+
+//#define ANNOUNCE_FUNCTION
 
 extern void vp8_filter_block1d_h6_mmx
 (
@@ -128,6 +135,9 @@ void vp8_sixtap_predict4x4_mmx
     int dst_pitch
 )
 {
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict4x4_mmx\n");
+#endif
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
     HFilter = vp8_six_tap_mmx[xoffset];
@@ -149,6 +159,9 @@ void vp8_sixtap_predict16x16_mmx
 )
 {
 
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict16x16_mmx\n");
+#endif
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
@@ -181,6 +194,9 @@ void vp8_sixtap_predict8x8_mmx
 )
 {
 
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x8_mmx\n");
+#endif
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
@@ -206,7 +222,9 @@ void vp8_sixtap_predict8x4_mmx
     int dst_pitch
 )
 {
-
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x4_mmx\n");
+#endif
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
@@ -256,6 +274,9 @@ void vp8_sixtap_predict16x16_sse2
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict16x16_sse2\n");
+#endif
 
     if (xoffset)
     {
@@ -295,6 +316,9 @@ void vp8_sixtap_predict8x8_sse2
 {
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x8_sse2\n");
+#endif
 
     if (xoffset)
     {
@@ -333,6 +357,9 @@ void vp8_sixtap_predict8x4_sse2
 {
     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x4_sse2\n");
+#endif
 
     if (xoffset)
     {
@@ -434,6 +461,9 @@ void vp8_sixtap_predict16x16_ssse3
 )
 {
     DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict16x16_ssse3\n");
+#endif
 
     if (xoffset)
     {
@@ -466,6 +496,9 @@ void vp8_sixtap_predict8x8_ssse3
 )
 {
     DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x8_ssse3\n");
+#endif
 
     if (xoffset)
     {
@@ -498,6 +531,9 @@ void vp8_sixtap_predict8x4_ssse3
 )
 {
     DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x4_ssse3\n");
+#endif
 
     if (xoffset)
     {
@@ -530,6 +566,9 @@ void vp8_sixtap_predict4x4_ssse3
 )
 {
   DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict4x4_ssse3\n");
+#endif
 
   if (xoffset)
   {
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 33a984b79..53009502c 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -43,17 +43,17 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_mmx;
         rtcd->idct.iwalsh1     = vp8_short_inv_walsh4x4_1_mmx;
 
-
-
         rtcd->recon.recon       = vp8_recon_b_mmx;
         rtcd->recon.copy8x8     = vp8_copy_mem8x8_mmx;
         rtcd->recon.copy8x4     = vp8_copy_mem8x4_mmx;
         rtcd->recon.copy16x16   = vp8_copy_mem16x16_mmx;
 
+#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_mmx;
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_mmx;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_mmx;
         rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_mmx;
+#endif
         rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_mmx;
         rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_mmx;
         rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_mmx;
@@ -91,9 +91,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_sse2;
 
+#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_sse2;
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_sse2;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_sse2;
+#endif
         rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_sse2;
         rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_sse2;
 
@@ -120,12 +122,14 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 
     if (flags & HAS_SSSE3)
     {
+#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0
         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
         rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
         rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
         rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_ssse3;
+#endif
 
         rtcd->recon.build_intra_predictors_mbuv =
             vp8_build_intra_predictors_mbuv_ssse3;