summary | refs | log | tree | commit | diff
path: root/vp8/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common')
-rw-r--r-- vp8/common/arm/arm_systemdependent.c 1
-rw-r--r-- vp8/common/arm/dequantize_arm.c 6
-rw-r--r-- vp8/common/bigend.h 32
-rw-r--r-- vp8/common/blockd.h 10
-rw-r--r-- vp8/common/common.h 2
-rw-r--r-- vp8/common/common_types.h 18
-rw-r--r-- vp8/common/dequantize.c 3
-rw-r--r-- vp8/common/dequantize.h 2
-rw-r--r-- vp8/common/dma_desc.h 125
-rw-r--r-- vp8/common/duck_io.h 116
-rw-r--r-- vp8/common/findnearmv.h 8
-rw-r--r-- vp8/common/g_common.h 21
-rw-r--r-- vp8/common/generic/systemdependent.c 1
-rw-r--r-- vp8/common/invtrans.h 16
-rw-r--r-- vp8/common/littlend.h 33
-rw-r--r-- vp8/common/mbpitch.c 5
-rw-r--r-- vp8/common/onyx.h 38
-rw-r--r-- vp8/common/onyxc_int.h 6
-rw-r--r-- vp8/common/onyxd.h 20
-rw-r--r-- vp8/common/ppc/systemdependent.c 1
-rw-r--r-- vp8/common/type_aliases.h 117
-rw-r--r-- vp8/common/x86/idct_blk_mmx.c 6
-rw-r--r-- vp8/common/x86/loopfilter_sse2.asm 199
-rw-r--r-- vp8/common/x86/recon_sse2.asm 482
-rw-r--r-- vp8/common/x86/recon_wrapper_sse2.c 66
-rw-r--r-- vp8/common/x86/recon_x86.h 16
-rw-r--r-- vp8/common/x86/x86_systemdependent.c 9
27 files changed, 720 insertions, 639 deletions
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index 1e2467411..89a2be825 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -11,7 +11,6 @@
#include "vpx_config.h"
#include "vpx_ports/arm.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/pragmas.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
diff --git a/vp8/common/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c
index 20a8ac4fc..7cf4bf943 100644
--- a/vp8/common/arm/dequantize_arm.c
+++ b/vp8/common/arm/dequantize_arm.c
@@ -23,22 +23,20 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
#if HAVE_ARMV7
-void vp8_dequantize_b_neon(BLOCKD *d)
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = d->dequant;
vp8_dequantize_b_loop_neon(Q, DQC, DQ);
}
#endif
#if HAVE_ARMV6
-void vp8_dequantize_b_v6(BLOCKD *d)
+void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = d->dequant;
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}
diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
deleted file mode 100644
index 6ac3f8b5a..000000000
--- a/vp8/common/bigend.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _bigend_h
-#define _bigend_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define invert2(x) ( (((x)>>8)&0x00ff) | (((x)<<8)&0xff00) )
-#define invert4(x) ( ((invert2(x)&0x0000ffff)<<16) | (invert2((x>>16))&0x0000ffff) )
-
-#define high_byte(x) (unsigned char)x
-#define mid2Byte(x) (unsigned char)(x >> 8)
-#define mid1Byte(x) (unsigned char)(x >> 16)
-#define low_byte(x) (unsigned char)(x >> 24)
-
-#define SWAPENDS 1
-
-#if defined(__cplusplus)
-}
-#endif
-#endif
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 91e90e2a6..b237206e6 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -21,9 +21,6 @@ void vpx_log(const char *format, ...);
#include "subpixel.h"
#include "vpx_ports/mem.h"
-#define TRUE 1
-#define FALSE 0
-
/*#define DCPRED 1*/
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
@@ -187,7 +184,6 @@ typedef struct
short *qcoeff;
short *dqcoeff;
unsigned char *predictor;
- short *diff;
short *dequant;
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
@@ -206,12 +202,16 @@ typedef struct
typedef struct MacroBlockD
{
- DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
+ DECLARE_ALIGNED(16, short, dequant_y1[16]);
+ DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
+ DECLARE_ALIGNED(16, short, dequant_y2[16]);
+ DECLARE_ALIGNED(16, short, dequant_uv[16]);
+
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;
diff --git a/vp8/common/common.h b/vp8/common/common.h
index 9a93da991..2cc1c544c 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -18,8 +18,6 @@
#include "vpx_mem/vpx_mem.h"
-#include "common_types.h"
-
/* Only need this for fixed-size arrays, for structs just assign. */
#define vp8_copy( Dest, Src) { \
diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
deleted file mode 100644
index 4e6248697..000000000
--- a/vp8/common/common_types.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_COMMON_TYPES
-#define __INC_COMMON_TYPES
-
-#define TRUE 1
-#define FALSE 0
-
-#endif
diff --git a/vp8/common/dequantize.c b/vp8/common/dequantize.c
index 4a48a3192..96245162f 100644
--- a/vp8/common/dequantize.c
+++ b/vp8/common/dequantize.c
@@ -14,12 +14,11 @@
#include "vp8/common/idct.h"
#include "vpx_mem/vpx_mem.h"
-void vp8_dequantize_b_c(BLOCKD *d)
+void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = d->dequant;
for (i = 0; i < 16; i++)
{
diff --git a/vp8/common/dequantize.h b/vp8/common/dequantize.h
index f66cf2bac..429359190 100644
--- a/vp8/common/dequantize.h
+++ b/vp8/common/dequantize.h
@@ -14,7 +14,7 @@
#include "vp8/common/blockd.h"
#define prototype_dequant_block(sym) \
- void sym(BLOCKD *x)
+ void sym(BLOCKD *x, short *DQC)
#define prototype_dequant_idct_add(sym) \
void sym(short *input, short *dq, \
diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
deleted file mode 100644
index b923da6e0..000000000
--- a/vp8/common/dma_desc.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _dma_desc_h
-#define _dma_desc_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-
-#define NDSIZE_LG 0x00000900 // Next Descriptor Size
-#define NDSIZE_SM 0x00000800 // Next Descriptor Size
-#define NDSIZE_7 0x00000700 // Next Descriptor Size
-#define NDSIZE_6 0x00000600 // Next Descriptor Size
-#define NDSIZE_5 0x00000500 // Next Descriptor Size
-#define NDSIZE_4 0x00000400 // Next Descriptor Size
-#define NDSIZE_3 0x00000300 // Next Descriptor Size
-#define NDSIZE_2 0x00000200 // Next Descriptor Size
-#define NDSIZE_1 0x00000100 // Next Descriptor Size
-
-#define FLOW_STOP 0x0000
-#define FLOW_AUTO 0x1000
-#define FLOW_DESC_AR 0x4000
-#define FLOW_DESC_SM 0x6000
-#define FLOW_DESC_LG 0x7000
-
- typedef struct
- {
- unsigned int ndp;
- //unsigned short ndpl;
- //unsigned short ndph;
- unsigned int sa;
- //unsigned short sal;
- //unsigned short sah;
-
- unsigned short dmacfg;
- unsigned short xcnt;
- unsigned short xmod;
- unsigned short ycnt;
- unsigned short ymod;
-
- } LARGE_DESC;
-
- typedef struct
- {
- unsigned short ndpl;
- unsigned short sal;
- unsigned short sah;
- unsigned short dmacfg;
- unsigned short xcnt;
- unsigned short xmod;
- unsigned short ycnt;
- unsigned short ymod;
- } SMALL_DESC;
-
- typedef struct
- {
- unsigned short sal;
- unsigned short sah;
- unsigned short dmacfg;
- unsigned short xcnt;
- unsigned short xmod;
- unsigned short ycnt;
- unsigned short ymod;
- } ARRAY_DESC_7;
-
- typedef struct
- {
- unsigned short sal;
- unsigned short sah;
- unsigned short dmacfg;
- unsigned short xcnt;
- unsigned short xmod;
- unsigned short ycnt;
- } ARRAY_DESC_6;
-
- typedef struct
- {
- unsigned short sal;
- unsigned short sah;
- unsigned short dmacfg;
- unsigned short xcnt;
- unsigned short xmod;
- } ARRAY_DESC_5;
-
- typedef struct
- {
- unsigned short sal;
- unsigned short sah;
- unsigned short dmacfg;
- unsigned short xcnt;
- } ARRAY_DESC_4;
-
- typedef struct
- {
- unsigned short sal;
- unsigned short sah;
- unsigned short dmacfg;
- } ARRAY_DESC_3;
-
- typedef struct
- {
- unsigned short sal;
- unsigned short sah;
- } ARRAY_DESC_2;
-
- typedef struct
- {
- unsigned short sal;
- } ARRAY_DESC_1;
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif //_dma_desc_h
diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
deleted file mode 100644
index 43daa65bc..000000000
--- a/vp8/common/duck_io.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _duck_io_h
-#define _duck_io_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#if defined (_WIN32)
- typedef __int64 int64_t;
-#elif defined(__MWERKS__)
- typedef long long int64_t;
-#elif defined(__APPLE__) || defined(__POWERPC)
-#include <ppc/types.h>
-#else
- typedef long long int64_t;
-#endif
-
- typedef struct
- {
- int64_t offset; // offset to start from
- int blocking; // non-zero for blocking
- } re_open_t;
-
-
- typedef enum
- {
- SAL_ERR_MAX = -10,
- SAL_ERROR = -11, // Default error
- SAL_ERR_WSASTARTUP = -12,
- SAL_ERR_SOCKET_CREATE = -13,
- SAL_ERR_RESOLVING_HOSTNAME = -14,
- SAL_ERR_SERVER_CONNECTION = -15,
- SAL_ERR_SENDING_DATA = -16,
- SAL_ERR_RECEIVING_DATA = -17,
- SAL_ERR_404_FILE_NOT_FOUND = -18,
- SAL_ERR_PARSING_HTTP_HEADER = -19,
- SAL_ERR_PARSING_CONTENT_LEN = -20,
- SAL_ERR_CONNECTION_TIMEOUT = -21,
- SAL_ERR_FILE_OPEN_FAILED = -22,
- SAL_ERR_MIN = -23
- } SAL_ERR; /* EMH 1-15-03 */
-
-
- typedef struct sal_err_map_temp
- {
- SAL_ERR code;
- const char *decode;
-
- } sal_err_map_t;
-
-
- static char *sal_err_text(SAL_ERR e)
- {
- int t;
- const sal_err_map_t g_sal_err_map[] =
- {
- { SAL_ERR_WSASTARTUP, "Error with WSAStartup" },
- { SAL_ERR_SOCKET_CREATE, "Error creating socket" },
- { SAL_ERR_RESOLVING_HOSTNAME, "Error resolving hostname" },
- { SAL_ERR_SERVER_CONNECTION, "Error connecting to server" },
- { SAL_ERR_SENDING_DATA, "Error sending data" },
- { SAL_ERR_RECEIVING_DATA, "Error receiving data" },
- { SAL_ERR_404_FILE_NOT_FOUND, "Error file not found " },
- { SAL_ERR_PARSING_HTTP_HEADER, "Error parsing http header" },
- { SAL_ERR_PARSING_CONTENT_LEN, "Error parsing content length" },
- { SAL_ERR_CONNECTION_TIMEOUT, "Error Connection timed out" },
- { SAL_ERR_FILE_OPEN_FAILED, "Error opening file" }
- };
-
- for (t = 0; t < sizeof(g_sal_err_map) / sizeof(sal_err_map_t); t++)
- {
- if (e == g_sal_err_map[t].code)
- return (char *) g_sal_err_map[t].decode;
- }
-
- return 0;
- }
-
-
-
-
-
-
-
- int duck_open(const char *fname, unsigned long user_data);
-
- void duck_close(int ghndl);
-
- int duck_read(int ghndl, unsigned char *buf, int nbytes);
-
- int64_t duck_seek(int g_hndl, int64_t offs, int origin);
-
- int duck_read_finished(int han, int flag); /* FWG 7-9-99 */
-
- int duck_name(int handle, char name[], size_t max_len); /* EMH 9-23-03 */
-
- int duck_read_blocking(int handle, unsigned char *buffer, int bytes); /* EMH 9-23-03 */
-
- int64_t duck_available_data(int handle); /* EMH 10-23-03 */
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 01909b937..a3443d765 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -60,10 +60,10 @@ static unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
int mb_to_bottom_edge)
{
unsigned int need_to_clamp;
- need_to_clamp = (mv->as_mv.col < mb_to_left_edge) ? 1 : 0;
- need_to_clamp |= (mv->as_mv.col > mb_to_right_edge) ? 1 : 0;
- need_to_clamp |= (mv->as_mv.row < mb_to_top_edge) ? 1 : 0;
- need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge) ? 1 : 0;
+ need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
+ need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
+ need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
+ need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
return need_to_clamp;
}
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
deleted file mode 100644
index 5f523980b..000000000
--- a/vp8/common/g_common.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-extern void (*vp8_clear_system_state)(void);
-extern void (*vp8_plane_add_noise)(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int DPitch, int q);
-extern void (*de_interlace)
-(
- unsigned char *src_ptr,
- unsigned char *dst_ptr,
- int Width,
- int Height,
- int Stride
-);
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index dbf8d6504..01d76206d 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -10,7 +10,6 @@
#include "vpx_config.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 7eec58e26..f49e2e577 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -17,6 +17,10 @@
#include "blockd.h"
#include "onyxc_int.h"
+#if CONFIG_MULTITHREAD
+#include "vpx_mem/vpx_mem.h"
+#endif
+
static void eob_adjust(char *eobs, short *diff)
{
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
@@ -32,9 +36,7 @@ static void eob_adjust(char *eobs, short *diff)
static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
const VP8_COMMON_RTCD *rtcd)
{
- short *DQC = xd->block[0].dequant;
- /* save the dc dequant constant in case it is overridden */
- short dc_dequant_temp = DQC[0];
+ short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
@@ -51,15 +53,11 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
}
eob_adjust(xd->eobs, xd->qcoeff);
- /* override the dc dequant constant */
- DQC[0] = 1;
+ DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)
- (xd->qcoeff, xd->block[0].dequant,
+ (xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
-
- /* restore the dc dequant constant */
- DQC[0] = dc_dequant_temp;
}
#endif
diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
deleted file mode 100644
index 99df1164c..000000000
--- a/vp8/common/littlend.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _littlend_h
-#define _littlend_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define invert2(x) (x)
-#define invert4(x) (x)
-
-#define low_byte(x) (unsigned char)x
-#define mid1Byte(x) (unsigned char)(x >> 8)
-#define mid2Byte(x) (unsigned char)(x >> 16)
-#define high_byte(x) (unsigned char)(x >> 24)
-
-#define SWAPENDS 0
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index 11fa3ffa7..f8971d754 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -87,7 +87,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
for (c = 0; c < 4; c++)
{
- x->block[r*4+c].diff = &x->diff[r * 4 * 16 + c * 4];
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
}
}
@@ -96,7 +95,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
for (c = 0; c < 2; c++)
{
- x->block[16+r*2+c].diff = &x->diff[256 + r * 4 * 8 + c * 4];
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
}
@@ -106,14 +104,11 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
for (c = 0; c < 2; c++)
{
- x->block[20+r*2+c].diff = &x->diff[320+ r * 4 * 8 + c * 4];
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
}
}
- x->block[24].diff = &x->diff[384];
-
for (r = 0; r < 25; r++)
{
x->block[r].qcoeff = x->qcoeff + r * 16;
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 37fa5a0cd..d17a32b82 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -22,9 +22,9 @@ extern "C"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "vpx_scale/yv12config.h"
-#include "type_aliases.h"
#include "ppflags.h"
- typedef int *VP8_PTR;
+
+ struct VP8_COMP;
/* Create/destroy static data structures. */
@@ -226,27 +226,27 @@ extern "C"
void vp8_initialize();
- VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf);
- void vp8_remove_compressor(VP8_PTR *comp);
+ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf);
+ void vp8_remove_compressor(struct VP8_COMP* *comp);
- void vp8_init_config(VP8_PTR onyx, VP8_CONFIG *oxcf);
- void vp8_change_config(VP8_PTR onyx, VP8_CONFIG *oxcf);
+ void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
+ void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
// receive a frames worth of data caller can assume that a copy of this frame is made
// and not just a copy of the pointer..
- int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
- int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
- int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
-
- int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
- int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
- int vp8_get_reference(VP8_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
- int vp8_set_reference(VP8_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
- int vp8_update_entropy(VP8_PTR comp, int update);
- int vp8_set_roimap(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
- int vp8_set_active_map(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols);
- int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
- int vp8_get_quantizer(VP8_PTR c);
+ int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
+ int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
+ int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
+
+ int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
+ int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
+ int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ int vp8_update_entropy(struct VP8_COMP* comp, int update);
+ int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
+ int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
+ int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
+ int vp8_get_quantizer(struct VP8_COMP* c);
#ifdef __cplusplus
}
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index f733ff774..77a0ca2cc 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -93,9 +93,9 @@ typedef struct VP8Common
{
struct vpx_internal_error_info error;
- DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
+ DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
+ DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
int Width;
int Height;
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 43fa00bd3..35a8b6e55 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -18,13 +18,13 @@
extern "C"
{
#endif
-#include "type_aliases.h"
#include "vpx_scale/yv12config.h"
#include "ppflags.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_codec.h"
- typedef void *VP8D_PTR;
+ struct VP8D_COMP;
+
typedef struct
{
int Width;
@@ -49,19 +49,19 @@ extern "C"
void vp8dx_initialize(void);
- void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x);
+ void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
- int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
+ int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
- int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
- int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
+ int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
+ int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
- vpx_codec_err_t vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
- vpx_codec_err_t vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
- VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
+ struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
- void vp8dx_remove_decompressor(VP8D_PTR comp);
+ void vp8dx_remove_decompressor(struct VP8D_COMP* comp);
#ifdef __cplusplus
}
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 1f5d79068..7046a63e8 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -9,7 +9,6 @@
*/
-#include "g_common.h"
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
deleted file mode 100644
index 22b531a76..000000000
--- a/vp8/common/type_aliases.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : type_aliases.h
-*
-* Description : Standard type aliases
-*
-****************************************************************************/
-#ifndef __INC_TYPE_ALIASES_H
-#define __INC_TYPE_ALIASES_H
-
-/****************************************************************************
-* Macros
-****************************************************************************/
-#define EXPORT
-#define IMPORT extern /* Used to declare imported data & routines */
-#define PRIVATE static /* Used to declare & define module-local data */
-#define LOCAL static /* Used to define all persistent routine-local data */
-#define STD_IN_PATH 0 /* Standard input path */
-#define STD_OUT_PATH 1 /* Standard output path */
-#define STD_ERR_PATH 2 /* Standard error path */
-#define STD_IN_FILE stdin /* Standard input file pointer */
-#define STD_OUT_FILE stdout /* Standard output file pointer */
-#define STD_ERR_FILE stderr /* Standard error file pointer */
-#define max_int 0x7FFFFFFF
-
-#define __export
-#define _export
-
-#define CCONV
-
-#ifndef NULL
-#ifdef __cplusplus
-#define NULL 0
-#else
-#define NULL ((void *)0)
-#endif
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-/****************************************************************************
-* Typedefs
-****************************************************************************/
-#ifndef TYPE_INT8
-#define TYPE_INT8
-typedef signed char INT8;
-#endif
-
-#ifndef TYPE_INT16
-/*#define TYPE_INT16*/
-typedef signed short INT16;
-#endif
-
-#ifndef TYPE_INT32
-/*#define TYPE_INT32*/
-typedef signed int INT32;
-#endif
-
-#ifndef TYPE_UINT8
-/*#define TYPE_UINT8*/
-typedef unsigned char UINT8;
-#endif
-
-#ifndef TYPE_UINT32
-/*#define TYPE_UINT32*/
-typedef unsigned int UINT32;
-#endif
-
-#ifndef TYPE_UINT16
-/*#define TYPE_UINT16*/
-typedef unsigned short UINT16;
-#endif
-
-#ifndef TYPE_BOOL
-/*#define TYPE_BOOL*/
-typedef int BOOL;
-#endif
-
-typedef unsigned char BOOLEAN;
-
-#ifdef _MSC_VER
-typedef __int64 INT64;
-#else
-
-#ifndef TYPE_INT64
-#ifdef _TMS320C6X
-/* for now we only have 40bits */
-typedef long INT64;
-#else
-typedef long long INT64;
-#endif
-#endif
-
-#endif
-
-/* Floating point */
-typedef double FLOAT64;
-typedef float FLOAT32;
-
-#endif
diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c
index 49cebd6f5..8ff483708 100644
--- a/vp8/common/x86/idct_blk_mmx.c
+++ b/vp8/common/x86/idct_blk_mmx.c
@@ -14,12 +14,12 @@
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
-void vp8_dequantize_b_mmx(BLOCKD *d)
+void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
{
short *sq = (short *) d->qcoeff;
short *dq = (short *) d->dqcoeff;
- short *q = (short *) d->dequant;
- vp8_dequantize_b_impl_mmx(sq, dq, q);
+
+ vp8_dequantize_b_impl_mmx(sq, dq, DQC);
}
void vp8_dequant_idct_add_y_block_mmx
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 86927d9f1..2ad010adb 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1385,52 +1385,54 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx
- push rsi
- push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
+ mov rcx, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- mov rdx, arg(2) ;blimit
- movdqa xmm3, XMMWORD PTR [rdx]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
+ lea rdx, [rcx + rax]
neg rax
; calculate mask
- movdqa xmm1, [rsi+2*rax] ; p1
- movdqa xmm0, [rdi] ; q1
+ movdqa xmm0, [rdx] ; q1
+ mov rdx, arg(2) ;blimit
+ movdqa xmm1, [rcx+2*rax] ; p1
+
movdqa xmm2, xmm1
movdqa xmm7, xmm0
- movdqa xmm4, xmm0
+
psubusb xmm0, xmm1 ; q1-=p1
- psubusb xmm1, xmm4 ; p1-=q1
+ psubusb xmm1, xmm7 ; p1-=q1
por xmm1, xmm0 ; abs(p1-q1)
pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm1, 1 ; abs(p1-q1)/2
- movdqa xmm5, [rsi+rax] ; p0
- movdqa xmm4, [rsi] ; q0
+ movdqa xmm3, XMMWORD PTR [rdx]
+
+ movdqa xmm5, [rcx+rax] ; p0
+ movdqa xmm4, [rcx] ; q0
movdqa xmm0, xmm4 ; q0
movdqa xmm6, xmm5 ; p0
psubusb xmm5, xmm4 ; p0-=q0
psubusb xmm4, xmm6 ; q0-=p0
por xmm5, xmm4 ; abs(p0 - q0)
+
+ movdqa xmm4, [GLOBAL(t80)]
+
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm3, xmm3
pcmpeqb xmm5, xmm3
+
; start work on filters
- pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+ pxor xmm2, xmm4 ; p1 offset to convert to signed values
+ pxor xmm7, xmm4 ; q1 offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1
- pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor xmm6, xmm4 ; offset to convert to signed values
+ pxor xmm0, xmm4 ; offset to convert to signed values
movdqa xmm3, xmm0 ; q0
psubsb xmm0, xmm6 ; q0 - p0
paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0)
@@ -1438,42 +1440,36 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0)
pand xmm5, xmm2 ; mask filter values we don't care about
- ; do + 4 side
- paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
-
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
- psraw xmm0, 3 ; arithmetic shift right 11
- psrlw xmm0, 8
- movdqa xmm1, xmm5 ; get a copy of filters
- psraw xmm1, 11 ; arithmetic shift right 11
- psllw xmm1, 8 ; shift left 8 to put it back
-
- por xmm0, xmm1 ; put the two together to get result
+ paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
+ movdqa xmm0, xmm5
+ psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
- psubsb xmm3, xmm0 ; q0-= q0 add
- pxor xmm3, [GLOBAL(t80)] ; unoffset
- movdqa [rsi], xmm3 ; write back
+ movdqa xmm1, [GLOBAL(te0)]
+ movdqa xmm2, [GLOBAL(t1f)]
- ; now do +3 side
- psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm0 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm0, 3
+ pand xmm0, xmm2 ;clear out upper 3 bits
+ por xmm0, xmm7 ;add sign
+ psubsb xmm3, xmm0 ; q0-= q0sz add
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
- psraw xmm0, 3 ; arithmetic shift right 11
- psrlw xmm0, 8
- psraw xmm5, 11 ; arithmetic shift right 11
- psllw xmm5, 8 ; shift left 8 to put it back
- por xmm0, xmm5 ; put the two together to get result
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm5 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm5, 3
+ pand xmm5, xmm2 ;clear out upper 3 bits
+ por xmm5, xmm7 ;add sign
+ paddsb xmm6, xmm5 ; p0+= p0 add
+ pxor xmm3, xmm4 ; unoffset
+ movdqa [rcx], xmm3 ; write back
- paddsb xmm6, xmm0 ; p0+= p0 add
- pxor xmm6, [GLOBAL(t80)] ; unoffset
- movdqa [rsi+rax], xmm6 ; write back
+ pxor xmm6, xmm4 ; unoffset
+ movdqa [rcx+rax], xmm6 ; write back
; begin epilog
- pop rdi
- pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
@@ -1536,9 +1532,6 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
- movdqa t0, xmm0 ; save to t0
- movdqa t1, xmm2 ; save to t1
-
lea rsi, [rsi + rax*8]
lea rdi, [rsi + rax]
lea rdx, [rsi + rax*4]
@@ -1551,26 +1544,24 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80
punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90
- movd xmm0, [rsi + rax*2] ; a3 a2 a1 a0
+ movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0
movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0
- movd xmm2, [rdi + rax*2] ; b3 b2 b1 b0
+ movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0
movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0
- punpckldq xmm0, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0
- punpckldq xmm2, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0
+ punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0
+ punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0
punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80
- punpcklbw xmm0, xmm2 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
+ punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
- movdqa xmm1, xmm4
- punpcklwd xmm4, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
- punpckhwd xmm1, xmm0 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+ movdqa xmm7, xmm4
+ punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+ punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
movdqa xmm6, xmm4
- punpckldq xmm4, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
- punpckhdq xmm6, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+ punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+ punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
- movdqa xmm0, t0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
- movdqa xmm2, t1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
movdqa xmm1, xmm0
movdqa xmm3, xmm2
@@ -1579,6 +1570,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+ mov rdx, arg(2) ;blimit
+
; calculate mask
movdqa xmm6, xmm0 ; p1
movdqa xmm7, xmm3 ; q1
@@ -1588,6 +1581,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm6, 1 ; abs(p1-q1)/2
+ movdqa xmm7, [rdx]
+
movdqa xmm5, xmm1 ; p0
movdqa xmm4, xmm2 ; q0
psubusb xmm5, xmm2 ; p0-=q0
@@ -1596,8 +1591,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;blimit
- movdqa xmm7, XMMWORD PTR [rdx]
+ movdqa xmm4, [GLOBAL(t80)]
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm7, xmm7
@@ -1607,59 +1601,48 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
movdqa t0, xmm0
movdqa t1, xmm3
- pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
-
+ pxor xmm0, xmm4 ; p1 offset to convert to signed values
+ pxor xmm3, xmm4 ; q1 offset to convert to signed values
psubsb xmm0, xmm3 ; p1 - q1
- movdqa xmm6, xmm1 ; p0
-
- movdqa xmm7, xmm2 ; q0
- pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values
- movdqa xmm3, xmm7 ; offseted ; q0
-
- psubsb xmm7, xmm6 ; q0 - p0
- paddsb xmm0, xmm7 ; p1 - q1 + 1 * (q0 - p0)
+ movdqa xmm6, xmm1 ; p0
+; movdqa xmm7, xmm2 ; q0
- paddsb xmm0, xmm7 ; p1 - q1 + 2 * (q0 - p0)
- paddsb xmm0, xmm7 ; p1 - q1 + 3 * (q0 - p0)
+ pxor xmm6, xmm4 ; offset to convert to signed values
+ pxor xmm2, xmm4 ; offset to convert to signed values
+ movdqa xmm3, xmm2 ; offseted ; q0
+ psubsb xmm2, xmm6 ; q0 - p0
+ paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0)
+ paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0)
+ paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0)
pand xmm5, xmm0 ; mask filter values we don't care about
-
paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
-
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
-
- psraw xmm0, 3 ; arithmetic shift right 11
- psrlw xmm0, 8
-
- movdqa xmm7, xmm5 ; get a copy of filters
- psraw xmm7, 11 ; arithmetic shift right 11
-
- psllw xmm7, 8 ; shift left 8 to put it back
- por xmm0, xmm7 ; put the two together to get result
-
- psubsb xmm3, xmm0 ; q0-= q0sz add
- pxor xmm3, [GLOBAL(t80)] ; unoffset q0
-
- ; now do +3 side
+ movdqa xmm0, xmm5
psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
- psraw xmm0, 3 ; arithmetic shift right 11
+ movdqa xmm1, [GLOBAL(te0)]
+ movdqa xmm2, [GLOBAL(t1f)]
- psrlw xmm0, 8
- psraw xmm5, 11 ; arithmetic shift right 11
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm0 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm0, 3
+ pand xmm0, xmm2 ;clear out upper 3 bits
+ por xmm0, xmm7 ;add sign
+ psubsb xmm3, xmm0 ; q0-= q0sz add
- psllw xmm5, 8 ; shift left 8 to put it back
- por xmm0, xmm5 ; put the two together to get result
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm5 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm5, 3
+ pand xmm5, xmm2 ;clear out upper 3 bits
+ por xmm5, xmm7 ;add sign
+ paddsb xmm6, xmm5 ; p0+= p0 add
- paddsb xmm6, xmm0 ; p0+= p0 add
- pxor xmm6, [GLOBAL(t80)] ; unoffset p0
+ pxor xmm3, xmm4 ; unoffset q0
+ pxor xmm6, xmm4 ; unoffset p0
movdqa xmm0, t0 ; p1
movdqa xmm4, t1 ; q1
@@ -1763,3 +1746,9 @@ s9:
align 16
s63:
times 8 dw 0x003f
+align 16
+te0:
+ times 16 db 0xe0 ; per-byte mask 11100000b: keeps the upper 3 bits (sign fill re-inserted after psrlw 3)
+align 16
+t1f:
+ times 16 db 0x1f ; per-byte mask 00011111b: keeps the lower 5 bits (drops bits psrlw 3 moved in from the adjacent byte)
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index a82c1b4fd..4b68ef5f2 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -559,12 +559,492 @@ sym(vp8_intra_pred_uv_ho_%1):
vp8_intra_pred_uv_ho mmx2
vp8_intra_pred_uv_ho ssse3
+;void vp8_intra_pred_y_dc_sse2(unsigned char *dst, int dst_stride,
+;                              unsigned char *src, int src_stride)
+; Fills 16 rows of 16 bytes at dst with a single DC value:
+;   (sum of the 16 bytes above src + sum of the 16 bytes left of src + 16) >> 5
+; The row above src and every dst row are accessed with movdqa, so both
+; must be 16-byte aligned.
+global sym(vp8_intra_pred_y_dc_sse2)
+sym(vp8_intra_pred_y_dc_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from top: sum the 16 bytes of the row directly above src
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi = src - src_stride
+ pxor xmm0, xmm0
+ movdqa xmm1, [rsi]
+ psadbw xmm1, xmm0 ; against zero: one byte-sum per 64-bit half
+ movq xmm2, xmm1 ; xmm2 = sum of bytes 0-7
+ punpckhqdq xmm1, xmm1 ; low half = sum of bytes 8-15
+ paddw xmm1, xmm2 ; word 0 = sum of all 16 top bytes
+
+ ; from left: ecx = sum of the 16 bytes in the column left of src
+ dec rsi ; rsi = src - src_stride - 1
+ lea rdi, [rax*3] ; rdi = 3 * src_stride
+ movzx ecx, byte [rsi+rax] ; left of row 0
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*4] ; left of row 15
+ add ecx, edx
+
+ ; add up: dc = (top sum + left sum + 16) >> 5
+ pextrw edx, xmm1, 0x0 ; edx = top sum
+ lea edx, [edx+ecx+16] ; + left sum + rounding term
+ sar edx, 5
+ movd xmm1, edx
+ ; FIXME use pshufb for ssse3 version
+ pshuflw xmm1, xmm1, 0x0 ; dc in the low 4 words
+ punpcklqdq xmm1, xmm1 ; dc in all 8 words
+ packuswb xmm1, xmm1 ; dc in all 16 bytes
+
+ ; write out: 2 passes of 8 rows = 16 rows
+ mov rsi, 2
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3] ; rax = 3 * dst_stride
+
+.label
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_dctop_sse2(unsigned char *dst, int dst_stride,
+;                                 unsigned char *src, int src_stride)
+; Fills 16 rows of 16 bytes at dst with a DC value taken from the top only:
+;   (sum of the 16 bytes above src + 8) >> 4
+; The row above src and every dst row are accessed with movdqa, so both
+; must be 16-byte aligned.
+global sym(vp8_intra_pred_y_dctop_sse2)
+sym(vp8_intra_pred_y_dctop_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ GET_GOT rbx ; NOTE(review): presumably required by the GLOBAL(dc_8) access below - macro is defined outside this view
+ ; end prolog
+
+ ; from top: sum the 16 bytes of the row directly above src
+ mov rcx, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rcx, rax ; rcx = src - src_stride
+ pxor xmm0, xmm0
+ movdqa xmm1, [rcx]
+ psadbw xmm1, xmm0 ; against zero: one byte-sum per 64-bit half
+ movdqa xmm2, xmm1
+ punpckhqdq xmm1, xmm1 ; low half = sum of bytes 8-15
+ paddw xmm1, xmm2 ; word 0 = sum of all 16 top bytes
+
+ ; add up: dc = (top sum + 8) >> 4
+ paddw xmm1, [GLOBAL(dc_8)] ; rounding term
+ psraw xmm1, 4
+ ; FIXME use pshufb for ssse3 version
+ pshuflw xmm1, xmm1, 0x0 ; dc in the low 4 words
+ punpcklqdq xmm1, xmm1 ; dc in all 8 words
+ packuswb xmm1, xmm1 ; dc in all 16 bytes
+
+ ; write out: 2 passes of 8 rows = 16 rows
+ mov rsi, 2
+ mov rdx, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3] ; rax = 3 * dst_stride
+
+.label
+ movdqa [rdx ], xmm1
+ movdqa [rdx+rcx ], xmm1
+ movdqa [rdx+rcx*2], xmm1
+ movdqa [rdx+rax ], xmm1
+ lea rdx, [rdx+rcx*4]
+ movdqa [rdx ], xmm1
+ movdqa [rdx+rcx ], xmm1
+ movdqa [rdx+rcx*2], xmm1
+ movdqa [rdx+rax ], xmm1
+ lea rdx, [rdx+rcx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ RESTORE_GOT
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_dcleft_sse2(unsigned char *dst, int dst_stride,
+;                                  unsigned char *src, int src_stride)
+; Fills 16 rows of 16 bytes at dst with a DC value taken from the left only:
+;   (sum of the 16 bytes in the column left of src + 8) >> 4
+; The row above src is not read. Every dst row is stored with movdqa, so
+; dst rows must be 16-byte aligned.
+global sym(vp8_intra_pred_y_dcleft_sse2)
+sym(vp8_intra_pred_y_dcleft_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from left: ecx accumulates the bytes at src[r*src_stride - 1]
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ dec rsi ; rsi = src - 1
+ lea rdi, [rax*3] ; rdi = 3 * src_stride
+ movzx ecx, byte [rsi] ; left of row 0
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; left of row 15
+ lea edx, [ecx+edx+8] ; total of 16 bytes + rounding term
+
+ ; add up: dc = (left sum + 8) >> 4
+ shr edx, 4
+ movd xmm1, edx
+ ; FIXME use pshufb for ssse3 version
+ pshuflw xmm1, xmm1, 0x0 ; dc in the low 4 words
+ punpcklqdq xmm1, xmm1 ; dc in all 8 words
+ packuswb xmm1, xmm1 ; dc in all 16 bytes
+
+ ; write out: 2 passes of 8 rows = 16 rows
+ mov rsi, 2
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3] ; rax = 3 * dst_stride
+
+.label
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_dc128_sse2(unsigned char *dst, int dst_stride,
+;                                 unsigned char *src, int src_stride)
+; Fills 16 rows of 16 bytes at dst with the constant vector dc_128.
+; src and src_stride are accepted for signature compatibility but are
+; never read. Every dst row is stored with movdqa, so dst rows must be
+; 16-byte aligned.
+global sym(vp8_intra_pred_y_dc128_sse2)
+sym(vp8_intra_pred_y_dc128_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ GET_GOT rbx ; NOTE(review): presumably required by the GLOBAL(dc_128) access below - macro is defined outside this view
+ ; end prolog
+
+ ; write out: 2 passes of 8 rows = 16 rows
+ mov rsi, 2
+ movdqa xmm1, [GLOBAL(dc_128)]
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3] ; rcx = 3 * dst_stride
+
+.label
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ RESTORE_GOT
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_tm_<sse2|ssse3>(unsigned char *dst, int dst_stride,
+;                                      unsigned char *src, int src_stride)
+; 16x16 prediction: dst[r][c] = saturate_u8(left[r] + above[c] - topleft),
+; with above = row at src - src_stride, left[r] = src[r*src_stride - 1] and
+; topleft = above[-1]. %1 selects how one byte is broadcast to 8 words
+; (sse2: unpack + shuffle, ssse3: pshufb). above and dst rows use movdqa.
+%macro vp8_intra_pred_y_tm 1
+global sym(vp8_intra_pred_y_tm_%1)
+sym(vp8_intra_pred_y_tm_%1):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ GET_GOT rbx
+ ; end prolog
+
+ ; read top row and widen it to two vectors of 8 words
+ mov edx, 8 ; loop count: 8 iterations of 2 rows
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi = src - src_stride
+ pxor xmm0, xmm0
+%ifidn %1, ssse3
+ movdqa xmm3, [GLOBAL(dc_1024)] ; pshufb control: every word = source bytes (0, 4)
+%endif
+ movdqa xmm1, [rsi]
+ movdqa xmm2, xmm1
+ punpcklbw xmm1, xmm0 ; above[0..7] as words
+ punpckhbw xmm2, xmm0 ; above[8..15] as words
+
+ ; set up left ptrs and subtract topleft
+ movd xmm4, [rsi-1] ; byte 0 = topleft; movd zeroes bytes 4-15
+ lea rsi, [rsi+rax-1] ; rsi = src - 1 (left neighbour of row 0)
+%ifidn %1, sse2
+ punpcklbw xmm4, xmm0
+ pshuflw xmm4, xmm4, 0x0
+ punpcklqdq xmm4, xmm4
+%else
+ pshufb xmm4, xmm3
+%endif
+ psubw xmm1, xmm4 ; above[0..7] - topleft
+ psubw xmm2, xmm4 ; above[8..15] - topleft
+
+ ; set up dest ptrs
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+vp8_intra_pred_y_tm_%1_loop:
+ movd xmm4, [rsi] ; byte 0 = left of even row
+ movd xmm5, [rsi+rax] ; byte 0 = left of odd row
+%ifidn %1, sse2
+ punpcklbw xmm4, xmm0
+ punpcklbw xmm5, xmm0
+ pshuflw xmm4, xmm4, 0x0
+ pshuflw xmm5, xmm5, 0x0
+ punpcklqdq xmm4, xmm4
+ punpcklqdq xmm5, xmm5
+%else
+ pshufb xmm4, xmm3
+ pshufb xmm5, xmm3
+%endif
+ movdqa xmm6, xmm4
+ movdqa xmm7, xmm5
+ paddw xmm4, xmm1
+ paddw xmm6, xmm2
+ paddw xmm5, xmm1
+ paddw xmm7, xmm2
+ packuswb xmm4, xmm6 ; saturate the 16 sums to unsigned bytes
+ packuswb xmm5, xmm7
+ movdqa [rdi ], xmm4
+ movdqa [rdi+rcx], xmm5
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rcx*2]
+ dec edx
+ jnz vp8_intra_pred_y_tm_%1_loop
+
+ ; begin epilog
+ RESTORE_GOT
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%endmacro
+
+vp8_intra_pred_y_tm sse2
+vp8_intra_pred_y_tm ssse3
+
+;void vp8_intra_pred_y_ve_sse2(unsigned char *dst, int dst_stride,
+;                              unsigned char *src, int src_stride)
+; Vertical prediction: copies the 16 bytes of the row above src
+; (src - src_stride) into each of the 16 rows at dst.
+; The row above src and every dst row are accessed with movdqa, so both
+; must be 16-byte aligned.
+global sym(vp8_intra_pred_y_ve_sse2)
+sym(vp8_intra_pred_y_ve_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ ; end prolog
+
+ ; read from top
+ mov rax, arg(2) ;src;
+ movsxd rdx, dword ptr arg(3) ;src_stride;
+ sub rax, rdx ; rax = src - src_stride
+ movdqa xmm1, [rax]
+
+ ; write out: 2 passes of 8 rows = 16 rows
+ mov rsi, 2
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3] ; rcx = 3 * dst_stride
+
+.label
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_ho_sse2(unsigned char *dst, int dst_stride,
+;                              unsigned char *src, int src_stride)
+; Horizontal prediction: row r of the 16x16 block at dst is filled with
+; the single byte found at src[r*src_stride - 1].
+; Every dst row is stored with movdqa, so dst rows must be 16-byte
+; aligned. The row above src is not read.
+global sym(vp8_intra_pred_y_ho_sse2)
+sym(vp8_intra_pred_y_ho_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; read from left and write out
+ mov edx, 8 ; loop count: 8 iterations of 2 rows
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ dec rsi ; rsi = src - 1 (left neighbour of row 0)
+
+vp8_intra_pred_y_ho_sse2_loop:
+ movd xmm0, [rsi] ; byte 0 = left of even row
+ movd xmm1, [rsi+rax] ; byte 0 = left of odd row
+ ; FIXME use pshufb for ssse3 version
+ punpcklbw xmm0, xmm0 ; word 0 = byte 0 duplicated
+ punpcklbw xmm1, xmm1
+ pshuflw xmm0, xmm0, 0x0 ; word 0 to the low 4 words
+ pshuflw xmm1, xmm1, 0x0
+ punpcklqdq xmm0, xmm0 ; and to the high 4 words: 16 equal bytes
+ punpcklqdq xmm1, xmm1
+ movdqa [rdi ], xmm0
+ movdqa [rdi+rcx], xmm1
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rcx*2]
+ dec edx
+ jnz vp8_intra_pred_y_ho_sse2_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
SECTION_RODATA
+align 16
dc_128:
- times 8 db 128
+ times 16 db 128
dc_4:
times 4 dw 4
align 16
+dc_8:
+ times 8 dw 8
+align 16
dc_1024:
times 8 dw 0x400
align 16
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index fcc75a901..44221cd0b 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -94,3 +94,69 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
+
+/* 16x16 luma predictor routines (assembly, recon_sse2.asm). They are invoked
+ * below as fn(dst, dst_stride, src, src_stride); the mbuv prototype macro is
+ * reused for their declarations. */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3);
+
+/* Build the 16x16 luma intra prediction for macroblock x into dst_y.
+ * The predictor routine is chosen from x's prediction mode; for DC_PRED it
+ * further depends on which neighbours (up/left) are available.  tm_func is
+ * the routine used for TM_PRED.  Any other mode returns without writing. */
+static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
+                                               unsigned char *dst_y,
+                                               int dst_stride,
+                                               build_intra_predictors_mbuv_fn_t tm_func)
+{
+    build_intra_predictors_mbuv_fn_t predict;
+    const int mode = x->mode_info_context->mbmi.mode;
+
+    switch (mode) {
+        case V_PRED:  predict = vp8_intra_pred_y_ve_sse2; break;
+        case H_PRED:  predict = vp8_intra_pred_y_ho_sse2; break;
+        case TM_PRED: predict = tm_func;                  break;
+        case DC_PRED:
+            /* The DC flavour follows the neighbours that actually exist. */
+            if (x->up_available)
+                predict = x->left_available ? vp8_intra_pred_y_dc_sse2
+                                            : vp8_intra_pred_y_dctop_sse2;
+            else
+                predict = x->left_available ? vp8_intra_pred_y_dcleft_sse2
+                                            : vp8_intra_pred_y_dc128_sse2;
+            break;
+        default: return;
+    }
+
+    /* Neighbour pixels are always read from the frame buffer of x. */
+    predict(dst_y, dst_stride, x->dst.y_buffer, x->dst.y_stride);
+}
+
+/* Luma intra prediction into x->predictor (row stride 16), using the SSE2
+ * routine for TM_PRED. */
+void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+ vp8_intra_pred_y_tm_sse2);
+}
+
+/* Luma intra prediction into x->predictor (row stride 16), using the SSSE3
+ * routine for TM_PRED. */
+void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+ vp8_intra_pred_y_tm_ssse3);
+}
+
+/* Luma intra prediction written straight into x->dst.y_buffer (stride
+ * x->dst.y_stride), using the SSE2 routine for TM_PRED. */
+void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+ vp8_intra_pred_y_tm_sse2);
+}
+
+/* Luma intra prediction written straight into x->dst.y_buffer (stride
+ * x->dst.y_stride), using the SSSE3 routine for TM_PRED. */
+void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+ vp8_intra_pred_y_tm_ssse3);
+
+}
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index fbb3dcb63..afacc60d1 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -42,6 +42,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx);
extern prototype_copy_block(vp8_copy_mem16x16_sse2);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_copy16x16
@@ -53,12 +55,20 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
#undef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2
+#undef vp8_recon_build_intra_predictors_mby
+#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_sse2
+
+#undef vp8_recon_build_intra_predictors_mby_s
+#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_sse2
+
#endif
#endif
#if HAVE_SSSE3
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_ssse3);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_build_intra_predictors_mbuv
@@ -67,6 +77,12 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3)
#undef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
+#undef vp8_recon_build_intra_predictors_mby
+#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_ssse3
+
+#undef vp8_recon_build_intra_predictors_mby_s
+#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3
+
#endif
#endif
#endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index ad3a1f76b..e1e1b7987 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -11,7 +11,6 @@
#include "vpx_config.h"
#include "vpx_ports/x86.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
@@ -86,6 +85,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
vp8_build_intra_predictors_mbuv_sse2;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_sse2;
+ rtcd->recon.build_intra_predictors_mby =
+ vp8_build_intra_predictors_mby_sse2;
+ rtcd->recon.build_intra_predictors_mby_s =
+ vp8_build_intra_predictors_mby_s_sse2;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
@@ -132,6 +135,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
vp8_build_intra_predictors_mbuv_ssse3;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_ssse3;
+ rtcd->recon.build_intra_predictors_mby =
+ vp8_build_intra_predictors_mby_ssse3;
+ rtcd->recon.build_intra_predictors_mby_s =
+ vp8_build_intra_predictors_mby_s_ssse3;
}
#endif