path: root/vp8
Diffstat (limited to 'vp8')
-rw-r--r--  vp8/common/blockd.h | 45
-rw-r--r--  vp8/common/default_coef_probs.h | 24
-rw-r--r--  vp8/common/entropy.c | 15
-rw-r--r--  vp8/common/entropy.h | 2
-rw-r--r--  vp8/common/entropymode.c | 21
-rw-r--r--  vp8/common/entropymode.h | 8
-rw-r--r--  vp8/common/entropymv.c | 402
-rw-r--r--  vp8/common/entropymv.h | 62
-rw-r--r--  vp8/common/findnearmv.c | 27
-rw-r--r--  vp8/common/idct.h | 3
-rw-r--r--  vp8/common/idctllm.c | 8
-rw-r--r--  vp8/common/loopfilter_filters.c | 6
-rw-r--r--  vp8/common/onyxc_int.h | 42
-rw-r--r--  vp8/common/postproc.c | 7
-rw-r--r--  vp8/common/pred_common.c | 5
-rw-r--r--  vp8/common/pred_common.h | 5
-rw-r--r--  vp8/common/reconinter.c | 10
-rw-r--r--  vp8/common/rtcd_defs.sh | 219
-rw-r--r--  vp8/common/sadmxn.h | 37
-rw-r--r--  vp8/common/x86/loopfilter_mmx.asm | 784
-rw-r--r--  vp8/common/x86/loopfilter_sse2.asm | 523
-rw-r--r--  vp8/common/x86/loopfilter_x86.c | 430
-rw-r--r--  vp8/decoder/decodemv.c | 216
-rw-r--r--  vp8/decoder/decodframe.c | 91
-rw-r--r--  vp8/decoder/dequantize.c | 6
-rw-r--r--  vp8/decoder/dequantize.h | 6
-rw-r--r--  vp8/decoder/detokenize.c | 80
-rw-r--r--  vp8/encoder/bitstream.c | 172
-rw-r--r--  vp8/encoder/block.h | 22
-rw-r--r--  vp8/encoder/dct.c | 7
-rw-r--r--  vp8/encoder/dct.h | 2
-rw-r--r--  vp8/encoder/encodeframe.c | 63
-rw-r--r--  vp8/encoder/encodeintra.c | 22
-rw-r--r--  vp8/encoder/encodemb.c | 44
-rw-r--r--  vp8/encoder/encodemv.c | 592
-rw-r--r--  vp8/encoder/encodemv.h | 15
-rw-r--r--  vp8/encoder/firstpass.c | 10
-rw-r--r--  vp8/encoder/generic/csystemdependent.c | 79
-rw-r--r--  vp8/encoder/mbgraph.c | 26
-rw-r--r--  vp8/encoder/mcomp.c | 137
-rw-r--r--  vp8/encoder/mcomp.h | 9
-rw-r--r--  vp8/encoder/modecosts.c | 3
-rw-r--r--  vp8/encoder/onyx_if.c | 278
-rw-r--r--  vp8/encoder/onyx_int.h | 41
-rw-r--r--  vp8/encoder/picklpf.c | 28
-rw-r--r--  vp8/encoder/quantize.c | 2
-rw-r--r--  vp8/encoder/quantize.h | 2
-rw-r--r--  vp8/encoder/ratectrl.c | 40
-rw-r--r--  vp8/encoder/rdopt.c | 614
-rw-r--r--  vp8/encoder/sad_c.c | 44
-rw-r--r--  vp8/encoder/ssim.c | 109
-rw-r--r--  vp8/encoder/tokenize.c | 97
-rw-r--r--  vp8/encoder/variance.h | 568
-rw-r--r--  vp8/encoder/x86/variance_x86.h | 328
-rw-r--r--  vp8/encoder/x86/x86_csystemdependent.c | 85
-rw-r--r--  vp8/vp8_common.mk | 3
-rw-r--r--  vp8/vp8cx.mk | 1
57 files changed, 1374 insertions, 5153 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 3ab4cc3a9..cb546e74b 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -86,9 +86,7 @@ typedef enum
BILINEAR = 1,
EIGHTTAP = 2,
EIGHTTAP_SHARP = 3,
-#if CONFIG_SWITCHABLE_INTERP
SWITCHABLE /* should be the last one */
-#endif
} INTERPOLATIONFILTERTYPE;
typedef enum
@@ -135,14 +133,12 @@ typedef enum {
TX_SIZE_MAX // Number of different transforms available
} TX_SIZE;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
typedef enum {
DCT_DCT = 0, // DCT in both horizontal and vertical
- ADST_DCT = 1, // ADST in horizontal, DCT in vertical
- DCT_ADST = 2, // DCT in horizontal, ADST in vertical
+ ADST_DCT = 1, // ADST in vertical, DCT in horizontal
+ DCT_ADST = 2, // DCT in vertical, ADST in horizontal
ADST_ADST = 3 // ADST in both directions
} TX_TYPE;
-#endif
#define VP8_YMODES (B_PRED + 1)
#define VP8_UV_MODES (TM_PRED + 1)
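
The comment fix above swaps the direction labels: in this enum the first transform named is the one applied vertically (down the columns) and the second is the one applied horizontally (along the rows). A minimal standalone sketch of that convention follows; the helper names and printout are illustrative only, not part of the patch.

    #include <stdio.h>

    typedef enum { DCT_DCT = 0, ADST_DCT = 1, DCT_ADST = 2, ADST_ADST = 3 } TX_TYPE;

    /* First name in the enumerator = vertical (column) pass,
     * second name = horizontal (row) pass. */
    static const char *vertical_1d(TX_TYPE t) {
      return (t == ADST_DCT || t == ADST_ADST) ? "ADST" : "DCT";
    }
    static const char *horizontal_1d(TX_TYPE t) {
      return (t == DCT_ADST || t == ADST_ADST) ? "ADST" : "DCT";
    }

    int main(void) {
      TX_TYPE t;
      for (t = DCT_DCT; t <= ADST_ADST; t++)
        printf("tx_type %d: vertical=%s, horizontal=%s\n",
               (int)t, vertical_1d(t), horizontal_1d(t));
      return 0;
    }
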
@@ -177,6 +173,14 @@ typedef enum {
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
+typedef enum {
+ PARTITIONING_16X8 = 0,
+ PARTITIONING_8X16,
+ PARTITIONING_8X8,
+ PARTITIONING_4X4,
+ NB_PARTITIONINGS,
+} SPLITMV_PARTITIONING_TYPE;
+
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
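
For readers mapping the new names back to the old magic numbers: SPLITMV partitionings 0..3 correspond to 16x8, 8x16, 8x8 and 4x4 splits of the macroblock. The helper below is hypothetical (not part of the patch) and only illustrates the geometry each enumerator implies.

    #include <stdio.h>

    typedef enum {
      PARTITIONING_16X8 = 0,
      PARTITIONING_8X16,
      PARTITIONING_8X8,
      PARTITIONING_4X4,
      NB_PARTITIONINGS
    } SPLITMV_PARTITIONING_TYPE;

    /* Hypothetical helper: number of partitions and partition size per split. */
    static void splitmv_geometry(SPLITMV_PARTITIONING_TYPE p,
                                 int *num_parts, int *w, int *h) {
      switch (p) {
        case PARTITIONING_16X8: *num_parts = 2;  *w = 16; *h = 8;  break;
        case PARTITIONING_8X16: *num_parts = 2;  *w = 8;  *h = 16; break;
        case PARTITIONING_8X8:  *num_parts = 4;  *w = 8;  *h = 8;  break;
        case PARTITIONING_4X4:  *num_parts = 16; *w = 4;  *h = 4;  break;
        default:                *num_parts = 0;  *w = 0;  *h = 0;  break;
      }
    }

    int main(void) {
      int n, w, h;
      splitmv_geometry(PARTITIONING_8X8, &n, &w, &h);
      printf("8x8 split: %d partitions of %dx%d\n", n, w, h);  /* 4 of 8x8 */
      return 0;
    }
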
@@ -184,9 +188,7 @@ typedef enum {
union b_mode_info {
struct {
B_PREDICTION_MODE first;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
B_PREDICTION_MODE second;
@@ -220,7 +222,7 @@ typedef struct {
int mv_ref_index[MAX_REF_FRAMES];
#endif
- unsigned char partitioning;
+ SPLITMV_PARTITIONING_TYPE partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
unsigned char need_to_clamp_mvs;
unsigned char need_to_clamp_secondmv;
@@ -239,9 +241,7 @@ typedef struct {
// Flag to turn prediction signal filter on(1)/off(0 ) at the MB level
unsigned int pred_filter_enabled;
#endif
-#if CONFIG_SWITCHABLE_INTERP
INTERPOLATIONFILTERTYPE interp_filter;
-#endif
#if CONFIG_SUPERBLOCKS
// FIXME need a SB array of 4 MB_MODE_INFOs that
@@ -388,17 +388,11 @@ typedef struct MacroBlockD {
} MACROBLOCKD;
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
#define ACTIVE_HT 110 // quantization stepsize threshold
-#endif
-#if CONFIG_HYBRIDTRANSFORM8X8
#define ACTIVE_HT8 300
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16
#define ACTIVE_HT16 300
-#endif
// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
@@ -442,7 +436,6 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
return b_mode;
}
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
// transform mapping
static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
// map transform type
@@ -470,9 +463,7 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
}
return tx_type;
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM
static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
if (xd->mode_info_context->mbmi.mode == B_PRED &&
@@ -481,9 +472,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
}
return tx_type;
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM8X8
static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
@@ -492,9 +481,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
}
return tx_type;
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16
static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
@@ -503,34 +490,24 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
}
return tx_type;
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || \
- CONFIG_HYBRIDTRANSFORM16X16
static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
int ib = (b - xd->block);
if (ib >= 16)
return tx_type;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
tx_type = get_tx_type_16x16(xd, b);
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM8X8
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
ib = (ib & 8) + ((ib & 4) >> 1);
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
tx_type = get_tx_type_4x4(xd, b);
}
-#endif
return tx_type;
}
-#endif
extern void vp8_build_block_doffsets(MACROBLOCKD *xd);
extern void vp8_setup_block_dptrs(MACROBLOCKD *xd);
diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h
index 5e21195ee..bd1f795d0 100644
--- a/vp8/common/default_coef_probs.h
+++ b/vp8/common/default_coef_probs.h
@@ -13,9 +13,9 @@
static const vp8_prob default_coef_probs [BLOCK_TYPES]
-[COEF_BANDS]
-[PREV_COEF_CONTEXTS]
-[ENTROPY_NODES] = {
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] = {
{
/* Block Type ( 0 ) */
{
@@ -254,11 +254,10 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES]
}
};
-#if CONFIG_HYBRIDTRANSFORM
static const vp8_prob default_hybrid_coef_probs [BLOCK_TYPES]
-[COEF_BANDS]
-[PREV_COEF_CONTEXTS]
-[ENTROPY_NODES] = {
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] = {
{
/* Block Type ( 0 ) */
{
@@ -496,7 +495,6 @@ static const vp8_prob default_hybrid_coef_probs [BLOCK_TYPES]
}
}
};
-#endif
static const vp8_prob
default_coef_probs_8x8[BLOCK_TYPES_8X8]
@@ -731,12 +729,11 @@ default_coef_probs_8x8[BLOCK_TYPES_8X8]
}
};
-#if CONFIG_HYBRIDTRANSFORM8X8
static const vp8_prob
default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]
- [COEF_BANDS]
- [PREV_COEF_CONTEXTS]
- [ENTROPY_NODES] = {
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] = {
{
/* block Type 0 */
{
@@ -964,7 +961,6 @@ default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]
}
}
};
-#endif
static const vp8_prob
default_coef_probs_16x16[BLOCK_TYPES_16X16]
@@ -1173,7 +1169,6 @@ static const vp8_prob
}
};
-#if CONFIG_HYBRIDTRANSFORM16X16
static const vp8_prob
default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]
[COEF_BANDS]
@@ -1380,4 +1375,3 @@ static const vp8_prob
}
}
};
-#endif
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index 90f7a52c2..a3f731a3c 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -64,8 +64,6 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = {
7, 11, 14, 15,
};
-
-#if CONFIG_HYBRIDTRANSFORM
DECLARE_ALIGNED(16, const int, vp8_col_scan[16]) = {
0, 4, 8, 12,
1, 5, 9, 13,
@@ -78,7 +76,6 @@ DECLARE_ALIGNED(16, const int, vp8_row_scan[16]) = {
8, 9, 10, 11,
12, 13, 14, 15
};
-#endif
DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
@@ -208,25 +205,19 @@ vp8_extra_bit_struct vp8_extra_bits[12] = {
void vp8_default_coef_probs(VP8_COMMON *pc) {
vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
sizeof(pc->fc.coef_probs));
-#if CONFIG_HYBRIDTRANSFORM
vpx_memcpy(pc->fc.hybrid_coef_probs, default_hybrid_coef_probs,
sizeof(pc->fc.hybrid_coef_probs));
-#endif
vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8,
sizeof(pc->fc.coef_probs_8x8));
-#if CONFIG_HYBRIDTRANSFORM8X8
vpx_memcpy(pc->fc.hybrid_coef_probs_8x8, default_hybrid_coef_probs_8x8,
sizeof(pc->fc.hybrid_coef_probs_8x8));
-#endif
vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16,
sizeof(pc->fc.coef_probs_16x16));
-#if CONFIG_HYBRIDTRANSFORM16X16
vpx_memcpy(pc->fc.hybrid_coef_probs_16x16,
default_hybrid_coef_probs_16x16,
sizeof(pc->fc.hybrid_coef_probs_16x16));
-#endif
}
void vp8_coef_tree_initialize() {
@@ -344,7 +335,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
}
}
-#if CONFIG_HYBRIDTRANSFORM
for (i = 0; i < BLOCK_TYPES; ++i)
for (j = 0; j < COEF_BANDS; ++j)
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -366,7 +356,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
else cm->fc.hybrid_coef_probs[i][j][k][t] = prob;
}
}
-#endif
for (i = 0; i < BLOCK_TYPES_8X8; ++i)
for (j = 0; j < COEF_BANDS; ++j)
@@ -390,7 +379,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
}
}
-#if CONFIG_HYBRIDTRANSFORM8X8
for (i = 0; i < BLOCK_TYPES_8X8; ++i)
for (j = 0; j < COEF_BANDS; ++j)
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -413,7 +401,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
else cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = prob;
}
}
-#endif
for (i = 0; i < BLOCK_TYPES_16X16; ++i)
for (j = 0; j < COEF_BANDS; ++j)
@@ -437,7 +424,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
}
}
-#if CONFIG_HYBRIDTRANSFORM16X16
for (i = 0; i < BLOCK_TYPES_16X16; ++i)
for (j = 0; j < COEF_BANDS; ++j)
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -458,5 +444,4 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob;
}
}
-#endif
}
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index b9dfb344f..48a100ac6 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -104,10 +104,8 @@ struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
-#if CONFIG_HYBRIDTRANSFORM
extern DECLARE_ALIGNED(16, const int, vp8_col_scan[16]);
extern DECLARE_ALIGNED(16, const int, vp8_row_scan[16]);
-#endif
extern short vp8_default_zig_zag_mask[16];
extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 5627aa43a..bcd9f3707 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -215,9 +215,9 @@ const vp8_tree_index vp8_uv_mode_tree[VP8_UV_MODES * 2 - 2] = {
};
const vp8_tree_index vp8_mbsplit_tree[6] = {
- -3, 2,
- -2, 4,
- -0, -1
+ -PARTITIONING_4X4, 2,
+ -PARTITIONING_8X8, 4,
+ -PARTITIONING_16X8, -PARTITIONING_8X16,
};
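
The change above only renames the leaf values. In a vp8_tree_index table, entries come in pairs (the 0/1 children of a node); a nonnegative entry is the index of another pair, and a negated entry is a leaf symbol, so -PARTITIONING_4X4 is just a readable spelling of -3. Below is a small standalone walk of this tree, assuming a caller-supplied bit string in place of the real boolean decoder.

    #include <stdio.h>

    typedef signed char vp8_tree_index;

    enum { PARTITIONING_16X8 = 0, PARTITIONING_8X16, PARTITIONING_8X8, PARTITIONING_4X4 };

    /* Same shape as the table above. */
    static const vp8_tree_index mbsplit_tree[6] = {
      -PARTITIONING_4X4, 2,
      -PARTITIONING_8X8, 4,
      -PARTITIONING_16X8, -PARTITIONING_8X16,
    };

    /* Illustration only: the real decoder reads arithmetic-coded bits with
     * per-node probabilities instead of a plain bit array. */
    static int tree_decode(const vp8_tree_index *tree, const int *bits) {
      vp8_tree_index i = 0;
      while ((i = tree[i + *bits++]) > 0)
        ;
      return -i;  /* leaf symbols are stored negated */
    }

    int main(void) {
      const int bits[] = { 1, 1, 0 };  /* node 0 -> node 2 -> node 4 -> leaf */
      printf("%d\n", tree_decode(mbsplit_tree, bits));  /* PARTITIONING_16X8 (0) */
      return 0;
    }
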
const vp8_tree_index vp8_mv_ref_tree[8] = {
@@ -301,11 +301,8 @@ void vp8_init_mbmode_probs(VP8_COMMON *x) {
vpx_memcpy(x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_prob2, sizeof(vp8_sub_mv_ref_prob2));
vpx_memcpy(x->fc.mbsplit_prob, vp8_mbsplit_probs, sizeof(vp8_mbsplit_probs));
-#if CONFIG_SWITCHABLE_INTERP
vpx_memcpy(x->fc.switchable_interp_prob, vp8_switchable_interp_prob,
sizeof(vp8_switchable_interp_prob));
-#endif
-
}
@@ -338,7 +335,6 @@ void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [
} while (++i < VP8_BINTRAMODES);
}
-#if CONFIG_SWITCHABLE_INTERP
#if VP8_SWITCHABLE_FILTERS == 3
const vp8_tree_index vp8_switchable_interp_tree[VP8_SWITCHABLE_FILTERS*2-2] = {
-0, 2,
@@ -363,19 +359,10 @@ const vp8_prob vp8_switchable_interp_prob [VP8_SWITCHABLE_FILTERS+1]
{ 64},
{192},
};
-//#define SWITCHABLE_86
-#ifdef SWITCHABLE_86
-const INTERPOLATIONFILTERTYPE vp8_switchable_interp[VP8_SWITCHABLE_FILTERS] = {
- EIGHTTAP, SIXTAP};
-const int vp8_switchable_interp_map[SWITCHABLE+1] = {1, -1, 0, -1, -1}; //8, 6
-#else
const INTERPOLATIONFILTERTYPE vp8_switchable_interp[VP8_SWITCHABLE_FILTERS] = {
EIGHTTAP, EIGHTTAP_SHARP};
const int vp8_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1}; //8, 8s
#endif
-#endif
-#endif
-
void vp8_entropy_mode_init() {
vp8_tokens_from_tree(vp8_bmode_encodings, vp8_bmode_tree);
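
The two surviving tables above work as a pair: vp8_switchable_interp[] lists the filters that can actually be signalled when the frame filter type is SWITCHABLE, and vp8_switchable_interp_map[] is its inverse, mapping each INTERPOLATIONFILTERTYPE to its index in that list, or to -1 when the filter is not selectable. A standalone sketch of that relationship follows; it assumes the usual SIXTAP = 0 first enumerator, which is outside this hunk.

    #include <stdio.h>

    typedef enum { SIXTAP, BILINEAR, EIGHTTAP, EIGHTTAP_SHARP, SWITCHABLE } INTERPOLATIONFILTERTYPE;
    #define VP8_SWITCHABLE_FILTERS 2

    static const INTERPOLATIONFILTERTYPE switchable_interp[VP8_SWITCHABLE_FILTERS] = {
      EIGHTTAP, EIGHTTAP_SHARP  /* the "8, 8s" pair kept by this patch */
    };
    static const int switchable_interp_map[SWITCHABLE + 1] = { -1, -1, 0, 1, -1 };

    int main(void) {
      int i;
      for (i = 0; i < VP8_SWITCHABLE_FILTERS; i++)
        printf("signalled index %d -> filter %d\n", i, (int)switchable_interp[i]);
      /* the map inverts the list; filters outside the switchable set give -1 */
      printf("map[EIGHTTAP_SHARP] = %d, map[SIXTAP] = %d\n",
             switchable_interp_map[EIGHTTAP_SHARP], switchable_interp_map[SIXTAP]);
      return 0;
    }
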
@@ -387,10 +374,8 @@ void vp8_entropy_mode_init() {
vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree);
vp8_tokens_from_tree(vp8_i8x8_mode_encodings, vp8_i8x8_mode_tree);
vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
-#if CONFIG_SWITCHABLE_INTERP
vp8_tokens_from_tree(vp8_switchable_interp_encodings,
vp8_switchable_interp_tree);
-#endif
vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
vp8_mv_ref_tree, NEARESTMV);
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index 430c949a6..debb5659e 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -76,16 +76,14 @@ void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES
void vp8_adapt_mode_probs(struct VP8Common *);
-#if CONFIG_SWITCHABLE_INTERP
#define VP8_SWITCHABLE_FILTERS 2 /* number of switchable filters */
extern const INTERPOLATIONFILTERTYPE vp8_switchable_interp
[VP8_SWITCHABLE_FILTERS];
-extern const int vp8_switchable_interp_map[SWITCHABLE+1];
+extern const int vp8_switchable_interp_map[SWITCHABLE + 1];
extern const vp8_tree_index vp8_switchable_interp_tree
- [2*(VP8_SWITCHABLE_FILTERS-1)];
+ [2*(VP8_SWITCHABLE_FILTERS - 1)];
extern struct vp8_token_struct vp8_switchable_interp_encodings
[VP8_SWITCHABLE_FILTERS];
extern const vp8_prob vp8_switchable_interp_prob
- [VP8_SWITCHABLE_FILTERS+1][VP8_SWITCHABLE_FILTERS-1];
-#endif
+ [VP8_SWITCHABLE_FILTERS + 1][VP8_SWITCHABLE_FILTERS - 1];
#endif
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 6c31236ec..a442a2438 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -14,8 +14,6 @@
//#define MV_COUNT_TESTING
-#if CONFIG_NEWMVENTROPY
-
#define MV_COUNT_SAT 16
#define MV_MAX_UPDATE_FACTOR 160
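
MV_COUNT_SAT and MV_MAX_UPDATE_FACTOR feed the count-saturated blending rule used throughout the probability adaptation in this file: the previous probability is moved toward the newly measured one, with a step that grows with the number of observed samples and is capped once MV_COUNT_SAT samples have been seen. A standalone sketch of that rule (the helper name is illustrative, not the patch's code):

    #include <stdio.h>

    #define MV_COUNT_SAT 16
    #define MV_MAX_UPDATE_FACTOR 160

    static int adapt_prob(int pre_prob, int new_prob, unsigned int count) {
      int factor, prob;
      if (count > MV_COUNT_SAT) count = MV_COUNT_SAT;
      factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT;
      prob = (pre_prob * (256 - factor) + new_prob * factor + 128) >> 8;
      if (prob <= 0) return 1;       /* probabilities are kept in 1..255 */
      if (prob > 255) return 255;
      return prob;
    }

    int main(void) {
      /* few samples: small step toward the new estimate; saturated: 160/256 step */
      printf("%d %d\n", adapt_prob(128, 200, 2), adapt_prob(128, 200, 32));
      return 0;
    }
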
@@ -450,413 +448,13 @@ void vp8_adapt_nmv_probs(VP8_COMMON *cm, int usehp) {
}
}
-#else /* CONFIG_NEWMVENTROPY */
-
-#define MV_COUNT_SAT 16
-#define MV_MAX_UPDATE_FACTOR 128
-
-const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] = {
- {{
- 237,
- 246,
- 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 250, 250, 252, 254, 254, 254
- }
- },
- {{
- 231,
- 243,
- 245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 251, 251, 254, 254, 254, 254
- }
- }
-};
-const MV_CONTEXT_HP vp8_default_mv_context_hp[2] = {
- {{
- /* row */
- 162, /* is short */
- 128, /* sign */
- 220, 204, 180, 192, 192, 119, 192, 192, 180, 140, 192, 192, 224, 224, 224, /* short tree */
- 128, 129, 132, 75, 145, 178, 206, 239, 254, 254, 254 /* long bits */
- }
- },
- {{
- /* same for column */
- 164, /* is short */
- 128,
- 220, 204, 180, 192, 192, 119, 192, 192, 180, 140, 192, 192, 224, 224, 224, /* short tree */
- 128, 130, 130, 74, 148, 180, 203, 236, 254, 254, 254 /* long bits */
- }
- }
-};
-
-const MV_CONTEXT vp8_mv_update_probs[2] = {
- {{
- 237,
- 246,
- 253, 253, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 250, 250, 252, 254, 254
- }
- },
- {{
- 231,
- 243,
- 245, 253, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 251, 251, 254, 254, 254
- }
- }
-};
-const MV_CONTEXT vp8_default_mv_context[2] = {
- {{
- /* row */
- 162, /* is short */
- 128, /* sign */
- 225, 146, 172, 147, 214, 39, 156, /* short tree */
- 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
- }
- },
- {{
- /* same for column */
- 164, /* is short */
- 128,
- 204, 170, 119, 235, 140, 230, 228,
- 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
- }
- }
-};
-
-const vp8_tree_index vp8_small_mvtree_hp [30] = {
- 2, 16,
- 4, 10,
- 6, 8,
- -0, -1,
- -2, -3,
- 12, 14,
- -4, -5,
- -6, -7,
- 18, 24,
- 20, 22,
- -8, -9,
- -10, -11,
- 26, 28,
- -12, -13,
- -14, -15
-};
-struct vp8_token_struct vp8_small_mvencodings_hp [16];
-
-const vp8_tree_index vp8_small_mvtree [14] = {
- 2, 8,
- 4, 6,
- -0, -1,
- -2, -3,
- 10, 12,
- -4, -5,
- -6, -7
-};
-struct vp8_token_struct vp8_small_mvencodings [8];
-
-__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2], int pbits) {
- const unsigned int tot = ct[0] + ct[1];
- if (tot) {
- const vp8_prob x = ((ct[0] * 255) / tot) & -(1 << (8 - pbits));
- *p = x ? x : 1;
- } else {
- *p = 128;
- }
-}
-
-static void compute_component_probs(
- const unsigned int events [MVvals],
- vp8_prob Pnew [MVPcount],
- unsigned int is_short_ct[2],
- unsigned int sign_ct[2],
- unsigned int bit_ct [mvlong_width] [2],
- unsigned int short_ct [mvnum_short],
- unsigned int short_bct [mvnum_short - 1] [2]
-) {
- is_short_ct[0] = is_short_ct[1] = 0;
- sign_ct[0] = sign_ct[1] = 0;
- vpx_memset(bit_ct, 0, sizeof(unsigned int)*mvlong_width * 2);
- vpx_memset(short_ct, 0, sizeof(unsigned int)*mvnum_short);
- vpx_memset(short_bct, 0, sizeof(unsigned int) * (mvnum_short - 1) * 2);
-
- {
- const int c = events [mv_max];
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
- }
- {
- int j = 1;
- do {
- const int c1 = events [mv_max + j]; // positive
- const int c2 = events [mv_max - j]; // negative
- const int c = c1 + c2;
- int a = j;
-
- sign_ct [0] += c1;
- sign_ct [1] += c2;
-
- if (a < mvnum_short) {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- } else {
- int k = mvlong_width - 1;
- is_short_ct [1] += c; // Long vector
-
- do
- bit_ct [k] [(a >> k) & 1] += c;
-
- while (--k >= 0);
- }
- } while (++j <= mv_max);
- }
- calc_prob(Pnew + mvpis_short, is_short_ct, 8);
-
- calc_prob(Pnew + MVPsign, sign_ct, 8);
-
- {
- vp8_prob p [mvnum_short - 1]; /* actually only need branch ct */
- int j = 0;
-
- vp8_tree_probs_from_distribution(
- mvnum_short, vp8_small_mvencodings, vp8_small_mvtree,
- p, short_bct, short_ct,
- 256, 1
- );
-
- do
- calc_prob(Pnew + MVPshort + j, short_bct[j], 8);
- while (++j < mvnum_short - 1);
- }
-
- {
- int j = 0;
- do
- calc_prob(Pnew + MVPbits + j, bit_ct[j], 8);
- while (++j < mvlong_width);
- }
-}
-
-static void compute_component_probs_hp(
- const unsigned int events [MVvals_hp],
- vp8_prob Pnew [MVPcount_hp],
- unsigned int is_short_ct[2],
- unsigned int sign_ct[2],
- unsigned int bit_ct [mvlong_width_hp] [2],
- unsigned int short_ct [mvnum_short_hp],
- unsigned int short_bct [mvnum_short_hp - 1] [2]
-) {
- is_short_ct[0] = is_short_ct[1] = 0;
- sign_ct[0] = sign_ct[1] = 0;
- vpx_memset(bit_ct, 0, sizeof(unsigned int)*mvlong_width_hp * 2);
- vpx_memset(short_ct, 0, sizeof(unsigned int)*mvnum_short_hp);
- vpx_memset(short_bct, 0, sizeof(unsigned int) * (mvnum_short_hp - 1) * 2);
-
- {
- const int c = events [mv_max_hp];
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
- }
- {
- int j = 1;
- do {
- const int c1 = events [mv_max_hp + j]; // positive
- const int c2 = events [mv_max_hp - j]; // negative
- const int c = c1 + c2;
- int a = j;
-
- sign_ct [0] += c1;
- sign_ct [1] += c2;
-
- if (a < mvnum_short_hp) {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- } else {
- int k = mvlong_width_hp - 1;
- is_short_ct [1] += c; // Long vector
-
- do
- bit_ct [k] [(a >> k) & 1] += c;
-
- while (--k >= 0);
- }
- } while (++j <= mv_max_hp);
- }
- calc_prob(Pnew + mvpis_short_hp, is_short_ct, 8);
-
- calc_prob(Pnew + MVPsign_hp, sign_ct, 8);
-
- {
- vp8_prob p [mvnum_short_hp - 1]; /* actually only need branch ct */
- int j = 0;
-
- vp8_tree_probs_from_distribution(
- mvnum_short_hp, vp8_small_mvencodings_hp, vp8_small_mvtree_hp,
- p, short_bct, short_ct,
- 256, 1
- );
-
- do
- calc_prob(Pnew + MVPshort_hp + j, short_bct[j], 8);
- while (++j < mvnum_short_hp - 1);
- }
-
- {
- int j = 0;
- do
- calc_prob(Pnew + MVPbits_hp + j, bit_ct[j], 8);
- while (++j < mvlong_width_hp);
- }
-}
-
-void vp8_adapt_mv_probs(VP8_COMMON *cm) {
- int i, t, count, factor;
-#ifdef MV_COUNT_TESTING
- printf("static const unsigned int\nMVcount[2][MVvals]={\n");
- for (i = 0; i < 2; ++i) {
- printf(" { ");
- for (t = 0; t < MVvals; t++) {
- printf("%d, ", cm->fc.MVcount[i][t]);
- if (t % 16 == 15 && t != MVvals - 1) printf("\n ");
- }
- printf("},\n");
- }
- printf("};\n");
- printf("static const unsigned int\nMVcount_hp[2][MVvals_hp]={\n");
- for (i = 0; i < 2; ++i) {
- printf(" { ");
- for (t = 0; t < MVvals_hp; t++) {
- printf("%d, ", cm->fc.MVcount_hp[i][t]);
- if (t % 16 == 15 && t != MVvals_hp - 1) printf("\n ");
- }
- printf("},\n");
- }
- printf("};\n");
-#endif /* MV_COUNT_TESTING */
-
- for (i = 0; i < 2; ++i) {
- int prob;
- unsigned int is_short_ct[2];
- unsigned int sign_ct[2];
- unsigned int bit_ct [mvlong_width] [2];
- unsigned int short_ct [mvnum_short];
- unsigned int short_bct [mvnum_short - 1] [2];
- vp8_prob Pnew [MVPcount];
- compute_component_probs(cm->fc.MVcount[i], Pnew,
- is_short_ct, sign_ct,
- bit_ct, short_ct, short_bct);
- count = is_short_ct[0] + is_short_ct[1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc[i].prob[mvpis_short] * (256 - factor) +
- (int)Pnew[mvpis_short] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc[i].prob[mvpis_short] = 1;
- else if (prob > 255) cm->fc.mvc[i].prob[mvpis_short] = 255;
- else cm->fc.mvc[i].prob[mvpis_short] = prob;
-
- count = sign_ct[0] + sign_ct[1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc[i].prob[MVPsign] * (256 - factor) +
- (int)Pnew[MVPsign] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc[i].prob[MVPsign] = 1;
- else if (prob > 255) cm->fc.mvc[i].prob[MVPsign] = 255;
- else cm->fc.mvc[i].prob[MVPsign] = prob;
-
- for (t = 0; t < mvnum_short - 1; ++t) {
- count = short_bct[t][0] + short_bct[t][1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc[i].prob[MVPshort + t] * (256 - factor) +
- (int)Pnew[MVPshort + t] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc[i].prob[MVPshort + t] = 1;
- else if (prob > 255) cm->fc.mvc[i].prob[MVPshort + t] = 255;
- else cm->fc.mvc[i].prob[MVPshort + t] = prob;
- }
- for (t = 0; t < mvlong_width; ++t) {
- count = bit_ct[t][0] + bit_ct[t][1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc[i].prob[MVPbits + t] * (256 - factor) +
- (int)Pnew[MVPbits + t] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc[i].prob[MVPbits + t] = 1;
- else if (prob > 255) cm->fc.mvc[i].prob[MVPbits + t] = 255;
- else cm->fc.mvc[i].prob[MVPbits + t] = prob;
- }
- }
- for (i = 0; i < 2; ++i) {
- int prob;
- unsigned int is_short_ct[2];
- unsigned int sign_ct[2];
- unsigned int bit_ct [mvlong_width_hp] [2];
- unsigned int short_ct [mvnum_short_hp];
- unsigned int short_bct [mvnum_short_hp - 1] [2];
- vp8_prob Pnew [MVPcount_hp];
- compute_component_probs_hp(cm->fc.MVcount_hp[i], Pnew,
- is_short_ct, sign_ct,
- bit_ct, short_ct, short_bct);
- count = is_short_ct[0] + is_short_ct[1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc_hp[i].prob[mvpis_short_hp] * (256 - factor) +
- (int)Pnew[mvpis_short_hp] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc_hp[i].prob[mvpis_short_hp] = 1;
- else if (prob > 255) cm->fc.mvc_hp[i].prob[mvpis_short_hp] = 255;
- else cm->fc.mvc_hp[i].prob[mvpis_short_hp] = prob;
-
- count = sign_ct[0] + sign_ct[1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc_hp[i].prob[MVPsign_hp] * (256 - factor) +
- (int)Pnew[MVPsign_hp] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc_hp[i].prob[MVPsign_hp] = 1;
- else if (prob > 255) cm->fc.mvc_hp[i].prob[MVPsign_hp] = 255;
- else cm->fc.mvc_hp[i].prob[MVPsign_hp] = prob;
-
- for (t = 0; t < mvnum_short_hp - 1; ++t) {
- count = short_bct[t][0] + short_bct[t][1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc_hp[i].prob[MVPshort_hp + t] * (256 - factor) +
- (int)Pnew[MVPshort_hp + t] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc_hp[i].prob[MVPshort_hp + t] = 1;
- else if (prob > 255) cm->fc.mvc_hp[i].prob[MVPshort_hp + t] = 255;
- else cm->fc.mvc_hp[i].prob[MVPshort_hp + t] = prob;
- }
- for (t = 0; t < mvlong_width_hp; ++t) {
- count = bit_ct[t][0] + bit_ct[t][1];
- count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
- factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
- prob = ((int)cm->fc.pre_mvc_hp[i].prob[MVPbits_hp + t] * (256 - factor) +
- (int)Pnew[MVPbits_hp + t] * factor + 128) >> 8;
- if (prob <= 0) cm->fc.mvc_hp[i].prob[MVPbits_hp + t] = 1;
- else if (prob > 255) cm->fc.mvc_hp[i].prob[MVPbits_hp + t] = 255;
- else cm->fc.mvc_hp[i].prob[MVPbits_hp + t] = prob;
- }
- }
-}
-
-#endif /* CONFIG_NEWMVENTROPY */
-
void vp8_entropy_mv_init() {
-#if CONFIG_NEWMVENTROPY
vp8_tokens_from_tree(vp8_mv_joint_encodings, vp8_mv_joint_tree);
vp8_tokens_from_tree(vp8_mv_class_encodings, vp8_mv_class_tree);
vp8_tokens_from_tree(vp8_mv_class0_encodings, vp8_mv_class0_tree);
vp8_tokens_from_tree(vp8_mv_fp_encodings, vp8_mv_fp_tree);
-#else
- vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
- vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp);
-#endif
}
void vp8_init_mv_probs(VP8_COMMON *cm) {
-#if CONFIG_NEWMVENTROPY
vpx_memcpy(&cm->fc.nmvc, &vp8_default_nmv_context, sizeof(nmv_context));
-#else
- vpx_memcpy(cm->fc.mvc,
- vp8_default_mv_context, sizeof(vp8_default_mv_context));
- vpx_memcpy(cm->fc.mvc_hp,
- vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp));
-#endif
}
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index 1a193b172..80540a54c 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -22,7 +22,6 @@ void vp8_entropy_mv_init();
void vp8_init_mv_probs(struct VP8Common *cm);
void vp8_adapt_mv_probs(struct VP8Common *cm);
-#if CONFIG_NEWMVENTROPY
void vp8_adapt_nmv_probs(struct VP8Common *cm, int usehp);
void vp8_lower_mv_precision(MV *mv);
int vp8_use_nmv_hp(const MV *ref);
@@ -129,65 +128,4 @@ void vp8_counts_to_nmv_context(
unsigned int (*branch_ct_class0_hp)[2],
unsigned int (*branch_ct_hp)[2]);
-#else /* CONFIG_NEWMVENTROPY */
-
-enum {
- mv_max = 1023, /* max absolute value of a MV component */
- MVvals = (2 * mv_max) + 1, /* # possible values "" */
- mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
- mvnum_short = 8, /* magnitudes 0 through 7 */
- mvnum_short_bits = 3, /* number of bits for short mvs */
-
- mvfp_max = 255, /* max absolute value of a full pixel MV component */
- MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */
-
- /* probability offsets for coding each MV component */
-
- mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
- MVPsign, /* sign for non-zero */
- MVPshort, /* 8 short values = 7-position tree */
-
- MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
- MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
-};
-
-typedef struct mv_context {
- vp8_prob prob[MVPcount]; /* often come in row, col pairs */
-} MV_CONTEXT;
-
-extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
-
-enum {
- mv_max_hp = 2047, /* max absolute value of a MV component */
- MVvals_hp = (2 * mv_max_hp) + 1, /* # possible values "" */
- mvlong_width_hp = 11, /* Large MVs have 9 bit magnitudes */
- mvnum_short_hp = 16, /* magnitudes 0 through 15 */
- mvnum_short_bits_hp = 4, /* number of bits for short mvs */
-
- mvfp_max_hp = 255, /* max absolute value of a full pixel MV component */
- MVfpvals_hp = (2 * mvfp_max_hp) + 1, /* # possible full pixel MV values */
-
- /* probability offsets for coding each MV component */
-
- mvpis_short_hp = 0, /* short (<= 7) vs long (>= 8) */
- MVPsign_hp, /* sign for non-zero */
- MVPshort_hp, /* 8 short values = 7-position tree */
-
- MVPbits_hp = MVPshort_hp + mvnum_short_hp - 1, /* mvlong_width long value bits */
- MVPcount_hp = MVPbits_hp + mvlong_width_hp /* (with independent probabilities) */
-};
-
-typedef struct mv_context_hp {
- vp8_prob prob[MVPcount_hp]; /* often come in row, col pairs */
-} MV_CONTEXT_HP;
-
-extern const MV_CONTEXT_HP vp8_mv_update_probs_hp[2], vp8_default_mv_context_hp[2];
-
-extern const vp8_tree_index vp8_small_mvtree[];
-extern struct vp8_token_struct vp8_small_mvencodings [8];
-extern const vp8_tree_index vp8_small_mvtree_hp[];
-extern struct vp8_token_struct vp8_small_mvencodings_hp [16];
-
-#endif /* CONFIG_NEWMVENTROPY */
-
#endif
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index 7c9ea1066..5fc135090 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -10,7 +10,7 @@
#include "findnearmv.h"
-#include "vp8/encoder/variance.h"
+#include "vp8/common/sadmxn.h"
#include <limits.h>
const unsigned char vp8_mbsplit_offset[4][16] = {
@@ -22,11 +22,7 @@ const unsigned char vp8_mbsplit_offset[4][16] = {
static void lower_mv_precision(int_mv *mv, int usehp)
{
-#if CONFIG_NEWMVENTROPY
if (!usehp || !vp8_use_nmv_hp(&mv->as_mv)) {
-#else
- if (!usehp) {
-#endif
if (mv->as_mv.row & 1)
mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1);
if (mv->as_mv.col & 1)
@@ -199,6 +195,23 @@ vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc,
}
#if CONFIG_NEWBESTREFMV
+unsigned int vp8_sad3x16_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16);
+}
+unsigned int vp8_sad16x3_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);
+}
+
/* check a list of motion vectors by sad score using a number rows of pixels
* above and a number cols of pixels in the left to select the one with best
* score to use as ref motion vector
@@ -260,10 +273,10 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
sad = 0;
if (xd->up_available)
- sad += vp8_sad16x3_c(above_src, xd->dst.y_stride,
+ sad += vp8_sad16x3(above_src, xd->dst.y_stride,
above_ref + offset, ref_y_stride, INT_MAX);
if (xd->left_available)
- sad += vp8_sad3x16_c(left_src, xd->dst.y_stride,
+ sad += vp8_sad3x16(left_src, xd->dst.y_stride,
left_ref + offset, ref_y_stride, INT_MAX);
// Add the entry to our list and then resort the list on score.
sad_scores[i] = sad;
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index d096e8182..ae33df668 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -109,12 +109,9 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
#endif
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
#include "vp8/common/blockd.h"
void vp8_ihtllm_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim);
-#endif
-
typedef prototype_idct((*vp8_idct_fn_t));
typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index d705fec32..c7369b2e2 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -26,9 +26,7 @@
#include "vp8/common/idct.h"
#include "vp8/common/systemdependent.h"
-#if CONFIG_HYBRIDTRANSFORM
#include "vp8/common/blockd.h"
-#endif
#include <math.h>
@@ -38,7 +36,6 @@ static const int rounding = 0;
// TODO: these transforms can be further converted into integer forms
// for complexity optimization
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
float idct_4[16] = {
0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099,
0.500000000000000, 0.270598050073099, -0.500000000000000, -0.653281482438188,
@@ -90,9 +87,7 @@ float iadst_8[64] = {
0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348,
0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532
};
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
float idct_16[256] = {
0.250000, 0.351851, 0.346760, 0.338330, 0.326641, 0.311806, 0.293969, 0.273300,
0.250000, 0.224292, 0.196424, 0.166664, 0.135299, 0.102631, 0.068975, 0.034654,
@@ -162,9 +157,7 @@ float iadst_16[256] = {
0.347761, -0.344612, 0.338341, -0.329007, 0.316693, -0.301511, 0.283599, -0.263118,
0.240255, -0.215215, 0.188227, -0.159534, 0.129396, -0.098087, 0.065889, -0.033094
};
-#endif
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
void vp8_ihtllm_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim) {
@@ -289,7 +282,6 @@ void vp8_ihtllm_c(short *input, short *output, int pitch,
}
vp8_clear_system_state(); // Make it simd safe : __asm emms;
}
-#endif
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) {
int i;
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 3f97d2101..323d48de8 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -7,8 +7,6 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
-
#include <stdlib.h>
#include "vpx_config.h"
#include "loopfilter.h"
@@ -94,6 +92,7 @@ static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
*op1 = u ^ 0x80;
}
+
void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
@@ -218,6 +217,7 @@ static __inline void vp8_mbfilter(signed char mask, uc hev, uc flat,
Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
Filter1 >>= 3;
Filter2 >>= 3;
+
u = vp8_signed_char_clamp(qs0 - Filter1);
*oq0 = u ^ 0x80;
u = vp8_signed_char_clamp(ps0 + Filter2);
@@ -271,8 +271,6 @@ void vp8_mbloop_filter_horizontal_edge_c
} while (++i < count * 8);
}
-
-
void vp8_mbloop_filter_vertical_edge_c
(
unsigned char *s,
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 0396a7087..38df3500a 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -51,27 +51,14 @@ typedef struct frame_contexts {
vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1];
vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM
vp8_prob hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#endif
vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#endif
vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#endif
-#if CONFIG_NEWMVENTROPY
nmv_context nmvc;
nmv_context pre_nmvc;
-#else
- MV_CONTEXT mvc[2];
- MV_CONTEXT_HP mvc_hp[2];
- MV_CONTEXT pre_mvc[2];
- MV_CONTEXT_HP pre_mvc_hp[2];
-#endif
vp8_prob pre_bmode_prob [VP8_BINTRAMODES - 1];
vp8_prob pre_ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */
vp8_prob pre_uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1];
@@ -87,56 +74,37 @@ typedef struct frame_contexts {
vp8_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM
vp8_prob pre_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#endif
vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_prob pre_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#endif
vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#endif
unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM
unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM8X8
unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
-#if CONFIG_NEWMVENTROPY
nmv_context_counts NMVcount;
-#else
- unsigned int MVcount [2] [MVvals];
- unsigned int MVcount_hp [2] [MVvals_hp];
-#endif
-#if CONFIG_SWITCHABLE_INTERP
- vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS+1]
- [VP8_SWITCHABLE_FILTERS-1];
-#endif
+ vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS + 1]
+ [VP8_SWITCHABLE_FILTERS - 1];
int mode_context[6][4];
int mode_context_a[6][4];
@@ -161,10 +129,8 @@ typedef enum {
ONLY_4X4 = 0,
ALLOW_8X8 = 1,
ALLOW_16X16 = 2,
-#if CONFIG_TX_SELECT
TX_MODE_SELECT = 3,
-#endif
- NB_TXFM_MODES = 3 + CONFIG_TX_SELECT,
+ NB_TXFM_MODES = 4,
} TXFM_MODE;
typedef struct VP8_COMMON_RTCD {
@@ -302,10 +268,8 @@ typedef struct VP8Common {
vp8_prob prob_comppred[COMP_PRED_CONTEXTS];
-#if CONFIG_TX_SELECT
// FIXME contextualize
vp8_prob prob_tx[TX_SIZE_MAX - 1];
-#endif
vp8_prob mbskip_pred_probs[MBSKIP_CONTEXTS];
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 17bbe3281..388612e8a 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -783,7 +783,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
if (mi->mbmi.mode == SPLITMV) {
switch (mi->mbmi.partitioning) {
- case 0 : { /* mv_top_bottom */
+ case PARTITIONING_16X8 : { /* mv_top_bottom */
union b_mode_info *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
@@ -803,7 +803,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
break;
}
- case 1 : { /* mv_left_right */
+ case PARTITIONING_8X16 : { /* mv_left_right */
union b_mode_info *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
@@ -823,7 +823,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
break;
}
- case 2 : { /* mv_quarters */
+ case PARTITIONING_8X8 : { /* mv_quarters */
union b_mode_info *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
@@ -858,6 +858,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
vp8_blit_line(x0 + 12, x1, y0 + 12, y1, y_buffer, y_stride);
break;
}
+ case PARTITIONING_4X4:
default : {
union b_mode_info *bmi = mi->bmi;
int bx0, by0;
diff --git a/vp8/common/pred_common.c b/vp8/common/pred_common.c
index a32389433..a97eed8e4 100644
--- a/vp8/common/pred_common.c
+++ b/vp8/common/pred_common.c
@@ -63,7 +63,6 @@ unsigned char get_pred_context(const VP8_COMMON *const cm,
(m - cm->mode_info_stride)->mbmi.mb_skip_coeff;
break;
-#if CONFIG_SWITCHABLE_INTERP
case PRED_SWITCHABLE_INTERP:
{
int left_in_image = (m - 1)->mbmi.mb_in_image;
@@ -93,7 +92,6 @@ unsigned char get_pred_context(const VP8_COMMON *const cm,
pred_context = VP8_SWITCHABLE_FILTERS;
}
break;
-#endif
default:
// TODO *** add error trap code.
@@ -175,11 +173,10 @@ const vp8_prob *get_pred_probs(const VP8_COMMON *const cm,
pred_probability = &cm->mbskip_pred_probs[pred_context];
break;
-#if CONFIG_SWITCHABLE_INTERP
case PRED_SWITCHABLE_INTERP:
pred_probability = &cm->fc.switchable_interp_prob[pred_context][0];
break;
-#endif
+
default:
// TODO *** add error trap code.
pred_probability = NULL;
diff --git a/vp8/common/pred_common.h b/vp8/common/pred_common.h
index 402e0235f..2a9875dfe 100644
--- a/vp8/common/pred_common.h
+++ b/vp8/common/pred_common.h
@@ -22,12 +22,9 @@ typedef enum {
PRED_REF = 1,
PRED_COMP = 2,
PRED_MBSKIP = 3,
-#if CONFIG_SWITCHABLE_INTERP
- PRED_SWITCHABLE_INTERP = 4,
-#endif
+ PRED_SWITCHABLE_INTERP = 4
} PRED_ID;
-
extern unsigned char get_pred_context(const VP8_COMMON *const cm,
const MACROBLOCKD *const xd,
PRED_ID pred_id);
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index a41d233ab..6c60845fb 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -36,13 +36,7 @@ void vp8_setup_interp_filters(MACROBLOCKD *xd,
&cm->rtcd.subpix, sixtap_avg8x8);
xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE(
&cm->rtcd.subpix, sixtap_avg16x16);
- }
- else if (mcomp_filter_type == EIGHTTAP
-#if CONFIG_SWITCHABLE_INTERP
- ||
- mcomp_filter_type == SWITCHABLE
-#endif
- ) {
+ } else if (mcomp_filter_type == EIGHTTAP || mcomp_filter_type == SWITCHABLE) {
xd->subpixel_predict = SUBPIX_INVOKE(
&cm->rtcd.subpix, eighttap4x4);
xd->subpixel_predict8x4 = SUBPIX_INVOKE(
@@ -965,7 +959,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
BLOCKD *blockd = xd->block;
- if (xd->mode_info_context->mbmi.partitioning < 3) {
+ if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
blockd[ 0].bmi = xd->mode_info_context->bmi[ 0];
blockd[ 2].bmi = xd->mode_info_context->bmi[ 2];
blockd[ 8].bmi = xd->mode_info_context->bmi[ 8];
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index ef272df90..ea64c9682 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -125,22 +125,22 @@ specialize vp8_comp_intra_uv4x4_predict;
# Loopfilter
#
prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbv;
+specialize vp8_loop_filter_mbv sse2
prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bv;
+specialize vp8_loop_filter_bv sse2
prototype void vp8_loop_filter_bv8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bv8x8;
+specialize vp8_loop_filter_bv8x8 sse2
prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbh;
+specialize vp8_loop_filter_mbh sse2
prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bh;
+specialize vp8_loop_filter_bh sse2
prototype void vp8_loop_filter_bh8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bh8x8;
+specialize vp8_loop_filter_bh8x8 sse2
prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
specialize vp8_loop_filter_simple_mbv mmx sse2 media neon
@@ -174,3 +174,210 @@ vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2
vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6
vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon
+#
+# sad 16x3, 3x16
+#
+prototype unsigned int vp8_sad16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp8_sad16x3
+
+prototype unsigned int vp8_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp8_sad3x16
+
+#
+# Encoder functions below this point.
+#
+if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then
+
+
+# variance
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
+
+prototype unsigned int vp8_variance32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance32x32
+
+prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance16x16 mmx sse2
+vp8_variance16x16_sse2=vp8_variance16x16_wmt
+vp8_variance16x16_mmx=vp8_variance16x16_mmx
+
+prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance16x8 mmx sse2
+vp8_variance16x8_sse2=vp8_variance16x8_wmt
+vp8_variance16x8_mmx=vp8_variance16x8_mmx
+
+prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance8x16 mmx sse2
+vp8_variance8x16_sse2=vp8_variance8x16_wmt
+vp8_variance8x16_mmx=vp8_variance8x16_mmx
+
+prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance8x8 mmx sse2
+vp8_variance8x8_sse2=vp8_variance8x8_wmt
+vp8_variance8x8_mmx=vp8_variance8x8_mmx
+
+prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance4x4 mmx sse2
+vp8_variance4x4_sse2=vp8_variance4x4_wmt
+vp8_variance4x4_mmx=vp8_variance4x4_mmx
+
+prototype unsigned int vp8_sub_pixel_variance32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_variance32x32
+
+prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_variance16x16 sse2 mmx ssse3
+vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt
+
+prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_variance8x16 sse2 mmx
+vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt
+
+prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_variance16x8 sse2 mmx ssse3
+vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_ssse3;
+vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt
+
+prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_variance8x8 sse2 mmx
+vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt
+
+prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_variance4x4 sse2 mmx
+vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt
+
+prototype unsigned int vp8_sad32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp8_sad32x32
+
+prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp8_sad16x16 mmx sse2 sse3
+vp8_sad16x16_sse2=vp8_sad16x16_wmt
+
+prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp8_sad16x8 mmx sse2
+vp8_sad16x8_sse2=vp8_sad16x8_wmt
+
+prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp8_sad8x16 mmx sse2
+vp8_sad8x16_sse2=vp8_sad8x16_wmt
+
+prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp8_sad8x8 mmx sse2
+vp8_sad8x8_sse2=vp8_sad8x8_wmt
+
+prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp8_sad4x4 mmx sse2
+vp8_sad4x4_sse2=vp8_sad4x4_wmt
+
+prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance_halfpixvar16x16_h mmx sse2
+vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt
+
+prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance_halfpixvar16x16_v mmx sse2
+vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt
+
+prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance_halfpixvar16x16_hv mmx sse2
+vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt
+
+prototype unsigned int vp8_variance_halfpixvar32x32_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance_halfpixvar32x32_h
+
+prototype unsigned int vp8_variance_halfpixvar32x32_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance_halfpixvar32x32_v
+
+prototype unsigned int vp8_variance_halfpixvar32x32_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp8_variance_halfpixvar32x32_hv
+
+prototype void vp8_sad32x32x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp8_sad32x32x3
+
+prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp8_sad16x16x3 sse3 ssse3
+
+prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp8_sad16x8x3 sse3 ssse3
+
+prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp8_sad8x16x3 sse3
+
+prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp8_sad8x8x3 sse3
+
+prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp8_sad4x4x3 sse3
+
+prototype void vp8_sad32x32x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp8_sad32x32x8
+
+prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp8_sad16x16x8 sse4
+
+prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp8_sad16x8x8 sse4
+
+prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp8_sad8x16x8 sse4
+
+prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp8_sad8x8x8 sse4
+
+prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp8_sad4x4x8 sse4
+
+prototype void vp8_sad32x32x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp8_sad32x32x4d
+
+prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp8_sad16x16x4d sse3
+
+prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp8_sad16x8x4d sse3
+
+prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp8_sad8x16x4d sse3
+
+prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp8_sad8x8x4d sse3
+
+prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp8_sad4x4x4d sse3
+
+#
+# Block copy
+#
+case $arch in
+ x86*)
+ prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"
+ specialize vp8_copy32xn sse2 sse3
+ ;;
+esac
+
+prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
+specialize vp8_sub_pixel_mse16x16 sse2 mmx
+vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt
+
+prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse"
+specialize vp8_mse16x16 mmx sse2
+vp8_mse16x16_sse2=vp8_mse16x16_wmt
+
+prototype unsigned int vp8_sub_pixel_mse32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp8_sub_pixel_mse32x32
+
+prototype unsigned int vp8_get_mb_ss "const short *"
+specialize vp8_get_mb_ss mmx sse2
+
+#
+# Structured Similarity (SSIM)
+#
+if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
+ [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
+
+ prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
+ specialize vp8_ssim_parms_8x8 $sse2_on_x86_64
+
+ prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
+ specialize vp8_ssim_parms_16x16 $sse2_on_x86_64
+fi
+
+fi
+# end encoder functions
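
For readers unfamiliar with this file: each "prototype" line declares a function signature and each "specialize" line lists the optimized versions the build may substitute for it; the build system expands these into a generated run-time dispatch header. Roughly, and only as an illustrative sketch rather than the actual generated file, the new vp8_sad16x3 entry with no SIMD specializations reduces to:

    /* Declaration of the plain C implementation (added in findnearmv.c above). */
    unsigned int vp8_sad16x3_c(const unsigned char *src_ptr, int src_stride,
                               const unsigned char *ref_ptr, int ref_stride,
                               int max_sad);

    /* With nothing listed after "specialize vp8_sad16x3", the dispatch name
     * resolves directly to the C version; if SIMD versions were listed, the
     * generated header would select among them when RTCD is initialized. */
    #define vp8_sad16x3 vp8_sad16x3_c
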
diff --git a/vp8/common/sadmxn.h b/vp8/common/sadmxn.h
new file mode 100644
index 000000000..47b8dfc58
--- /dev/null
+++ b/vp8/common/sadmxn.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef __INC_SAD_H
+#define __INC_SAD_H
+
+static __inline
+unsigned int sad_mx_n_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int m,
+ int n) {
+ int r, c;
+ unsigned int sad = 0;
+
+ for (r = 0; r < n; r++) {
+ for (c = 0; c < m; c++) {
+ sad += abs(src_ptr[c] - ref_ptr[c]);
+ }
+
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ }
+
+ return sad;
+}
+
+#endif
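The sad_mx_n_c kernel above is the generic fallback behind the fixed-size SAD hooks and the x8/x4d multi-reference variants declared in rtcd_defs.sh; note that it relies on the including file having pulled in abs(), for example via <stdlib.h>. A minimal usage sketch, with illustrative names that are not part of this patch:

    #include <stdlib.h>               /* abs(), used inside sad_mx_n_c */
    #include "vp8/common/sadmxn.h"

    /* Fixed-size single-reference SAD, roughly the way the C versions in
       vp8/encoder/sad_c.c wrap the generic kernel. */
    static unsigned int sad8x8_sketch(const unsigned char *src, int src_stride,
                                      const unsigned char *ref, int ref_stride) {
      return sad_mx_n_c(src, src_stride, ref, ref_stride, 8, 8);
    }

    /* x4d flavor: one source block scored against four candidate references,
       matching the shape of the vp8_sad8x8x4d prototype above. */
    static void sad8x8x4d_sketch(const unsigned char *src, int src_stride,
                                 unsigned char *ref[], int ref_stride,
                                 unsigned int sad_array[4]) {
      int i;
      for (i = 0; i < 4; i++)
        sad_array[i] = sad_mx_n_c(src, src_stride, ref[i], ref_stride, 8, 8);
    }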
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 697a5dee6..63b72385b 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -594,790 +594,6 @@ sym(vp8_loop_filter_vertical_edge_mmx):
ret
-;void vp8_mbloop_filter_horizontal_edge_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vp8_mbloop_filter_horizontal_edge_mmx)
-sym(vp8_mbloop_filter_horizontal_edge_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 32 ; reserve 32 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- movsxd rcx, dword ptr arg(5) ;count
-.next8_mbh:
- mov rdx, arg(3) ;limit
- movq mm7, [rdx]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
-
- ; calculate breakout conditions
- movq mm2, [rdi+2*rax] ; q3
-
- movq mm1, [rsi+2*rax] ; q2
- movq mm6, mm1 ; q2
- psubusb mm1, mm2 ; q2-=q3
- psubusb mm2, mm6 ; q3-=q2
- por mm1, mm2 ; abs(q3-q2)
- psubusb mm1, mm7
-
-
- ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit
- movq mm4, [rsi+rax] ; q1
- movq mm3, mm4 ; q1
- psubusb mm4, mm6 ; q1-=q2
- psubusb mm6, mm3 ; q2-=q1
- por mm4, mm6 ; abs(q2-q1)
- psubusb mm4, mm7
- por mm1, mm4
-
-
- ; mm1 = mask, mm3=q1, mm7 = limit
-
- movq mm4, [rsi] ; q0
- movq mm0, mm4 ; q0
- psubusb mm4, mm3 ; q0-=q1
- psubusb mm3, mm0 ; q1-=q0
- por mm4, mm3 ; abs(q0-q1)
- movq t0, mm4 ; save to t0
- psubusb mm4, mm7
- por mm1, mm4
-
-
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1)
-
- neg rax ; negate pitch to deal with above border
-
- movq mm2, [rsi+4*rax] ; p3
- movq mm4, [rdi+4*rax] ; p2
- movq mm5, mm4 ; p2
- psubusb mm4, mm2 ; p2-=p3
- psubusb mm2, mm5 ; p3-=p2
- por mm4, mm2 ; abs(p3 - p2)
- psubusb mm4, mm7
- por mm1, mm4
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1)
-
- movq mm4, [rsi+2*rax] ; p1
- movq mm3, mm4 ; p1
- psubusb mm4, mm5 ; p1-=p2
- psubusb mm5, mm3 ; p2-=p1
- por mm4, mm5 ; abs(p2 - p1)
- psubusb mm4, mm7
- por mm1, mm4
-
- movq mm2, mm3 ; p1
-
-
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1)
-
- movq mm4, [rsi+rax] ; p0
- movq mm5, mm4 ; p0
- psubusb mm4, mm3 ; p0-=p1
- psubusb mm3, mm5 ; p1-=p0
- por mm4, mm3 ; abs(p1 - p0)
- movq t1, mm4 ; save to t1
- psubusb mm4, mm7
- por mm1, mm4
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0)
- ; mm5 = p0
- movq mm3, [rdi] ; q1
- movq mm4, mm3 ; q1
- psubusb mm3, mm2 ; q1-=p1
- psubusb mm2, mm4 ; p1-=q1
- por mm2, mm3 ; abs(p1-q1)
- pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
- psrlw mm2, 1 ; abs(p1-q1)/2
-
- movq mm6, mm5 ; p0
- movq mm3, mm0 ; q0
- psubusb mm5, mm3 ; p0-=q0
- psubusb mm3, mm6 ; q0-=p0
- por mm5, mm3 ; abs(p0 - q0)
- paddusb mm5, mm5 ; abs(p0-q0)*2
- paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
- mov rdx, arg(2) ;blimit ; get blimit
- movq mm7, [rdx] ; blimit
-
- psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
- por mm1, mm5
- pxor mm5, mm5
- pcmpeqb mm1, mm5 ; mask mm1
-
- ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
- ; mm6 = p0,
-
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movq mm7, [rdx] ;
- movq mm4, t0 ; get abs (q1 - q0)
- psubusb mm4, mm7
- movq mm3, t1 ; get abs (p1 - p0)
- psubusb mm3, mm7
- paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-
- pcmpeqb mm4, mm5
-
- pcmpeqb mm5, mm5
- pxor mm4, mm5
-
-
-
- ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0)
- ; mm6 = p0, mm4=hev
- ; start work on filters
- movq mm2, [rsi+2*rax] ; p1
- movq mm7, [rdi] ; q1
- pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
- psubsb mm2, mm7 ; p1 - q1
-
- pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
- movq mm3, mm0 ; q0
- psubsb mm0, mm6 ; q0 - p0
- paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1)
- paddsb mm2, mm0 ; 2 * (q0 - p0)
- paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1)
- pand mm1, mm2 ; mask filter values we don't care about
-
-
- ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0
- movq mm2, mm1 ; vp8_filter
- pand mm2, mm4; ; Filter2 = vp8_filter & hev
-
- movq mm5, mm2 ;
- paddsb mm5, [GLOBAL(t3)];
-
- pxor mm0, mm0 ; 0
- pxor mm7, mm7 ; 0
-
- punpcklbw mm0, mm5 ; e0f0g0h0
- psraw mm0, 11 ; sign extended shift right by 3
- punpckhbw mm7, mm5 ; a0b0c0d0
- psraw mm7, 11 ; sign extended shift right by 3
- packsswb mm0, mm7 ; Filter2 >>=3;
-
- movq mm5, mm0 ; Filter2
-
- paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
- pxor mm0, mm0 ; 0
- pxor mm7, mm7 ; 0
-
- punpcklbw mm0, mm2 ; e0f0g0h0
- psraw mm0, 11 ; sign extended shift right by 3
- punpckhbw mm7, mm2 ; a0b0c0d0
- psraw mm7, 11 ; sign extended shift right by 3
- packsswb mm0, mm7 ; Filter2 >>=3;
-
- ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0
- psubsb mm3, mm0 ; qs0 =qs0 - filter1
- paddsb mm6, mm5 ; ps0 =ps0 + Fitler2
-
- ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0
- ; vp8_filter &= ~hev;
- ; Filter2 = vp8_filter;
- pandn mm4, mm1 ; vp8_filter&=~hev
-
-
- ; mm3=qs0, mm4=filter2, mm6=ps0
-
- ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
- ; s = vp8_signed_char_clamp(qs0 - u);
- ; *oq0 = s^0x80;
- ; s = vp8_signed_char_clamp(ps0 + u);
- ; *op0 = s^0x80;
- pxor mm0, mm0
-
- pxor mm1, mm1
- pxor mm2, mm2
- punpcklbw mm1, mm4
- punpckhbw mm2, mm4
- pmulhw mm1, [GLOBAL(s27)]
- pmulhw mm2, [GLOBAL(s27)]
- paddw mm1, [GLOBAL(s63)]
- paddw mm2, [GLOBAL(s63)]
- psraw mm1, 7
- psraw mm2, 7
- packsswb mm1, mm2
-
- psubsb mm3, mm1
- paddsb mm6, mm1
-
- pxor mm3, [GLOBAL(t80)]
- pxor mm6, [GLOBAL(t80)]
- movq [rsi+rax], mm6
- movq [rsi], mm3
-
- ; roughly 2/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
- ; s = vp8_signed_char_clamp(qs1 - u);
- ; *oq1 = s^0x80;
- ; s = vp8_signed_char_clamp(ps1 + u);
- ; *op1 = s^0x80;
- pxor mm1, mm1
- pxor mm2, mm2
- punpcklbw mm1, mm4
- punpckhbw mm2, mm4
- pmulhw mm1, [GLOBAL(s18)]
- pmulhw mm2, [GLOBAL(s18)]
- paddw mm1, [GLOBAL(s63)]
- paddw mm2, [GLOBAL(s63)]
- psraw mm1, 7
- psraw mm2, 7
- packsswb mm1, mm2
-
- movq mm3, [rdi]
- movq mm6, [rsi+rax*2] ; p1
-
- pxor mm3, [GLOBAL(t80)]
- pxor mm6, [GLOBAL(t80)]
-
- paddsb mm6, mm1
- psubsb mm3, mm1
-
- pxor mm6, [GLOBAL(t80)]
- pxor mm3, [GLOBAL(t80)]
- movq [rdi], mm3
- movq [rsi+rax*2], mm6
-
- ; roughly 1/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
- ; s = vp8_signed_char_clamp(qs2 - u);
- ; *oq2 = s^0x80;
- ; s = vp8_signed_char_clamp(ps2 + u);
- ; *op2 = s^0x80;
- pxor mm1, mm1
- pxor mm2, mm2
- punpcklbw mm1, mm4
- punpckhbw mm2, mm4
- pmulhw mm1, [GLOBAL(s9)]
- pmulhw mm2, [GLOBAL(s9)]
- paddw mm1, [GLOBAL(s63)]
- paddw mm2, [GLOBAL(s63)]
- psraw mm1, 7
- psraw mm2, 7
- packsswb mm1, mm2
-
-
- movq mm6, [rdi+rax*4]
- neg rax
- movq mm3, [rdi+rax ]
-
- pxor mm6, [GLOBAL(t80)]
- pxor mm3, [GLOBAL(t80)]
-
- paddsb mm6, mm1
- psubsb mm3, mm1
-
- pxor mm6, [GLOBAL(t80)]
- pxor mm3, [GLOBAL(t80)]
- movq [rdi+rax ], mm3
- neg rax
- movq [rdi+rax*4], mm6
-
-;EARLY_BREAK_OUT:
- neg rax
- add rsi,8
- dec rcx
- jnz .next8_mbh
-
- add rsp, 32
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_mbloop_filter_vertical_edge_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vp8_mbloop_filter_vertical_edge_mmx)
-sym(vp8_mbloop_filter_vertical_edge_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 96 ; reserve 96 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
- %define srct [rsp + 32] ;__declspec(align(16)) char srct[64];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- lea rsi, [rsi + rax*4 - 4]
-
- movsxd rcx, dword ptr arg(5) ;count
-.next8_mbv:
- lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
-
- ;transpose
- movq mm0, [rdi+2*rax] ; 77 76 75 74 73 72 71 70
- movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60
-
- movq mm7, mm6 ; 77 76 75 74 73 72 71 70
- punpckhbw mm7, mm0 ; 77 67 76 66 75 65 74 64
-
- punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60
- movq mm0, [rsi+rax] ; 57 56 55 54 53 52 51 50
-
- movq mm4, [rsi] ; 47 46 45 44 43 42 41 40
- movq mm5, mm4 ; 47 46 45 44 43 42 41 40
-
- punpckhbw mm5, mm0 ; 57 47 56 46 55 45 54 44
- punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40
-
- movq mm3, mm5 ; 57 47 56 46 55 45 54 44
- punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46
-
- punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44
- movq mm2, mm4 ; 53 43 52 42 51 41 50 40
-
- punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42
- punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40
-
- neg rax
-
- movq mm7, [rsi+rax] ; 37 36 35 34 33 32 31 30
- movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20
-
- movq mm1, mm6 ; 27 26 25 24 23 22 21 20
- punpckhbw mm6, mm7 ; 37 27 36 36 35 25 34 24
-
- punpcklbw mm1, mm7 ; 33 23 32 22 31 21 30 20
-
- movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00
- punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04
-
- movq mm0, mm7 ; 17 07 16 06 15 05 14 04
- punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06
-
- punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04
- movq mm6, mm7 ; 37 27 17 07 36 26 16 06
-
- punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3
- punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2
-
- lea rdx, srct
- movq mm5, mm6 ; 76 66 56 46 36 26 16 06
-
- movq [rdx+56], mm7
- psubusb mm5, mm7 ; q2-q3
-
-
- movq [rdx+48], mm6
- psubusb mm7, mm6 ; q3-q2
-
- por mm7, mm5; ; mm7=abs (q3-q2)
- movq mm5, mm0 ; 35 25 15 05 34 24 14 04
-
- punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1
- punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0
-
- movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1
- psubusb mm3, mm6 ; q1-q2
-
- psubusb mm6, mm5 ; q2-q1
- por mm6, mm3 ; mm6=abs(q2-q1)
-
- movq [rdx+40], mm5 ; save q1
- movq [rdx+32], mm0 ; save q0
-
- movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00
- punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00
-
- movq mm0, mm3 ; 13 03 12 02 11 01 10 00
- punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00
-
- punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02
- movq mm1, mm0 ; 31 21 11 01 30 20 10 00
-
- punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3
- punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2
-
- movq [rdx], mm0 ; save p3
- movq [rdx+8], mm1 ; save p2
-
- movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2
- psubusb mm2, mm0 ; p2-p3
-
- psubusb mm0, mm1 ; p3-p2
- por mm0, mm2 ; mm0=abs(p3-p2)
-
- movq mm2, mm3 ; 33 23 13 03 32 22 12 02
- punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1
-
- punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0
- movq [rdx+24], mm3 ; save p0
-
- movq [rdx+16], mm2 ; save p1
- movq mm5, mm2 ; mm5 = p1
-
- psubusb mm2, mm1 ; p1-p2
- psubusb mm1, mm5 ; p2-p1
-
- por mm1, mm2 ; mm1=abs(p2-p1)
- mov rdx, arg(3) ;limit
-
- movq mm4, [rdx] ; mm4 = limit
- psubusb mm7, mm4 ; abs(q3-q2) > limit
-
- psubusb mm0, mm4 ; abs(p3-p2) > limit
- psubusb mm1, mm4 ; abs(p2-p1) > limit
-
- psubusb mm6, mm4 ; abs(q2-q1) > limit
- por mm7, mm6 ; or
-
- por mm0, mm1 ;
- por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
- movq mm1, mm5 ; p1
-
- movq mm7, mm3 ; mm3=mm7=p0
- psubusb mm7, mm5 ; p0 - p1
-
- psubusb mm5, mm3 ; p1 - p0
- por mm5, mm7 ; abs(p1-p0)
-
- movq t0, mm5 ; save abs(p1-p0)
- lea rdx, srct
-
- psubusb mm5, mm4 ; mm5 = abs(p1-p0) > limit
- por mm0, mm5 ; mm0=mask
-
- movq mm5, [rdx+32] ; mm5=q0
- movq mm7, [rdx+40] ; mm7=q1
-
- movq mm6, mm5 ; mm6=q0
- movq mm2, mm7 ; q1
- psubusb mm5, mm7 ; q0-q1
-
- psubusb mm7, mm6 ; q1-q0
- por mm7, mm5 ; abs(q1-q0)
-
- movq t1, mm7 ; save abs(q1-q0)
- psubusb mm7, mm4 ; mm7=abs(q1-q0)> limit
-
- por mm0, mm7 ; mask
-
- movq mm5, mm2 ; q1
- psubusb mm5, mm1 ; q1-=p1
- psubusb mm1, mm2 ; p1-=q1
- por mm5, mm1 ; abs(p1-q1)
- pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
- psrlw mm5, 1 ; abs(p1-q1)/2
-
- mov rdx, arg(2) ;blimit ;
-
- movq mm4, [rdx] ;blimit
- movq mm1, mm3 ; mm1=mm3=p0
-
- movq mm7, mm6 ; mm7=mm6=q0
- psubusb mm1, mm7 ; p0-q0
-
- psubusb mm7, mm3 ; q0-p0
- por mm1, mm7 ; abs(q0-p0)
- paddusb mm1, mm1 ; abs(q0-p0)*2
- paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
- psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
- por mm1, mm0; ; mask
-
- pxor mm0, mm0
- pcmpeqb mm1, mm0
-
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movq mm7, [rdx]
- ;
- movq mm4, t0 ; get abs (q1 - q0)
- psubusb mm4, mm7 ; abs(q1 - q0) > thresh
-
- movq mm3, t1 ; get abs (p1 - p0)
- psubusb mm3, mm7 ; abs(p1 - p0)> thresh
-
- por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- pcmpeqb mm4, mm0
-
- pcmpeqb mm0, mm0
- pxor mm4, mm0
-
-
-
-
- ; start work on filters
- lea rdx, srct
-
- ; start work on filters
- movq mm2, [rdx+16] ; p1
- movq mm7, [rdx+40] ; q1
- pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
- psubsb mm2, mm7 ; p1 - q1
-
- movq mm6, [rdx+24] ; p0
- movq mm0, [rdx+32] ; q0
- pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
-
- movq mm3, mm0 ; q0
- psubsb mm0, mm6 ; q0 - p0
- paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1)
- paddsb mm2, mm0 ; 2 * (q0 - p0)
- paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1)
- pand mm1, mm2 ; mask filter values we don't care about
-
- ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0
- movq mm2, mm1 ; vp8_filter
- pand mm2, mm4; ; Filter2 = vp8_filter & hev
-
- movq mm5, mm2 ;
- paddsb mm5, [GLOBAL(t3)];
-
- pxor mm0, mm0 ; 0
- pxor mm7, mm7 ; 0
-
- punpcklbw mm0, mm5 ; e0f0g0h0
- psraw mm0, 11 ; sign extended shift right by 3
- punpckhbw mm7, mm5 ; a0b0c0d0
- psraw mm7, 11 ; sign extended shift right by 3
- packsswb mm0, mm7 ; Filter2 >>=3;
-
- movq mm5, mm0 ; Filter2
-
- paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
- pxor mm0, mm0 ; 0
- pxor mm7, mm7 ; 0
-
- punpcklbw mm0, mm2 ; e0f0g0h0
- psraw mm0, 11 ; sign extended shift right by 3
- punpckhbw mm7, mm2 ; a0b0c0d0
- psraw mm7, 11 ; sign extended shift right by 3
- packsswb mm0, mm7 ; Filter2 >>=3;
-
- ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0
- psubsb mm3, mm0 ; qs0 =qs0 - filter1
- paddsb mm6, mm5 ; ps0 =ps0 + Filter2
-
- ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0
- ; vp8_filter &= ~hev;
- ; Filter2 = vp8_filter;
- pandn mm4, mm1 ; vp8_filter&=~hev
-
-
- ; mm3=qs0, mm4=filter2, mm6=ps0
-
- ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
- ; s = vp8_signed_char_clamp(qs0 - u);
- ; *oq0 = s^0x80;
- ; s = vp8_signed_char_clamp(ps0 + u);
- ; *op0 = s^0x80;
- pxor mm0, mm0
-
- pxor mm1, mm1
- pxor mm2, mm2
- punpcklbw mm1, mm4
- punpckhbw mm2, mm4
- pmulhw mm1, [GLOBAL(s27)]
- pmulhw mm2, [GLOBAL(s27)]
- paddw mm1, [GLOBAL(s63)]
- paddw mm2, [GLOBAL(s63)]
- psraw mm1, 7
- psraw mm2, 7
- packsswb mm1, mm2
-
- psubsb mm3, mm1
- paddsb mm6, mm1
-
- pxor mm3, [GLOBAL(t80)]
- pxor mm6, [GLOBAL(t80)]
- movq [rdx+24], mm6
- movq [rdx+32], mm3
-
- ; roughly 2/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
- ; s = vp8_signed_char_clamp(qs1 - u);
- ; *oq1 = s^0x80;
- ; s = vp8_signed_char_clamp(ps1 + u);
- ; *op1 = s^0x80;
- pxor mm1, mm1
- pxor mm2, mm2
- punpcklbw mm1, mm4
- punpckhbw mm2, mm4
- pmulhw mm1, [GLOBAL(s18)]
- pmulhw mm2, [GLOBAL(s18)]
- paddw mm1, [GLOBAL(s63)]
- paddw mm2, [GLOBAL(s63)]
- psraw mm1, 7
- psraw mm2, 7
- packsswb mm1, mm2
-
- movq mm3, [rdx + 40]
- movq mm6, [rdx + 16] ; p1
- pxor mm3, [GLOBAL(t80)]
- pxor mm6, [GLOBAL(t80)]
-
- paddsb mm6, mm1
- psubsb mm3, mm1
-
- pxor mm6, [GLOBAL(t80)]
- pxor mm3, [GLOBAL(t80)]
- movq [rdx + 40], mm3
- movq [rdx + 16], mm6
-
- ; roughly 1/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
- ; s = vp8_signed_char_clamp(qs2 - u);
- ; *oq2 = s^0x80;
- ; s = vp8_signed_char_clamp(ps2 + u);
- ; *op2 = s^0x80;
- pxor mm1, mm1
- pxor mm2, mm2
- punpcklbw mm1, mm4
- punpckhbw mm2, mm4
- pmulhw mm1, [GLOBAL(s9)]
- pmulhw mm2, [GLOBAL(s9)]
- paddw mm1, [GLOBAL(s63)]
- paddw mm2, [GLOBAL(s63)]
- psraw mm1, 7
- psraw mm2, 7
- packsswb mm1, mm2
-
- movq mm6, [rdx+ 8]
- movq mm3, [rdx+48]
-
- pxor mm6, [GLOBAL(t80)]
- pxor mm3, [GLOBAL(t80)]
-
- paddsb mm6, mm1
- psubsb mm3, mm1
-
- pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01
- pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06
-
- ; transpose and write back
- movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00
- movq mm1, mm0 ; mm0 = 70 60 50 40 30 20 10 00
-
- punpcklbw mm0, mm6 ; mm0 = 31 30 21 20 11 10 01 00
- punpckhbw mm1, mm6 ; mm3 = 71 70 61 60 51 50 41 40
-
- movq mm2, [rdx+16] ; mm2 = 72 62 52 42 32 22 12 02
- movq mm6, mm2 ; mm3 = 72 62 52 42 32 22 12 02
-
- punpcklbw mm2, [rdx+24] ; mm2 = 33 32 23 22 13 12 03 02
- punpckhbw mm6, [rdx+24] ; mm3 = 73 72 63 62 53 52 43 42
-
- movq mm5, mm0 ; mm5 = 31 30 21 20 11 10 01 00
- punpcklwd mm0, mm2 ; mm0 = 13 12 11 10 03 02 01 00
-
- punpckhwd mm5, mm2 ; mm5 = 33 32 31 30 23 22 21 20
- movq mm4, mm1 ; mm4 = 71 70 61 60 51 50 41 40
-
- punpcklwd mm1, mm6 ; mm1 = 53 52 51 50 43 42 41 40
- punpckhwd mm4, mm6 ; mm4 = 73 72 71 70 63 62 61 60
-
- movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04
- punpcklbw mm2, [rdx+40] ; mm2 = 35 34 25 24 15 14 05 04
-
- movq mm6, mm3 ; mm6 = 76 66 56 46 36 26 15 06
- punpcklbw mm6, [rdx+56] ; mm6 = 37 36 27 26 17 16 07 06
-
- movq mm7, mm2 ; mm7 = 35 34 25 24 15 14 05 04
- punpcklwd mm2, mm6 ; mm2 = 17 16 15 14 07 06 05 04
-
- punpckhwd mm7, mm6 ; mm7 = 37 36 35 34 27 26 25 24
- movq mm6, mm0 ; mm6 = 13 12 11 10 03 02 01 00
-
- punpckldq mm0, mm2 ; mm0 = 07 06 05 04 03 02 01 00
- punpckhdq mm6, mm2 ; mm6 = 17 16 15 14 13 12 11 10
-
- movq [rsi+rax*4], mm0 ; write out
- movq [rdi+rax*4], mm6 ; write out
-
- movq mm0, mm5 ; mm0 = 33 32 31 30 23 22 21 20
- punpckldq mm0, mm7 ; mm0 = 27 26 25 24 23 22 20 20
-
- punpckhdq mm5, mm7 ; mm5 = 37 36 35 34 33 32 31 30
- movq [rsi+rax*2], mm0 ; write out
-
- movq [rdi+rax*2], mm5 ; write out
- movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04
-
- punpckhbw mm2, [rdx+40] ; mm2 = 75 74 65 64 54 54 45 44
- punpckhbw mm3, [rdx+56] ; mm3 = 77 76 67 66 57 56 47 46
-
- movq mm5, mm2 ; mm5 = 75 74 65 64 54 54 45 44
- punpcklwd mm2, mm3 ; mm2 = 57 56 55 54 47 46 45 44
-
- punpckhwd mm5, mm3 ; mm5 = 77 76 75 74 67 66 65 64
- movq mm0, mm1 ; mm0= 53 52 51 50 43 42 41 40
-
- movq mm3, mm4 ; mm4 = 73 72 71 70 63 62 61 60
- punpckldq mm0, mm2 ; mm0 = 47 46 45 44 43 42 41 40
-
- punpckhdq mm1, mm2 ; mm1 = 57 56 55 54 53 52 51 50
- movq [rsi], mm0 ; write out
-
- movq [rdi], mm1 ; write out
- neg rax
-
- punpckldq mm3, mm5 ; mm3 = 67 66 65 64 63 62 61 60
- punpckhdq mm4, mm5 ; mm4 = 77 76 75 74 73 72 71 60
-
- movq [rsi+rax*2], mm3
- movq [rdi+rax*2], mm4
-
- lea rsi, [rsi+rax*8]
- dec rcx
-
- jnz .next8_mbv
-
- add rsp, 96
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
;void vp8_loop_filter_simple_horizontal_edge_mmx
;(
; unsigned char *src_ptr,
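The routines removed above are the MMX macroblock-edge filters; their commented pseudo-code spells out the math that pulls the three pixels on each side of the edge toward it with a 27, 18, 9 (out of 128) taper. A scalar C rendering of those tapered updates, offered only as a readable restatement of the removed code's comments (names are illustrative):

    /* vp8_signed_char_clamp equivalent. */
    static signed char clamp_s8(int v) {
      return (signed char)(v < -128 ? -128 : v > 127 ? 127 : v);
    }

    /* Filter2 is the filter value masked to non-high-variance pixels
       (vp8_filter & ~hev); ps0..ps2 and qs0..qs2 are the signed pixels
       (0x80 offset removed) on each side of the edge. */
    static void mb_taper_sketch(signed char Filter2,
                                signed char *ps2, signed char *ps1,
                                signed char *ps0, signed char *qs0,
                                signed char *qs1, signed char *qs2) {
      signed char u;

      u = clamp_s8((63 + Filter2 * 27) >> 7);   /* roughly 3/7 of the difference */
      *qs0 = clamp_s8(*qs0 - u);
      *ps0 = clamp_s8(*ps0 + u);

      u = clamp_s8((63 + Filter2 * 18) >> 7);   /* roughly 2/7 of the difference */
      *qs1 = clamp_s8(*qs1 - u);
      *ps1 = clamp_s8(*ps1 + u);

      u = clamp_s8((63 + Filter2 * 9) >> 7);    /* roughly 1/7 of the difference */
      *qs2 = clamp_s8(*qs2 - u);
      *ps2 = clamp_s8(*ps2 + u);
    }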
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 295609c58..6f6531c86 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -380,302 +380,6 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
ret
-%macro MB_FILTER_AND_WRITEBACK 1
-%if %1 == 0
- movdqa xmm2, p1 ; p1
- movdqa xmm7, q1 ; q1
-%elif %1 == 1
- movdqa xmm2, [rsi+2*rax] ; p1
- movdqa xmm7, [rdi] ; q1
-
- mov rcx, rax
- neg rcx
-%elif %1 == 2
- lea rdx, srct
-
- movdqa xmm2, [rdx+32] ; p1
- movdqa xmm7, [rdx+80] ; q1
- movdqa xmm6, [rdx+48] ; p0
- movdqa xmm0, [rdx+64] ; q0
-%endif
-
- pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
- pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
-
- psubsb xmm2, xmm7 ; p1 - q1
- movdqa xmm3, xmm0 ; q0
-
- psubsb xmm0, xmm6 ; q0 - p0
-
- paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1)
-
- paddsb xmm2, xmm0 ; 2 * (q0 - p0)
-
- paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1)
-
- pand xmm1, xmm2 ; mask filter values we don't care about
-
- movdqa xmm2, xmm1 ; vp8_filter
-
- pand xmm2, xmm4 ; Filter2 = vp8_filter & hev
- pxor xmm0, xmm0
-
- pandn xmm4, xmm1 ; vp8_filter&=~hev
- pxor xmm1, xmm1
-
- punpcklbw xmm0, xmm4 ; Filter 2 (hi)
- movdqa xmm5, xmm2
-
- punpckhbw xmm1, xmm4 ; Filter 2 (lo)
- paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3)
-
- pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9
-
- pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9
-
- punpckhbw xmm7, xmm5 ; axbxcxdx
- paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
-
- punpcklbw xmm5, xmm5 ; exfxgxhx
- psraw xmm7, 11 ; sign extended shift right by 3
-
- psraw xmm5, 11 ; sign extended shift right by 3
- punpckhbw xmm4, xmm2 ; axbxcxdx
-
- punpcklbw xmm2, xmm2 ; exfxgxhx
- psraw xmm4, 11 ; sign extended shift right by 3
-
- packsswb xmm5, xmm7 ; Filter2 >>=3;
- psraw xmm2, 11 ; sign extended shift right by 3
-
- packsswb xmm2, xmm4 ; Filter1 >>=3;
- movdqa xmm7, xmm1
-
- paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2
- movdqa xmm4, xmm1
-
- psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1
- movdqa xmm5, xmm0
-
- movdqa xmm2, xmm5
- paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63
-
- paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63
- paddw xmm5, xmm5 ; Filter 2 (hi) * 18
-
- paddw xmm7, xmm7 ; Filter 2 (lo) * 18
- paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63
-
- paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63
- paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63
-
- paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63
- psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7
-
- psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7
- psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7
-
- packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
- psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7
-
- psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7
- packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-
- psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7
-
- packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-
- psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3)
- paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 - u3)
-
-%if %1 == 0
- movdqa xmm5, q2 ; q2
- movdqa xmm1, q1 ; q1
- movdqa xmm4, p1 ; p1
- movdqa xmm7, p2 ; p2
-
-%elif %1 == 1
- movdqa xmm5, XMMWORD PTR [rdi+rcx] ; q2
- movdqa xmm1, XMMWORD PTR [rdi] ; q1
- movdqa xmm4, XMMWORD PTR [rsi+rax*2] ; p1
- movdqa xmm7, XMMWORD PTR [rdi+rax*4] ; p2
-%elif %1 == 2
- movdqa xmm5, XMMWORD PTR [rdx+96] ; q2
- movdqa xmm1, XMMWORD PTR [rdx+80] ; q1
- movdqa xmm4, XMMWORD PTR [rdx+32] ; p1
- movdqa xmm7, XMMWORD PTR [rdx+16] ; p2
-%endif
-
- pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80
- pxor xmm6, [GLOBAL(t80)] ; *oq0 = sp^0x80
-
- pxor xmm1, [GLOBAL(t80)]
- pxor xmm4, [GLOBAL(t80)]
-
- psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2)
- paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2)
-
- pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80;
- pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80;
-
- pxor xmm7, [GLOBAL(t80)]
- pxor xmm5, [GLOBAL(t80)]
-
- paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u)
- psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u)
-
- pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80;
- pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80;
-
-%if %1 == 0
- lea rsi, [rsi+rcx*2]
- lea rdi, [rdi+rcx*2]
-
- movq MMWORD PTR [rsi], xmm6 ; p0
- movhps MMWORD PTR [rdi], xmm6
- movq MMWORD PTR [rsi + rcx], xmm3 ; q0
- movhps MMWORD PTR [rdi + rcx], xmm3
-
- movq MMWORD PTR [rsi+rcx*2], xmm1 ; q1
- movhps MMWORD PTR [rdi+rcx*2], xmm1
-
- movq MMWORD PTR [rsi + rax], xmm4 ; p1
- movhps MMWORD PTR [rdi + rax], xmm4
-
- movq MMWORD PTR [rsi+rax*2], xmm7 ; p2
- movhps MMWORD PTR [rdi+rax*2], xmm7
-
- lea rsi, [rsi + rcx]
- lea rdi, [rdi + rcx]
- movq MMWORD PTR [rsi+rcx*2], xmm5 ; q2
- movhps MMWORD PTR [rdi+rcx*2], xmm5
-%elif %1 == 1
- movdqa XMMWORD PTR [rdi+rcx], xmm5 ; q2
- movdqa XMMWORD PTR [rdi], xmm1 ; q1
- movdqa XMMWORD PTR [rsi], xmm3 ; q0
- movdqa XMMWORD PTR [rsi+rax ],xmm6 ; p0
- movdqa XMMWORD PTR [rsi+rax*2],xmm4 ; p1
- movdqa XMMWORD PTR [rdi+rax*4],xmm7 ; p2
-%elif %1 == 2
- movdqa XMMWORD PTR [rdx+80], xmm1 ; q1
- movdqa XMMWORD PTR [rdx+64], xmm3 ; q0
- movdqa XMMWORD PTR [rdx+48], xmm6 ; p0
- movdqa XMMWORD PTR [rdx+32], xmm4 ; p1
-%endif
-
-%endmacro
-
-
-;void vp8_mbloop_filter_horizontal_edge_sse2
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vp8_mbloop_filter_horizontal_edge_sse2)
-sym(vp8_mbloop_filter_horizontal_edge_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 32 ; reserve 32 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step
-
- mov rdx, arg(3) ;limit
- movdqa xmm7, XMMWORD PTR [rdx]
-
- lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing
-
- ; calculate breakout conditions and high edge variance
- LFH_FILTER_AND_HEV_MASK 1
- ; filter and write back the results
- MB_FILTER_AND_WRITEBACK 1
-
- add rsp, 32
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_mbloop_filter_horizontal_edge_uv_sse2
-;(
-; unsigned char *u,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; unsigned char *v
-;)
-global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
-sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 96 ; reserve 96 bytes
- %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16];
- %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16];
- %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16];
- %define p1 [rsp + 48] ;__declspec(align(16)) char p1[16];
- %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16];
-
- mov rsi, arg(0) ; u
- mov rdi, arg(5) ; v
- movsxd rax, dword ptr arg(1) ; src_pixel_step
- mov rcx, rax
- neg rax ; negate pitch to deal with above border
-
- mov rdx, arg(3) ;limit
- movdqa xmm7, XMMWORD PTR [rdx]
-
- lea rsi, [rsi + rcx]
- lea rdi, [rdi + rcx]
-
- ; calculate breakout conditions and high edge variance
- LFH_FILTER_AND_HEV_MASK 0
- ; filter and write back the results
- MB_FILTER_AND_WRITEBACK 0
-
- add rsp, 96
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
%macro TRANSPOSE_16X8 2
movq xmm4, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
movq xmm1, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
@@ -1141,233 +845,6 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
pop rbp
ret
-%macro MBV_TRANSPOSE 0
- movdqa xmm0, [rdx] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
- movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-
- punpcklbw xmm0, xmm7 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
- punpckhbw xmm1, xmm7 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
-
- movdqa xmm2, [rdx+32] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- movdqa xmm6, xmm2 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
- punpcklbw xmm2, [rdx+48] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
- punpckhbw xmm6, [rdx+48] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-
- movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
- punpcklwd xmm0, xmm2 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
-
- punpckhwd xmm3, xmm2 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
- movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
-
- punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
- punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
-
- movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- punpcklbw xmm2, [rdx+80] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
-
- movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
- punpcklbw xmm6, [rdx+112] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
-
- movdqa xmm7, xmm2 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
- punpcklwd xmm2, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
-
- punpckhwd xmm7, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
- movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
-
- punpckldq xmm0, xmm2 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
- punpckhdq xmm6, xmm2 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
-%endmacro
-
-%macro MBV_WRITEBACK_1 0
- movq QWORD PTR [rsi], xmm0
- movhps MMWORD PTR [rdi], xmm0
-
- movq QWORD PTR [rsi+2*rax], xmm6
- movhps MMWORD PTR [rdi+2*rax], xmm6
-
- movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
- punpckldq xmm0, xmm7 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
-
- punpckhdq xmm3, xmm7 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
-
- movq QWORD PTR [rsi+4*rax], xmm0
- movhps MMWORD PTR [rdi+4*rax], xmm0
-
- movq QWORD PTR [rsi+2*rcx], xmm3
- movhps MMWORD PTR [rdi+2*rcx], xmm3
-
- movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- punpckhbw xmm2, [rdx+80] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
-
- punpckhbw xmm5, [rdx+112] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
- movdqa xmm0, xmm2
-
- punpcklwd xmm0, xmm5 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
- punpckhwd xmm2, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
-
- movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
- punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
-
- punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
-%endmacro
-
-%macro MBV_WRITEBACK_2 0
- movq QWORD PTR [rsi], xmm1
- movhps MMWORD PTR [rdi], xmm1
-
- movq QWORD PTR [rsi+2*rax], xmm5
- movhps MMWORD PTR [rdi+2*rax], xmm5
-
- movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
- punpckldq xmm1, xmm2 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
- punpckhdq xmm4, xmm2 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
-
- movq QWORD PTR [rsi+4*rax], xmm1
- movhps MMWORD PTR [rdi+4*rax], xmm1
-
- movq QWORD PTR [rsi+2*rcx], xmm4
- movhps MMWORD PTR [rdi+2*rcx], xmm4
-%endmacro
-
-
-;void vp8_mbloop_filter_vertical_edge_sse2
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vp8_mbloop_filter_vertical_edge_sse2)
-sym(vp8_mbloop_filter_vertical_edge_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 160 ; reserve 160 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
- %define srct [rsp + 32] ;__declspec(align(16)) char srct[128];
-
- mov rsi, arg(0) ; src_ptr
- movsxd rax, dword ptr arg(1) ; src_pixel_step
-
- lea rsi, [rsi - 4]
- lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
- lea rcx, [rax*2+rax]
-
- ; Transpose
- TRANSPOSE_16X8 1, 0
-
- ; calculate filter mask and high edge variance
- LFV_FILTER_MASK_HEV_MASK 0
-
- neg rax
- ; start work on filters
- MB_FILTER_AND_WRITEBACK 2
-
- lea rsi, [rsi+rax*8]
- lea rdi, [rdi+rax*8]
-
- ; transpose and write back
- MBV_TRANSPOSE
-
- neg rax
-
- MBV_WRITEBACK_1
-
- lea rsi, [rsi+rax*8]
- lea rdi, [rdi+rax*8]
- MBV_WRITEBACK_2
-
- add rsp, 160
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_mbloop_filter_vertical_edge_uv_sse2
-;(
-; unsigned char *u,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; unsigned char *v
-;)
-global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
-sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 160 ; reserve 160 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
- %define srct [rsp + 32] ;__declspec(align(16)) char srct[128];
-
- mov rsi, arg(0) ; u_ptr
- movsxd rax, dword ptr arg(1) ; src_pixel_step
-
- lea rsi, [rsi - 4]
- lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
- lea rcx, [rax+2*rax]
-
- lea rdx, srct
-
- ; Transpose
- TRANSPOSE_16X8 0, 0
-
- ; calculate filter mask and high edge variance
- LFV_FILTER_MASK_HEV_MASK 0
-
- ; start work on filters
- MB_FILTER_AND_WRITEBACK 2
-
- ; transpose and write back
- MBV_TRANSPOSE
-
- mov rsi, arg(0) ;u_ptr
- lea rsi, [rsi - 4]
- lea rdi, [rsi + rax]
- MBV_WRITEBACK_1
- mov rsi, arg(5) ;v_ptr
- lea rsi, [rsi - 4]
- lea rdi, [rsi + rax]
- MBV_WRITEBACK_2
-
- add rsp, 160
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
;void vp8_loop_filter_simple_horizontal_edge_sse2
;(
; unsigned char *src_ptr,
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index e7239818e..716d10c79 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -9,63 +9,36 @@
*/
+#include <emmintrin.h> // SSE2
#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
-prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
-prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
-prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
-prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
-extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
-extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
#if HAVE_MMX
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, struct loop_filter_info *lfi) {
- vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
-
- if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
-
- if (v_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, struct loop_filter_info *lfi) {
- vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
-
- if (u_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
-
- if (v_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, struct loop_filter_info *lfi) {
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
-
- if (u_ptr)
- vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
- if (v_ptr)
- vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
@@ -99,26 +72,413 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned
#endif
-/* Horizontal MB filtering */
#if HAVE_SSE2
+void vp8_mbloop_filter_horizontal_edge_c_sse2
+(
+ unsigned char *s,
+ int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh,
+ int count
+) {
+ DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]);
+ __m128i mask, hev, flat;
+ __m128i thresh, limit, blimit;
+ const __m128i zero = _mm_set1_epi16(0);
+ __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;
+
+ thresh = _mm_shuffle_epi32(_mm_cvtsi32_si128(_thresh[0] * 0x01010101), 0);
+ limit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_limit[0] * 0x01010101), 0);
+ blimit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_blimit[0] * 0x01010101), 0);
+
+ p4 = _mm_loadu_si128((__m128i *)(s - 5 * p));
+ p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
+ p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
+ p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
+ q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
+ q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
+ q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
+ q4 = _mm_loadu_si128((__m128i *)(s + 4 * p));
+ {
+ const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
+ _mm_subs_epu8(p0, p1));
+ const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
+ _mm_subs_epu8(q0, q1));
+ const __m128i one = _mm_set1_epi8(1);
+ const __m128i fe = _mm_set1_epi8(0xfe);
+ const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);
+ __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0),
+ _mm_subs_epu8(q0, p0));
+ __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1),
+ _mm_subs_epu8(q1, p1));
+ __m128i work;
+ flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
+ hev = _mm_subs_epu8(flat, thresh);
+ hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
+
+ abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
+ mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit);
+ mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
+ // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
+ mask = _mm_max_epu8(flat, mask);
+ // mask |= (abs(p1 - p0) > limit) * -1;
+ // mask |= (abs(q1 - q0) > limit) * -1;
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1),
+ _mm_subs_epu8(p1, p2)),
+ _mm_or_si128(_mm_subs_epu8(p3, p2),
+ _mm_subs_epu8(p2, p3)));
+ mask = _mm_max_epu8(work, mask);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1),
+ _mm_subs_epu8(q1, q2)),
+ _mm_or_si128(_mm_subs_epu8(q3, q2),
+ _mm_subs_epu8(q2, q3)));
+ mask = _mm_max_epu8(work, mask);
+ mask = _mm_subs_epu8(mask, limit);
+ mask = _mm_cmpeq_epi8(mask, zero);
+
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
+ _mm_subs_epu8(p0, p2)),
+ _mm_or_si128(_mm_subs_epu8(q2, q0),
+ _mm_subs_epu8(q0, q2)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
+ _mm_subs_epu8(p0, p3)),
+ _mm_or_si128(_mm_subs_epu8(q3, q0),
+ _mm_subs_epu8(q0, q3)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0),
+ _mm_subs_epu8(p0, p4)),
+ _mm_or_si128(_mm_subs_epu8(q4, q0),
+ _mm_subs_epu8(q0, q4)));
+ flat = _mm_max_epu8(work, flat);
+ flat = _mm_subs_epu8(flat, one);
+ flat = _mm_cmpeq_epi8(flat, zero);
+ flat = _mm_and_si128(flat, mask);
+ }
+ {
+ const __m128i four = _mm_set1_epi16(4);
+ unsigned char *src = s;
+ int i = 0;
+ do {
+ __m128i workp_a, workp_b, workp_shft;
+ p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero);
+ p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero);
+ p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero);
+ p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero);
+ p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero);
+ q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero);
+ q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero);
+ q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero);
+ q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero);
+ q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero);
+
+ workp_a = _mm_add_epi16(_mm_add_epi16(p4, p3), _mm_add_epi16(p2, p1));
+ workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0);
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p4);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_op2[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_op1[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q2);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_op0[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_oq0[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q4);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_oq1[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q4);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_oq2[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ src += 8;
+ } while (++i < count);
+ }
+ // lp filter
+ {
+ const __m128i t4 = _mm_set1_epi8(4);
+ const __m128i t3 = _mm_set1_epi8(3);
+ const __m128i t80 = _mm_set1_epi8(0x80);
+ const __m128i te0 = _mm_set1_epi8(0xe0);
+ const __m128i t1f = _mm_set1_epi8(0x1f);
+ const __m128i t1 = _mm_set1_epi8(0x1);
+ const __m128i t7f = _mm_set1_epi8(0x7f);
+
+ const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)),
+ t80);
+ const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)),
+ t80);
+ const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)),
+ t80);
+ const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)),
+ t80);
+ __m128i vp8_filt;
+ __m128i work_a;
+ __m128i filter1, filter2;
+
+ vp8_filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev);
+ work_a = _mm_subs_epi8(qs0, ps0);
+ vp8_filt = _mm_adds_epi8(vp8_filt, work_a);
+ vp8_filt = _mm_adds_epi8(vp8_filt, work_a);
+ vp8_filt = _mm_adds_epi8(vp8_filt, work_a);
+ /* (vp8_filter + 3 * (qs0 - ps0)) & mask */
+ vp8_filt = _mm_and_si128(vp8_filt, mask);
+
+ filter1 = _mm_adds_epi8(vp8_filt, t4);
+ filter2 = _mm_adds_epi8(vp8_filt, t3);
+
+ /* Filter1 >> 3 */
+ work_a = _mm_cmpgt_epi8(zero, filter1);
+ filter1 = _mm_srli_epi16(filter1, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter1 = _mm_and_si128(filter1, t1f);
+ filter1 = _mm_or_si128(filter1, work_a);
+
+ /* Filter2 >> 3 */
+ work_a = _mm_cmpgt_epi8(zero, filter2);
+ filter2 = _mm_srli_epi16(filter2, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter2 = _mm_and_si128(filter2, t1f);
+ filter2 = _mm_or_si128(filter2, work_a);
+
+ /* vp8_filt >> 1 */
+ vp8_filt = _mm_adds_epi8(filter1, t1);
+ work_a = _mm_cmpgt_epi8(zero, vp8_filt);
+ vp8_filt = _mm_srli_epi16(vp8_filt, 1);
+ work_a = _mm_and_si128(work_a, t80);
+ vp8_filt = _mm_and_si128(vp8_filt, t7f);
+ vp8_filt = _mm_or_si128(vp8_filt, work_a);
+
+ vp8_filt = _mm_andnot_si128(hev, vp8_filt);
+
+ work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80);
+ q0 = _mm_load_si128((__m128i *)flat_oq0);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q0 = _mm_and_si128(flat, q0);
+ q0 = _mm_or_si128(work_a, q0);
+
+ work_a = _mm_xor_si128(_mm_subs_epi8(qs1, vp8_filt), t80);
+ q1 = _mm_load_si128((__m128i *)flat_oq1);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q1 = _mm_and_si128(flat, q1);
+ q1 = _mm_or_si128(work_a, q1);
+
+ work_a = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ q2 = _mm_load_si128((__m128i *)flat_oq2);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q2 = _mm_and_si128(flat, q2);
+ q2 = _mm_or_si128(work_a, q2);
+
+ work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80);
+ p0 = _mm_load_si128((__m128i *)flat_op0);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p0 = _mm_and_si128(flat, p0);
+ p0 = _mm_or_si128(work_a, p0);
+
+ work_a = _mm_xor_si128(_mm_adds_epi8(ps1, vp8_filt), t80);
+ p1 = _mm_load_si128((__m128i *)flat_op1);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p1 = _mm_and_si128(flat, p1);
+ p1 = _mm_or_si128(work_a, p1);
+
+ work_a = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ p2 = _mm_load_si128((__m128i *)flat_op2);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p2 = _mm_and_si128(flat, p2);
+ p2 = _mm_or_si128(work_a, p2);
+
+ if (count == 1) {
+ _mm_storel_epi64((__m128i *)(s - 3 * p), p2);
+ _mm_storel_epi64((__m128i *)(s - 2 * p), p1);
+ _mm_storel_epi64((__m128i *)(s - 1 * p), p0);
+ _mm_storel_epi64((__m128i *)(s + 0 * p), q0);
+ _mm_storel_epi64((__m128i *)(s + 1 * p), q1);
+ _mm_storel_epi64((__m128i *)(s + 2 * p), q2);
+ } else {
+ _mm_storeu_si128((__m128i *)(s - 3 * p), p2);
+ _mm_storeu_si128((__m128i *)(s - 2 * p), p1);
+ _mm_storeu_si128((__m128i *)(s - 1 * p), p0);
+ _mm_storeu_si128((__m128i *)(s + 0 * p), q0);
+ _mm_storeu_si128((__m128i *)(s + 1 * p), q1);
+ _mm_storeu_si128((__m128i *)(s + 2 * p), q2);
+ }
+ }
+}
+static __inline void transpose(unsigned char *src[], int in_p,
+ unsigned char *dst[], int out_p,
+ int num_8x8_to_transpose) {
+ int idx8x8 = 0;
+ __m128i x0, x1, x2, x3, x4, x5, x6, x7;
+
+ do {
+ unsigned char *in = src[idx8x8];
+ unsigned char *out = dst[idx8x8];
+
+ x0 = _mm_loadl_epi64((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07
+ x1 = _mm_loadl_epi64((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17
+ x2 = _mm_loadl_epi64((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27
+ x3 = _mm_loadl_epi64((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37
+ x4 = _mm_loadl_epi64((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47
+ x5 = _mm_loadl_epi64((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57
+ x6 = _mm_loadl_epi64((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67
+ x7 = _mm_loadl_epi64((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77
+ // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
+ x0 = _mm_unpacklo_epi8(x0, x1);
+ // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
+ x1 = _mm_unpacklo_epi8(x2, x3);
+ // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
+ x2 = _mm_unpacklo_epi8(x4, x5);
+ // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
+ x3 = _mm_unpacklo_epi8(x6, x7);
+ // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ x4 = _mm_unpacklo_epi16(x0, x1);
+ // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
+ x5 = _mm_unpacklo_epi16(x2, x3);
+ // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ x6 = _mm_unpacklo_epi32(x4, x5);
+ // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ x7 = _mm_unpackhi_epi32(x4, x5);
+
+ _mm_storel_pd((double *)(out + 0*out_p),
+ _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70
+ _mm_storeh_pd((double *)(out + 1*out_p),
+ _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71
+ _mm_storel_pd((double *)(out + 2*out_p),
+ _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72
+ _mm_storeh_pd((double *)(out + 3*out_p),
+ _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73
+
+ // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
+ x4 = _mm_unpackhi_epi16(x0, x1);
+ // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77
+ x5 = _mm_unpackhi_epi16(x2, x3);
+ // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
+ x6 = _mm_unpacklo_epi32(x4, x5);
+ // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
+ x7 = _mm_unpackhi_epi32(x4, x5);
+
+ _mm_storel_pd((double *)(out + 4*out_p),
+ _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74
+ _mm_storeh_pd((double *)(out + 5*out_p),
+ _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75
+ _mm_storel_pd((double *)(out + 6*out_p),
+ _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76
+ _mm_storeh_pd((double *)(out + 7*out_p),
+ _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77
+ } while (++idx8x8 < num_8x8_to_transpose);
+}
+void vp8_mbloop_filter_vertical_edge_c_sse2
+(
+ unsigned char *s,
+ int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ int count
+) {
+ DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 16]);
+ unsigned char *src[4];
+ unsigned char *dst[4];
+
+ src[0] = s - 5;
+ src[1] = s - 5 + 8;
+ src[2] = s - 5 + p*8;
+ src[3] = s - 5 + p*8 + 8;
+
+ dst[0] = t_dst;
+ dst[1] = t_dst + 16*8;
+ dst[2] = t_dst + 8;
+ dst[3] = t_dst + 16*8 + 8;
+
+ // 16x16->16x16 or 16x8->8x16
+ transpose(src, p, dst, 16, (1 << count));
+
+ vp8_mbloop_filter_horizontal_edge_c_sse2(t_dst + 5*16, 16, blimit, limit,
+ thresh, count);
+
+ dst[0] = s - 5;
+ dst[1] = s - 5 + p*8;
+
+ src[0] = t_dst;
+ src[1] = t_dst + 8;
+
+ // 16x8->8x16 or 8x8->8x8
+ transpose(src, 16, dst, p, (1 << (count - 1)));
+}
+
+/* Horizontal MB filtering */
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, struct loop_filter_info *lfi) {
- vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+ vp8_mbloop_filter_horizontal_edge_c_sse2(y_ptr, y_stride, lfi->mblim,
+ lfi->lim, lfi->hev_thr, 2);
+
+ /* TODO: write sse2 version with u,v interleaved */
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
+ vp8_mbloop_filter_horizontal_edge_c_sse2(u_ptr, uv_stride, lfi->mblim,
+ lfi->lim, lfi->hev_thr, 1);
+
+ if (v_ptr)
+ vp8_mbloop_filter_horizontal_edge_c_sse2(v_ptr, uv_stride, lfi->mblim,
+ lfi->lim, lfi->hev_thr, 1);
}
+void vp8_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp8_mbloop_filter_horizontal_edge_c_sse2(
+ y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+}
/* Vertical MB Filtering */
-void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, struct loop_filter_info *lfi) {
- vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp8_mbloop_filter_vertical_edge_c_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 2);
+ /* TODO: write sse2 version with u,v interleaved */
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
+ vp8_mbloop_filter_vertical_edge_c_sse2(u_ptr, uv_stride, lfi->mblim,
+ lfi->lim, lfi->hev_thr, 1);
+
+ if (v_ptr)
+ vp8_mbloop_filter_vertical_edge_c_sse2(v_ptr, uv_stride, lfi->mblim,
+ lfi->lim, lfi->hev_thr, 1);
}
+void vp8_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp8_mbloop_filter_vertical_edge_c_sse2(
+ y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
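The SSE2 intrinsic routines added to loopfilter_x86.c above fold three per-pixel decisions into byte masks: whether the edge gets filtered at all (mask), whether it has high edge variance (hev, which keeps the sharp 4-tap behaviour), and whether the surrounding samples are flat enough for the wide smoothing filter (flat, which the vector code additionally ANDs with mask). A scalar rendering of those decisions, written only to illustrate what the intrinsics compute, not code from this patch:

    #include <stdlib.h>   /* abs() */

    /* 1 if the edge should be filtered at all. */
    static int filter_mask_sketch(int limit, int blimit,
                                  int p3, int p2, int p1, int p0,
                                  int q0, int q1, int q2, int q3) {
      int mask = 1;
      mask &= abs(p3 - p2) <= limit;
      mask &= abs(p2 - p1) <= limit;
      mask &= abs(p1 - p0) <= limit;
      mask &= abs(q1 - q0) <= limit;
      mask &= abs(q2 - q1) <= limit;
      mask &= abs(q3 - q2) <= limit;
      mask &= abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit;
      return mask;
    }

    /* 1 if the edge shows high variance. */
    static int hev_mask_sketch(int thresh, int p1, int p0, int q0, int q1) {
      return abs(p1 - p0) > thresh || abs(q1 - q0) > thresh;
    }

    /* 1 if every sample within four pixels of the edge is within 1 of the
       edge pixel on its side, i.e. the area is flat enough for the wide
       averaging filter. */
    static int flat_mask_sketch(int p4, int p3, int p2, int p1, int p0,
                                int q0, int q1, int q2, int q3, int q4) {
      return abs(p1 - p0) <= 1 && abs(q1 - q0) <= 1 &&
             abs(p2 - p0) <= 1 && abs(q2 - q0) <= 1 &&
             abs(p3 - p0) <= 1 && abs(q3 - q0) <= 1 &&
             abs(p4 - p0) <= 1 && abs(q4 - q0) <= 1;
    }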
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 72ff126f2..0adc3333e 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -170,16 +170,13 @@ static void kfread_modes(VP8D_COMP *pbi,
m->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
#endif
-#if CONFIG_TX_SELECT
if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 &&
m->mbmi.mode <= I8X8_PRED) {
// FIXME(rbultje) code ternary symbol once all experiments are merged
m->mbmi.txfm_size = vp8_read(bc, cm->prob_tx[0]);
if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED)
m->mbmi.txfm_size += vp8_read(bc, cm->prob_tx[1]);
- } else
-#endif
- if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) {
+ } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) {
m->mbmi.txfm_size = TX_16X16;
} else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) {
m->mbmi.txfm_size = TX_8X8;
@@ -188,7 +185,6 @@ static void kfread_modes(VP8D_COMP *pbi,
}
}
-#if CONFIG_NEWMVENTROPY
static int read_nmv_component(vp8_reader *r,
int rv,
const nmv_component *mvcomp) {
@@ -207,7 +203,7 @@ static int read_nmv_component(vp8_reader *r,
o = d << 3;
z = vp8_get_mv_mag(c, o);
- v = (s ? -(z + 1) : (z + 1));
+ v = (s ? -(z + 8) : (z + 8));
return v;
}
@@ -219,6 +215,7 @@ static int read_nmv_component_fp(vp8_reader *r,
int s, z, c, o, d, e, f;
s = v < 0;
z = (s ? -v : v) - 1; /* magnitude - 1 */
+ z &= ~7;
c = vp8_get_mv_class(z, &o);
d = o >> 3;
@@ -332,124 +329,6 @@ static void read_nmvprobs(vp8_reader *bc, nmv_context *mvctx,
}
}
-#else
-
-static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) {
- const vp8_prob *const p = (const vp8_prob *) mvc;
- int x = 0;
-
- if (vp8_read(r, p [mvpis_short])) { /* Large */
- int i = 0;
-
- do {
- x += vp8_read(r, p [MVPbits + i]) << i;
- } while (++i < mvnum_short_bits);
-
- i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
-
- do {
- x += vp8_read(r, p [MVPbits + i]) << i;
- } while (--i > mvnum_short_bits);
-
- if (!(x & ~((2 << mvnum_short_bits) - 1)) || vp8_read(r, p [MVPbits + mvnum_short_bits]))
- x += (mvnum_short);
- } else /* small */
- x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort);
-
- if (x && vp8_read(r, p [MVPsign]))
- x = -x;
-
- return x;
-}
-
-static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) {
- mv->row = (short)(read_mvcomponent(r, mvc) << 1);
- mv->col = (short)(read_mvcomponent(r, ++mvc) << 1);
-#ifdef DEBUG_DEC_MV
- int i;
- printf("%d (np): %d %d\n", dec_mvcount++, mv->row, mv->col);
- // for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[-1])->prob[i]); printf("\n");
- // for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n");
-#endif
-}
-
-static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) {
- int i = 0;
-
- do {
- const vp8_prob *up = vp8_mv_update_probs[i].prob;
- vp8_prob *p = (vp8_prob *)(mvc + i);
- vp8_prob *const pstop = p + MVPcount;
-
- do {
- if (vp8_read(bc, *up++)) {
- const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7);
-
- *p = x ? x << 1 : 1;
- }
- } while (++p < pstop);
- } while (++i < 2);
-}
-
-static int read_mvcomponent_hp(vp8_reader *r, const MV_CONTEXT_HP *mvc) {
- const vp8_prob *const p = (const vp8_prob *) mvc;
- int x = 0;
-
- if (vp8_read(r, p [mvpis_short_hp])) { /* Large */
- int i = 0;
-
- do {
- x += vp8_read(r, p [MVPbits_hp + i]) << i;
- } while (++i < mvnum_short_bits_hp);
-
- i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */
-
- do {
- x += vp8_read(r, p [MVPbits_hp + i]) << i;
- } while (--i > mvnum_short_bits_hp);
-
- if (!(x & ~((2 << mvnum_short_bits_hp) - 1)) || vp8_read(r, p [MVPbits_hp + mvnum_short_bits_hp]))
- x += (mvnum_short_hp);
- } else /* small */
- x = vp8_treed_read(r, vp8_small_mvtree_hp, p + MVPshort_hp);
-
- if (x && vp8_read(r, p [MVPsign_hp]))
- x = -x;
-
- return x;
-}
-
-static void read_mv_hp(vp8_reader *r, MV *mv, const MV_CONTEXT_HP *mvc) {
- mv->row = (short)(read_mvcomponent_hp(r, mvc));
- mv->col = (short)(read_mvcomponent_hp(r, ++mvc));
-#ifdef DEBUG_DEC_MV
- int i;
- printf("%d (hp): %d %d\n", dec_mvcount++, mv->row, mv->col);
- // for (i=0; i<MVPcount_hp;++i) printf(" %d", (&mvc[-1])->prob[i]); printf("\n");
- // for (i=0; i<MVPcount_hp;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n");
-#endif
-}
-
-static void read_mvcontexts_hp(vp8_reader *bc, MV_CONTEXT_HP *mvc) {
- int i = 0;
-
- do {
- const vp8_prob *up = vp8_mv_update_probs_hp[i].prob;
- vp8_prob *p = (vp8_prob *)(mvc + i);
- vp8_prob *const pstop = p + MVPcount_hp;
-
- do {
- if (vp8_read(bc, *up++)) {
- const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7);
-
- *p = x ? x << 1 : 1;
- }
- } while (++p < pstop);
- } while (++i < 2);
-}
-
-#endif /* CONFIG_NEWMVENTROPY */
-
// Read the reference frame
static MV_REFERENCE_FRAME read_ref_frame(VP8D_COMP *pbi,
vp8_reader *const bc,
@@ -596,13 +475,11 @@ static const unsigned char mbsplit_fill_offset[4][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
};
-#if CONFIG_SWITCHABLE_INTERP
static void read_switchable_interp_probs(VP8D_COMP* const pbi,
BOOL_DECODER* const bc) {
VP8_COMMON *const cm = &pbi->common;
int i, j;
for (j = 0; j <= VP8_SWITCHABLE_FILTERS; ++j) {
- //for (j = 0; j <= 0; ++j) {
for (i = 0; i < VP8_SWITCHABLE_FILTERS - 1; ++i) {
cm->fc.switchable_interp_prob[j][i] = vp8_read_literal(bc, 8);
}
@@ -610,16 +487,10 @@ static void read_switchable_interp_probs(VP8D_COMP* const pbi,
//printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0],
//cm->fc.switchable_interp_prob[1]);
}
-#endif
static void mb_mode_mv_init(VP8D_COMP *pbi, vp8_reader *bc) {
VP8_COMMON *const cm = &pbi->common;
-#if CONFIG_NEWMVENTROPY
nmv_context *const nmvc = &pbi->common.fc.nmvc;
-#else
- MV_CONTEXT *const mvc = pbi->common.fc.mvc;
- MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp;
-#endif
MACROBLOCKD *const xd = &pbi->mb;
if (cm->frame_type == KEY_FRAME) {
@@ -632,10 +503,8 @@ static void mb_mode_mv_init(VP8D_COMP *pbi, vp8_reader *bc) {
if (cm->pred_filter_mode == 2)
cm->prob_pred_filter_off = (vp8_prob)vp8_read_literal(bc, 8);
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cm->mcomp_filter_type == SWITCHABLE)
read_switchable_interp_probs(pbi, bc);
-#endif
// Decode the baseline probabilities for decoding reference frame
cm->prob_intra_coded = (vp8_prob)vp8_read_literal(bc, 8);
cm->prob_last_coded = (vp8_prob)vp8_read_literal(bc, 8);
@@ -661,14 +530,7 @@ static void mb_mode_mv_init(VP8D_COMP *pbi, vp8_reader *bc) {
cm->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
} while (++i < VP8_YMODES - 1);
}
-#if CONFIG_NEWMVENTROPY
read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv)
- read_mvcontexts_hp(bc, mvc_hp);
- else
- read_mvcontexts(bc, mvc);
-#endif
}
}
@@ -751,12 +613,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
int mb_row, int mb_col,
BOOL_DECODER* const bc) {
VP8_COMMON *const cm = &pbi->common;
-#if CONFIG_NEWMVENTROPY
nmv_context *const nmvc = &pbi->common.fc.nmvc;
-#else
- MV_CONTEXT *const mvc = pbi->common.fc.mvc;
- MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp;
-#endif
const int mis = pbi->common.mode_info_stride;
MACROBLOCKD *const xd = &pbi->mb;
@@ -894,19 +751,16 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->pred_filter_enabled = cm->pred_filter_mode;
}
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV)
{
if (cm->mcomp_filter_type == SWITCHABLE) {
mbmi->interp_filter = vp8_switchable_interp[
vp8_treed_read(bc, vp8_switchable_interp_tree,
get_pred_probs(cm, xd, PRED_SWITCHABLE_INTERP))];
- //printf("Reading: %d\n", mbmi->interp_filter);
} else {
mbmi->interp_filter = cm->mcomp_filter_type;
}
}
-#endif
if (cm->comp_pred_mode == COMP_PREDICTION_ONLY ||
(cm->comp_pred_mode == HYBRID_PREDICTION &&
@@ -1005,44 +859,20 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
switch (blockmode) {
case NEW4X4:
-#if CONFIG_NEWMVENTROPY
read_nmv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc);
read_nmv_fp(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc,
xd->allow_high_precision_mv);
vp8_increment_nmv(&blockmv.as_mv, &best_mv.as_mv,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- read_mv_hp(bc, &blockmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp);
- cm->fc.MVcount_hp[0][mv_max_hp + (blockmv.as_mv.row)]++;
- cm->fc.MVcount_hp[1][mv_max_hp + (blockmv.as_mv.col)]++;
- } else {
- read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
- cm->fc.MVcount[0][mv_max + (blockmv.as_mv.row >> 1)]++;
- cm->fc.MVcount[1][mv_max + (blockmv.as_mv.col >> 1)]++;
- }
-#endif /* CONFIG_NEWMVENTROPY */
blockmv.as_mv.row += best_mv.as_mv.row;
blockmv.as_mv.col += best_mv.as_mv.col;
if (mbmi->second_ref_frame) {
-#if CONFIG_NEWMVENTROPY
read_nmv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc);
read_nmv_fp(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
xd->allow_high_precision_mv);
vp8_increment_nmv(&secondmv.as_mv, &best_mv_second.as_mv,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- read_mv_hp(bc, &secondmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp);
- cm->fc.MVcount_hp[0][mv_max_hp + (secondmv.as_mv.row)]++;
- cm->fc.MVcount_hp[1][mv_max_hp + (secondmv.as_mv.col)]++;
- } else {
- read_mv(bc, &secondmv.as_mv, (const MV_CONTEXT *) mvc);
- cm->fc.MVcount[0][mv_max + (secondmv.as_mv.row >> 1)]++;
- cm->fc.MVcount[1][mv_max + (secondmv.as_mv.col >> 1)]++;
- }
-#endif /* CONFIG_NEWMVENTROPY */
secondmv.as_mv.row += best_mv_second.as_mv.row;
secondmv.as_mv.col += best_mv_second.as_mv.col;
}
@@ -1147,23 +977,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
break;
case NEWMV:
-#if CONFIG_NEWMVENTROPY
read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc);
read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc,
xd->allow_high_precision_mv);
vp8_increment_nmv(&mv->as_mv, &best_mv.as_mv, &cm->fc.NMVcount,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- read_mv_hp(bc, &mv->as_mv, (const MV_CONTEXT_HP *) mvc_hp);
- cm->fc.MVcount_hp[0][mv_max_hp + (mv->as_mv.row)]++;
- cm->fc.MVcount_hp[1][mv_max_hp + (mv->as_mv.col)]++;
- } else {
- read_mv(bc, &mv->as_mv, (const MV_CONTEXT *) mvc);
- cm->fc.MVcount[0][mv_max + (mv->as_mv.row >> 1)]++;
- cm->fc.MVcount[1][mv_max + (mv->as_mv.col >> 1)]++;
- }
-#endif /* CONFIG_NEWMVENTROPY */
mv->as_mv.row += best_mv.as_mv.row;
mv->as_mv.col += best_mv.as_mv.col;
@@ -1178,23 +996,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mb_to_top_edge,
mb_to_bottom_edge);
if (mbmi->second_ref_frame) {
-#if CONFIG_NEWMVENTROPY
read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc);
read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
xd->allow_high_precision_mv);
vp8_increment_nmv(&mbmi->mv[1].as_mv, &best_mv_second.as_mv,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- read_mv_hp(bc, &mbmi->mv[1].as_mv, (const MV_CONTEXT_HP *) mvc_hp);
- cm->fc.MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row)]++;
- cm->fc.MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col)]++;
- } else {
- read_mv(bc, &mbmi->mv[1].as_mv, (const MV_CONTEXT *) mvc);
- cm->fc.MVcount[0][mv_max + (mbmi->mv[1].as_mv.row >> 1)]++;
- cm->fc.MVcount[1][mv_max + (mbmi->mv[1].as_mv.col >> 1)]++;
- }
-#endif /* CONFIG_NEWMVENTROPY */
mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row;
mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col;
mbmi->need_to_clamp_secondmv |=
@@ -1282,23 +1088,23 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
#endif
}
-#if CONFIG_TX_SELECT
if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) ||
- (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
+ (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4)))) {
// FIXME(rbultje) code ternary symbol once all experiments are merged
mbmi->txfm_size = vp8_read(bc, cm->prob_tx[0]);
- if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED)
+ if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED &&
+ mbmi->mode != SPLITMV)
mbmi->txfm_size += vp8_read(bc, cm->prob_tx[1]);
- } else
-#endif
- if (cm->txfm_mode >= ALLOW_16X16 &&
+ } else if (cm->txfm_mode >= ALLOW_16X16 &&
((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
(mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
mbmi->txfm_size = TX_16X16;
} else if (cm->txfm_mode >= ALLOW_8X8 &&
- ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != B_PRED) ||
- (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
+ (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == B_PRED) &&
+ !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4))) {
mbmi->txfm_size = TX_8X8;
} else {
mbmi->txfm_size = TX_4X4;
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 01739c0db..bc35b17dc 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -208,10 +208,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
MB_PREDICTION_MODE mode;
int i;
int tx_size;
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || \
- CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type;
-#endif
#if CONFIG_SUPERBLOCKS
VP8_COMMON *pc = &pbi->common;
int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
@@ -256,11 +253,9 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
//mode = xd->mode_info_context->mbmi.mode;
-#if CONFIG_SWITCHABLE_INTERP
if (pbi->common.frame_type != KEY_FRAME)
vp8_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter,
&pbi->common);
-#endif
if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV
&& mode != I8X8_PRED
@@ -330,7 +325,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
vp8_intra8x8_predict(b, i8x8mode, b->predictor);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
-#if CONFIG_HYBRIDTRANSFORM8X8
tx_type = get_tx_type(xd, &xd->block[idx]);
if (tx_type != DCT_DCT) {
vp8_ht_dequant_idct_add_8x8_c(tx_type,
@@ -338,9 +332,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
} else {
vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
}
-#else
- vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
-#endif
q += 64;
} else {
for (j = 0; j < 4; j++) {
@@ -380,7 +371,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
#endif
-#if CONFIG_HYBRIDTRANSFORM
tx_type = get_tx_type(xd, b);
if (tx_type != DCT_DCT) {
vp8_ht_dequant_idct_add_c(tx_type, b->qcoeff,
@@ -390,29 +380,22 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
vp8_dequant_idct_add_c(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
}
-#else
- if (xd->eobs[i] > 1) {
- DEQUANT_INVOKE(&pbi->dequant, idct_add)
- (b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
- } else {
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
- (b->qcoeff[0] * b->dequant[0], b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
- ((int *)b->qcoeff)[0] = 0;
- }
-#endif
}
} else if (mode == SPLITMV) {
- DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
- (xd->qcoeff, xd->block[0].dequant,
- xd->predictor, xd->dst.y_buffer,
- xd->dst.y_stride, xd->eobs);
+ if (tx_size == TX_8X8) {
+ vp8_dequant_idct_add_y_block_8x8_c(xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs, xd);
+ } else {
+ DEQUANT_INVOKE(&pbi->dequant,
+ idct_add_y_block)(xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs);
+ }
} else {
BLOCKD *b = &xd->block[24];
if (tx_size == TX_16X16) {
-#if CONFIG_HYBRIDTRANSFORM16X16
BLOCKD *bd = &xd->block[0];
tx_type = get_tx_type(xd, bd);
if (tx_type != DCT_DCT) {
@@ -424,11 +407,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
xd->predictor, xd->dst.y_buffer,
16, xd->dst.y_stride);
}
-#else
- vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant,
- xd->predictor, xd->dst.y_buffer,
- 16, xd->dst.y_stride);
-#endif
} else if (tx_size == TX_8X8) {
#if CONFIG_SUPERBLOCKS
void *orig = xd->mode_info_context;
@@ -515,8 +493,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
if (!xd->mode_info_context->mbmi.encoded_as_sb) {
#endif
if ((tx_size == TX_8X8 &&
- xd->mode_info_context->mbmi.mode != I8X8_PRED)
- || tx_size == TX_16X16)
+ xd->mode_info_context->mbmi.mode != I8X8_PRED &&
+ xd->mode_info_context->mbmi.mode != SPLITMV)
+ || tx_size == TX_16X16
+ )
DEQUANT_INVOKE(&pbi->dequant, idct_add_uv_block_8x8) //
(xd->qcoeff + 16 * 16, xd->block[16].dequant,
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
@@ -900,7 +880,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM
{
if (vp8_read_bit(bc)) {
/* read coef probability tree */
@@ -920,7 +899,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
}
-#endif
if (pbi->common.txfm_mode != ONLY_4X4 && vp8_read_bit(bc)) {
// read coef probability tree
@@ -940,7 +918,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM8X8
if (pbi->common.txfm_mode != ONLY_4X4 && vp8_read_bit(bc)) {
// read coef probability tree
for (i = 0; i < BLOCK_TYPES_8X8; i++)
@@ -959,7 +936,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
}
-#endif
// 16x16
if (pbi->common.txfm_mode > ALLOW_8X8 && vp8_read_bit(bc)) {
@@ -980,7 +956,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM16X16
if (pbi->common.txfm_mode > ALLOW_8X8 && vp8_read_bit(bc)) {
// read coef probability tree
for (i = 0; i < BLOCK_TYPES_16X16; ++i)
@@ -999,7 +974,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
}
-#endif
}
int vp8_decode_frame(VP8D_COMP *pbi) {
@@ -1214,17 +1188,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
#endif
/* Read the loop filter level and type */
-#if CONFIG_TX_SELECT
pc->txfm_mode = vp8_read_literal(&header_bc, 2);
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = vp8_read_literal(&header_bc, 8);
pc->prob_tx[1] = vp8_read_literal(&header_bc, 8);
}
-#else
- pc->txfm_mode = (TXFM_MODE) vp8_read_bit(&header_bc);
- if (pc->txfm_mode == ALLOW_8X8)
- pc->txfm_mode = ALLOW_16X16;
-#endif
pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(&header_bc);
pc->filter_level = vp8_read_literal(&header_bc, 6);
@@ -1328,12 +1296,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
/* Is high precision mv allowed */
xd->allow_high_precision_mv = (unsigned char)vp8_read_bit(&header_bc);
// Read the type of subpel filter to use
-#if CONFIG_SWITCHABLE_INTERP
if (vp8_read_bit(&header_bc)) {
pc->mcomp_filter_type = SWITCHABLE;
- } else
-#endif
- {
+ } else {
pc->mcomp_filter_type = vp8_read_literal(&header_bc, 2);
}
  /* To enable choice of different interpolation filters */
@@ -1362,58 +1327,36 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
vp8_copy(pbi->common.fc.pre_coef_probs,
pbi->common.fc.coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(pbi->common.fc.pre_hybrid_coef_probs,
pbi->common.fc.hybrid_coef_probs);
-#endif
vp8_copy(pbi->common.fc.pre_coef_probs_8x8,
pbi->common.fc.coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(pbi->common.fc.pre_hybrid_coef_probs_8x8,
pbi->common.fc.hybrid_coef_probs_8x8);
-#endif
vp8_copy(pbi->common.fc.pre_coef_probs_16x16,
pbi->common.fc.coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16,
pbi->common.fc.hybrid_coef_probs_16x16);
-#endif
vp8_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
vp8_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob);
vp8_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob);
vp8_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob);
vp8_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob);
vp8_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob);
-#if CONFIG_NEWMVENTROPY
pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc;
-#else
- vp8_copy(pbi->common.fc.pre_mvc, pbi->common.fc.mvc);
- vp8_copy(pbi->common.fc.pre_mvc_hp, pbi->common.fc.mvc_hp);
-#endif
vp8_zero(pbi->common.fc.coef_counts);
-#if CONFIG_HYBRIDTRANSFORM
vp8_zero(pbi->common.fc.hybrid_coef_counts);
-#endif
vp8_zero(pbi->common.fc.coef_counts_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_zero(pbi->common.fc.hybrid_coef_counts_8x8);
-#endif
vp8_zero(pbi->common.fc.coef_counts_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_zero(pbi->common.fc.hybrid_coef_counts_16x16);
-#endif
vp8_zero(pbi->common.fc.ymode_counts);
vp8_zero(pbi->common.fc.uv_mode_counts);
vp8_zero(pbi->common.fc.bmode_counts);
vp8_zero(pbi->common.fc.i8x8_mode_counts);
vp8_zero(pbi->common.fc.sub_mv_ref_counts);
vp8_zero(pbi->common.fc.mbsplit_counts);
-#if CONFIG_NEWMVENTROPY
vp8_zero(pbi->common.fc.NMVcount);
-#else
- vp8_zero(pbi->common.fc.MVcount);
- vp8_zero(pbi->common.fc.MVcount_hp);
-#endif
vp8_zero(pbi->common.fc.mv_ref_ct);
vp8_zero(pbi->common.fc.mv_ref_ct_a);
@@ -1472,11 +1415,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
vp8_adapt_coef_probs(pc);
if (pc->frame_type != KEY_FRAME) {
vp8_adapt_mode_probs(pc);
-#if CONFIG_NEWMVENTROPY
vp8_adapt_nmv_probs(pc, xd->allow_high_precision_mv);
-#else
- vp8_adapt_mv_probs(pc);
-#endif
vp8_update_mode_context(&pbi->common);
}
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index e55da754b..db9c3b0b8 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -42,7 +42,6 @@ void vp8_dequantize_b_c(BLOCKD *d) {
}
-#if CONFIG_HYBRIDTRANSFORM
void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride) {
@@ -77,9 +76,7 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
pred += pitch;
}
}
-#endif
-#if CONFIG_HYBRIDTRANSFORM8X8
void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride) {
@@ -123,7 +120,6 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4;
}
}
-#endif
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride) {
@@ -468,7 +464,6 @@ void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
#endif
}
-#if CONFIG_HYBRIDTRANSFORM16X16
void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride) {
@@ -507,7 +502,6 @@ void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
pred += pitch;
}
}
-#endif
void vp8_dequant_idct_add_16x16_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride) {
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index 5b7b21598..4ac710431 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -76,7 +76,6 @@ extern prototype_dequant_block(vp8_dequant_block);
#endif
extern prototype_dequant_idct_add(vp8_dequant_idct_add);
-#if CONFIG_HYBRIDTRANSFORM
// declare dequantization and inverse transform module of hybrid transform decoder
#ifndef vp8_ht_dequant_idct_add
#define vp8_ht_dequant_idct_add vp8_ht_dequant_idct_add_c
@@ -85,7 +84,6 @@ extern void vp8_ht_dequant_idct_add(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride);
-#endif
#ifndef vp8_dequant_dc_idct_add
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
@@ -191,17 +189,13 @@ typedef struct {
#define DEQUANT_INVOKE(ctx,fn) vp8_dequant_##fn
#endif
-#if CONFIG_HYBRIDTRANSFORM8X8
void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride);
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16
void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride);
-#endif
#if CONFIG_SUPERBLOCKS
void vp8_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq,
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 85f213470..fa56865ac 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -135,7 +135,6 @@ int get_token(int v) {
else return DCT_VAL_CATEGORY6;
}
-#if CONFIG_HYBRIDTRANSFORM
void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr,
int block, PLANE_TYPE type,
TX_TYPE tx_type,
@@ -180,7 +179,6 @@ void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr,
fc->coef_counts[type][band][pt][DCT_EOB_TOKEN]++;
}
}
-#endif
void static count_tokens(INT16 *qcoeff_ptr, int block, PLANE_TYPE type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
@@ -202,9 +200,7 @@ void static count_tokens(INT16 *qcoeff_ptr, int block, PLANE_TYPE type,
}
void static count_tokens_8x8(INT16 *qcoeff_ptr, int block, PLANE_TYPE type,
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type,
-#endif
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int eob, int seg_eob, FRAME_CONTEXT *fc) {
int c, pt, token, band;
@@ -214,29 +210,23 @@ void static count_tokens_8x8(INT16 *qcoeff_ptr, int block, PLANE_TYPE type,
int v = qcoeff_ptr[rc];
band = (type == 1 ? vp8_coef_bands[c] : vp8_coef_bands_8x8[c]);
token = get_token(v);
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
fc->hybrid_coef_counts_8x8[type][band][pt][token]++;
else
-#endif
fc->coef_counts_8x8[type][band][pt][token]++;
pt = vp8_prev_token_class[token];
}
if (eob < seg_eob) {
band = (type == 1 ? vp8_coef_bands[c] : vp8_coef_bands_8x8[c]);
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
fc->hybrid_coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN]++;
else
-#endif
fc->coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN]++;
}
}
void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, PLANE_TYPE type,
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type,
-#endif
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int eob, int seg_eob, FRAME_CONTEXT *fc) {
int c, pt, token;
@@ -246,21 +236,17 @@ void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, PLANE_TYPE type,
int v = qcoeff_ptr[rc];
int band = vp8_coef_bands_16x16[c];
token = get_token(v);
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
fc->hybrid_coef_counts_16x16[type][band][pt][token]++;
else
-#endif
fc->coef_counts_16x16[type][band][pt][token]++;
pt = vp8_prev_token_class[token];
}
if (eob < seg_eob) {
int band = vp8_coef_bands_16x16[c];
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
fc->hybrid_coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]++;
else
-#endif
fc->coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]++;
}
}
@@ -306,9 +292,7 @@ static int decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd,
BOOL_DECODER* const br,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
PLANE_TYPE type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type,
-#endif
int seg_eob, INT16 *qcoeff_ptr, int i,
const int *const scan, int block_type,
const int *coef_bands) {
@@ -320,23 +304,17 @@ static int decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd,
default:
case TX_4X4:
coef_probs =
-#if CONFIG_HYBRIDTRANSFORM
tx_type != DCT_DCT ? fc->hybrid_coef_probs[type][0][0] :
-#endif
fc->coef_probs[type][0][0];
break;
case TX_8X8:
coef_probs =
-#if CONFIG_HYBRIDTRANSFORM8X8
tx_type != DCT_DCT ? fc->hybrid_coef_probs_8x8[type][0][0] :
-#endif
fc->coef_probs_8x8[type][0][0];
break;
case TX_16X16:
coef_probs =
-#if CONFIG_HYBRIDTRANSFORM16X16
tx_type != DCT_DCT ? fc->hybrid_coef_probs_16x16[type][0][0] :
-#endif
fc->coef_probs_16x16[type][0][0];
break;
}
@@ -422,26 +400,17 @@ SKIP_START:
}
if (block_type == TX_4X4) {
-#if CONFIG_HYBRIDTRANSFORM
count_tokens_adaptive_scan(xd, qcoeff_ptr, i, type,
tx_type,
a, l, c, seg_eob, fc);
-#else
- count_tokens(qcoeff_ptr, i, type,
- a, l, c, seg_eob, fc);
-#endif
}
else if (block_type == TX_8X8)
count_tokens_8x8(qcoeff_ptr, i, type,
-#if CONFIG_HYBRIDTRANSFORM8X8
tx_type,
-#endif
a, l, c, seg_eob, fc);
else
count_tokens_16x16(qcoeff_ptr, i, type,
-#if CONFIG_HYBRIDTRANSFORM16X16
tx_type,
-#endif
a, l, c, seg_eob, fc);
return c;
}
@@ -457,12 +426,7 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd,
const int segment_id = xd->mode_info_context->mbmi.segment_id;
const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB);
INT16 *qcoeff_ptr = &xd->qcoeff[0];
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
- TX_TYPE tx_type = DCT_DCT;
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16
- tx_type = get_tx_type(xd, &xd->block[0]);
-#endif
+ TX_TYPE tx_type = get_tx_type(xd, &xd->block[0]);
type = PLANE_TYPE_Y_WITH_DC;
@@ -475,26 +439,20 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd,
{
const int* const scan = vp8_default_zig_zag1d_16x16;
c = decode_coefs(pbi, xd, bc, A, L, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type,
-#endif
seg_eob, qcoeff_ptr,
0, scan, TX_16X16, coef_bands_x_16x16);
eobs[0] = c;
- *A = *L = (c != !type);
- for (i = 1; i < 16; i++) {
- *(A + vp8_block2above[i]) = *(A);
- *(L + vp8_block2left[i]) = *(L);
- }
+ A[0] = L[0] = (c != !type);
+ A[1] = A[2] = A[3] = A[0];
+ L[1] = L[2] = L[3] = L[0];
eobtotal += c;
}
// 8x8 chroma blocks
qcoeff_ptr += 256;
type = PLANE_TYPE_UV;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type = DCT_DCT;
-#endif
if (seg_active)
seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
else
@@ -505,9 +463,7 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd,
const int* const scan = vp8_default_zig_zag1d_8x8;
c = decode_coefs(pbi, xd, bc, a, l, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type,
-#endif
seg_eob, qcoeff_ptr,
i, scan, TX_8X8, coef_bands_x_8x8);
a[0] = l[0] = ((eobs[i] = c) != !type);
@@ -533,11 +489,10 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd,
const int segment_id = xd->mode_info_context->mbmi.segment_id;
const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB);
INT16 *qcoeff_ptr = &xd->qcoeff[0];
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = DCT_DCT;
-#endif
- int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24;
+ int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) ? 16 : 24;
if (xd->mode_info_context->mbmi.mode != B_PRED &&
xd->mode_info_context->mbmi.mode != SPLITMV &&
xd->mode_info_context->mbmi.mode != I8X8_PRED) {
@@ -551,9 +506,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd,
else
seg_eob = 4;
c = decode_coefs(pbi, xd, bc, a, l, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type,
-#endif
seg_eob, qcoeff_ptr + 24 * 16,
24, scan, TX_8X8, coef_bands_x);
a[0] = l[0] = ((eobs[24] = c) != !type);
@@ -573,22 +526,16 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd,
ENTROPY_CONTEXT *const a = A + vp8_block2above_8x8[i];
ENTROPY_CONTEXT *const l = L + vp8_block2left_8x8[i];
const int *const scan = vp8_default_zig_zag1d_8x8;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type = DCT_DCT;
-#endif
if (i == 16)
type = PLANE_TYPE_UV;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (type == PLANE_TYPE_Y_WITH_DC) {
tx_type = get_tx_type(xd, xd->block + i);
}
-#endif
c = decode_coefs(pbi, xd, bc, a, l, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type,
-#endif
seg_eob, qcoeff_ptr,
i, scan, TX_8X8, coef_bands_x_8x8);
a[0] = l[0] = ((eobs[i] = c) != !type);
@@ -601,9 +548,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd,
if (bufthred == 16) {
type = PLANE_TYPE_UV;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type = DCT_DCT;
-#endif
seg_eob = 16;
// use 4x4 transform for U, V components in I8X8 prediction mode
@@ -613,9 +558,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd,
const int *scan = vp8_default_zig_zag1d;
c = decode_coefs(pbi, xd, bc, a, l, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
tx_type,
-#endif
seg_eob, qcoeff_ptr,
i, scan, TX_4X4, coef_bands_x);
a[0] = l[0] = ((eobs[i] = c) != !type);
@@ -652,9 +595,7 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd,
type = PLANE_TYPE_Y2;
c = decode_coefs(dx, xd, bc, a, l, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
DCT_DCT,
-#endif
seg_eob, qcoeff_ptr + 24 * 16, 24,
scan, TX_4X4, coef_bands_x);
a[0] = l[0] = ((eobs[24] = c) != !type);
@@ -668,13 +609,10 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd,
for (i = 0; i < 24; ++i) {
ENTROPY_CONTEXT *const a = A + vp8_block2above[i];
ENTROPY_CONTEXT *const l = L + vp8_block2left[i];
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = DCT_DCT;
-#endif
if (i == 16)
type = PLANE_TYPE_UV;
-#if CONFIG_HYBRIDTRANSFORM
tx_type = get_tx_type(xd, &xd->block[i]);
switch(tx_type) {
case ADST_DCT :
@@ -689,12 +627,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd,
scan = vp8_default_zig_zag1d;
break;
}
-#endif
- c = decode_coefs(dx, xd, bc, a, l, type,
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
- tx_type,
-#endif
+ c = decode_coefs(dx, xd, bc, a, l, type, tx_type,
seg_eob, qcoeff_ptr,
i, scan, TX_4X4, coef_bands_x);
a[0] = l[0] = ((eobs[i] = c) != !type);
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 70cdb6aaf..36776ab21 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -45,32 +45,26 @@ unsigned int tree_update_hist [BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM
unsigned int hybrid_tree_update_hist [BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES][2];
-#endif
unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM8X8
unsigned int hybrid_tree_update_hist_8x8 [BLOCK_TYPES_8X8]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#endif
unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_tree_update_hist_16x16 [BLOCK_TYPES_16X16]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#endif
extern unsigned int active_section;
#endif
@@ -265,57 +259,23 @@ void update_skip_probs(VP8_COMP *cpi) {
}
}
-#if CONFIG_SWITCHABLE_INTERP
void update_switchable_interp_probs(VP8_COMP *cpi, vp8_writer* const bc) {
VP8_COMMON *const pc = &cpi->common;
unsigned int branch_ct[32][2];
int i, j;
for (j = 0; j <= VP8_SWITCHABLE_FILTERS; ++j) {
- //for (j = 0; j <= 0; ++j) {
-/*
- if (!cpi->dummy_packing)
-#if VP8_SWITCHABLE_FILTERS == 3
- printf("HELLO %d %d %d\n", cpi->switchable_interp_count[j][0],
- cpi->switchable_interp_count[j][1], cpi->switchable_interp_count[j][2]);
-#else
- printf("HELLO %d %d\n", cpi->switchable_interp_count[j][0],
- cpi->switchable_interp_count[j][1]);
-#endif
-*/
vp8_tree_probs_from_distribution(
VP8_SWITCHABLE_FILTERS,
vp8_switchable_interp_encodings, vp8_switchable_interp_tree,
- pc->fc.switchable_interp_prob[j], branch_ct, cpi->switchable_interp_count[j],
- 256, 1
- );
+ pc->fc.switchable_interp_prob[j], branch_ct,
+ cpi->switchable_interp_count[j], 256, 1);
for (i = 0; i < VP8_SWITCHABLE_FILTERS - 1; ++i) {
if (pc->fc.switchable_interp_prob[j][i] < 1)
pc->fc.switchable_interp_prob[j][i] = 1;
vp8_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8);
-/*
- if (!cpi->dummy_packing)
-#if VP8_SWITCHABLE_FILTERS == 3
- printf("Probs %d %d [%d]\n",
- pc->fc.switchable_interp_prob[j][0],
- pc->fc.switchable_interp_prob[j][1], pc->frame_type);
-#else
- printf("Probs %d [%d]\n", pc->fc.switchable_interp_prob[j][0],
- pc->frame_type);
-#endif
-*/
}
}
- /*
- if (!cpi->dummy_packing)
-#if VP8_SWITCHABLE_FILTERS == 3
- printf("Probs %d %d [%d]\n",
- pc->fc.switchable_interp_prob[0], pc->fc.switchable_interp_prob[1], pc->frame_type);
-#else
- printf("Probs %d [%d]\n", pc->fc.switchable_interp_prob[0], pc->frame_type);
-#endif
- */
}
-#endif
// This function updates the reference frame prediction stats
static void update_refpred_stats(VP8_COMP *cpi) {
@@ -649,7 +609,6 @@ static void write_sub_mv_ref
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
-#if CONFIG_NEWMVENTROPY
static void write_nmv(vp8_writer *bc, const MV *mv, const int_mv *ref,
const nmv_context *nmvc, int usehp) {
MV e;
@@ -660,31 +619,6 @@ static void write_nmv(vp8_writer *bc, const MV *mv, const int_mv *ref,
vp8_encode_nmv_fp(bc, &e, &ref->as_mv, nmvc, usehp);
}
-#else
-
-static void write_mv
-(
- vp8_writer *bc, const MV *mv, const int_mv *ref, const MV_CONTEXT *mvc
-) {
- MV e;
- e.row = mv->row - ref->as_mv.row;
- e.col = mv->col - ref->as_mv.col;
-
- vp8_encode_motion_vector(bc, &e, mvc);
-}
-
-static void write_mv_hp
-(
- vp8_writer *bc, const MV *mv, const int_mv *ref, const MV_CONTEXT_HP *mvc
-) {
- MV e;
- e.row = mv->row - ref->as_mv.row;
- e.col = mv->col - ref->as_mv.col;
-
- vp8_encode_motion_vector_hp(bc, &e, mvc);
-}
-#endif /* CONFIG_NEWMVENTROPY */
-
// This function writes the current macro block's segment id to the bitstream
// It should only be called if a segment map update is indicated.
static void write_mb_segid(vp8_writer *bc,
@@ -821,12 +755,7 @@ static void update_ref_probs(VP8_COMP *const cpi) {
static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
int i;
VP8_COMMON *const pc = &cpi->common;
-#if CONFIG_NEWMVENTROPY
const nmv_context *nmvc = &pc->fc.nmvc;
-#else
- const MV_CONTEXT *mvc = pc->fc.mvc;
- const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp;
-#endif
MACROBLOCK *x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
MODE_INFO *m;
@@ -1042,21 +971,19 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
cpi->common.pred_filter_mode);
}
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (mode >= NEARESTMV && mode <= SPLITMV)
{
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
vp8_write_token(bc, vp8_switchable_interp_tree,
- get_pred_probs(&cpi->common, xd, PRED_SWITCHABLE_INTERP),
+ get_pred_probs(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP),
vp8_switchable_interp_encodings +
vp8_switchable_interp_map[mi->interp_filter]);
- //if (!cpi->dummy_packing) printf("Reading: %d\n", mi->interp_filter);
} else {
assert (mi->interp_filter ==
cpi->common.mcomp_filter_type);
}
}
-#endif
if (mi->second_ref_frame &&
(mode == NEWMV || mode == SPLITMV)) {
int_mv n1, n2;
@@ -1099,17 +1026,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
}
#endif
-#if CONFIG_NEWMVENTROPY
write_nmv(bc, &mi->mv[0].as_mv, &best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(bc, &mi->mv[0].as_mv, &best_mv, mvc_hp);
- } else {
- write_mv(bc, &mi->mv[0].as_mv, &best_mv, mvc);
- }
-#endif
if (mi->second_ref_frame) {
#if 0 //CONFIG_NEW_MVREF
@@ -1126,17 +1045,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
&best_second_mv);
cpi->best_ref_index_counts[best_index]++;
#endif
-#if CONFIG_NEWMVENTROPY
write_nmv(bc, &mi->mv[1].as_mv, &best_second_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(bc, &mi->mv[1].as_mv, &best_second_mv, mvc_hp);
- } else {
- write_mv(bc, &mi->mv[1].as_mv, &best_second_mv, mvc);
- }
-#endif
}
break;
case SPLITMV: {
@@ -1178,40 +1089,16 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
#ifdef ENTROPY_STATS
active_section = 11;
#endif
-#if CONFIG_NEWMVENTROPY
write_nmv(bc, &blockmv.as_mv, &best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(bc, &blockmv.as_mv, &best_mv,
- (const MV_CONTEXT_HP *) mvc_hp);
- } else {
- write_mv(bc, &blockmv.as_mv, &best_mv,
- (const MV_CONTEXT *) mvc);
- }
-#endif
if (mi->second_ref_frame) {
-#if CONFIG_NEWMVENTROPY
write_nmv(bc,
&cpi->mb.partition_info->bmi[j].second_mv.as_mv,
&best_second_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(
- bc,
- &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
- &best_second_mv, (const MV_CONTEXT_HP *)mvc_hp);
- } else {
- write_mv(
- bc,
- &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
- &best_second_mv, (const MV_CONTEXT *) mvc);
- }
-#endif
}
}
} while (++j < cpi->mb.partition_info->count);
@@ -1223,9 +1110,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
}
}
-#if CONFIG_TX_SELECT
if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
- (rf != INTRA_FRAME && mode != SPLITMV)) &&
+ (rf != INTRA_FRAME && !(mode == SPLITMV &&
+ mi->partitioning == PARTITIONING_4X4))) &&
pc->txfm_mode == TX_MODE_SELECT &&
!((pc->mb_no_coeff_skip && mi->mb_skip_coeff) ||
(segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
@@ -1233,10 +1120,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp8_write(bc, sz != TX_4X4, pc->prob_tx[0]);
- if (sz != TX_4X4 && mode != I8X8_PRED)
+ if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV)
vp8_write(bc, sz != TX_8X8, pc->prob_tx[1]);
}
-#endif
#ifdef ENTROPY_STATS
active_section = 1;
@@ -1365,7 +1251,6 @@ static void write_mb_modes_kf(const VP8_COMMON *c,
} else
write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
-#if CONFIG_TX_SELECT
if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
!((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) ||
(segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
@@ -1376,7 +1261,6 @@ static void write_mb_modes_kf(const VP8_COMMON *c,
if (sz != TX_4X4 && ym <= TM_PRED)
vp8_write(bc, sz != TX_8X8, c->prob_tx[1]);
}
-#endif
}
static void write_kfmodes(VP8_COMP* const cpi, vp8_writer* const bc) {
@@ -1498,7 +1382,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM
for (i = 0; i < BLOCK_TYPES; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -1519,8 +1402,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#endif
-
if (cpi->common.txfm_mode != ONLY_4X4) {
for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
@@ -1547,7 +1428,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM8X8
for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -1572,7 +1452,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#endif
}
if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -1595,7 +1474,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM16X16
for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -1614,7 +1492,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#endif
}
#if 0
@@ -1887,7 +1764,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
-#if CONFIG_HYBRIDTRANSFORM
savings = 0;
update[0] = update[1] = 0;
for (i = 0; i < BLOCK_TYPES; ++i) {
@@ -1976,7 +1852,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#endif
/* do not do this if not even allowed */
if (cpi->common.txfm_mode != ONLY_4X4) {
@@ -2054,7 +1929,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM8X8
update[0] = update[1] = 0;
savings = 0;
for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
@@ -2128,7 +2002,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#endif
}
if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -2206,7 +2079,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM16X16
update[0] = update[1] = 0;
savings = 0;
for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
@@ -2280,7 +2152,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#endif
}
}
@@ -2561,12 +2432,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
/* sb mode probability */
const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1));
- pc->sb_coded = get_prob(cpi->sb_count, sb_max);
+ pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max);
vp8_write_literal(&header_bc, pc->sb_coded, 8);
}
#endif
-#if CONFIG_TX_SELECT
{
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0],
@@ -2583,9 +2453,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
vp8_write_literal(&header_bc, pc->prob_tx[1], 8);
}
}
-#else
- vp8_write_bit(&header_bc, !!pc->txfm_mode);
-#endif
// Encode the loop filter level and type
vp8_write_bit(&header_bc, pc->filter_type);
@@ -2687,7 +2554,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Signal whether to allow high MV precision
vp8_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0);
-#if CONFIG_SWITCHABLE_INTERP
if (pc->mcomp_filter_type == SWITCHABLE) {
/* Check to see if only one of the filters is actually used */
int count[VP8_SWITCHABLE_FILTERS];
@@ -2712,7 +2578,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Signal the type of subpel filter to use
vp8_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE));
if (pc->mcomp_filter_type != SWITCHABLE)
-#endif /* CONFIG_SWITCHABLE_INTERP */
vp8_write_literal(&header_bc, (pc->mcomp_filter_type), 2);
}
@@ -2731,29 +2596,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
vp8_clear_system_state(); // __asm emms;
vp8_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cpi->common.fc.pre_hybrid_coef_probs, cpi->common.fc.hybrid_coef_probs);
-#endif
vp8_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8);
-#endif
vp8_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16);
-#endif
vp8_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
vp8_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
vp8_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
vp8_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
vp8_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
vp8_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
-#if CONFIG_NEWMVENTROPY
cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
-#else
- vp8_copy(cpi->common.fc.pre_mvc, cpi->common.fc.mvc);
- vp8_copy(cpi->common.fc.pre_mvc_hp, cpi->common.fc.mvc_hp);
-#endif
vp8_zero(cpi->sub_mv_ref_count);
vp8_zero(cpi->mbsplit_count);
vp8_zero(cpi->common.fc.mv_ref_ct)
@@ -2796,10 +2650,8 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
vp8_write_literal(&header_bc, pc->prob_pred_filter_off, 8);
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (pc->mcomp_filter_type == SWITCHABLE)
update_switchable_interp_probs(cpi, &header_bc);
-#endif
vp8_write_literal(&header_bc, pc->prob_intra_coded, 8);
vp8_write_literal(&header_bc, pc->prob_last_coded, 8);
@@ -2825,15 +2677,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
update_mbintra_mode_probs(cpi, &header_bc);
-#if CONFIG_NEWMVENTROPY
vp8_write_nmvprobs(cpi, xd->allow_high_precision_mv, &header_bc);
-#else
- if (xd->allow_high_precision_mv) {
- vp8_write_mvprobs_hp(cpi, &header_bc);
- } else {
- vp8_write_mvprobs(cpi, &header_bc);
- }
-#endif
}
vp8_stop_encode(&header_bc);
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 80f9b75b8..48623be8c 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -79,9 +79,7 @@ typedef struct {
int hybrid_pred_diff;
int comp_pred_diff;
int single_pred_diff;
-#if CONFIG_TX_SELECT
int64_t txfm_rd_diff[NB_TXFM_MODES];
-#endif
} PICK_MODE_CONTEXT;
typedef struct {
@@ -114,7 +112,6 @@ typedef struct {
int *mb_norm_activity_ptr;
signed int act_zbin_adj;
-#if CONFIG_NEWMVENTROPY
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int *nmvcost[2];
@@ -126,28 +123,17 @@ typedef struct {
int *nmvsadcost[2];
int nmvsadcosts_hp[2][MV_VALS];
int *nmvsadcost_hp[2];
-#else
- int mvcosts[2][MVvals + 1];
- int *mvcost[2];
- int mvsadcosts[2][MVfpvals + 1];
- int *mvsadcost[2];
- int mvcosts_hp[2][MVvals_hp + 1];
- int *mvcost_hp[2];
- int mvsadcosts_hp[2][MVfpvals_hp + 1];
- int *mvsadcost_hp[2];
-#endif /* CONFIG_NEWMVENTROPY */
int mbmode_cost[2][MB_MODE_COUNT];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
int bmode_costs[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
int i8x8_mode_costs[MB_MODE_COUNT];
int inter_bmode_costs[B_MODE_COUNT];
-#if CONFIG_SWITCHABLE_INTERP
- int switchable_interp_costs[VP8_SWITCHABLE_FILTERS+1]
+ int switchable_interp_costs[VP8_SWITCHABLE_FILTERS + 1]
[VP8_SWITCHABLE_FILTERS];
-#endif
- // These define limits to motion vector components to prevent them from extending outside the UMV borders
+ // These define limits to motion vector components to prevent them
+ // from extending outside the UMV borders
int mv_col_min;
int mv_col_max;
int mv_row_min;
@@ -164,10 +150,8 @@ typedef struct {
unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-#endif
int optimize;
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index cd13fec7c..0983b1c0a 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -14,8 +14,6 @@
#include "vp8/common/idct.h"
#include "vp8/common/systemdependent.h"
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
-
#include "vp8/common/blockd.h"
// TODO: these transforms can be converted into integer forms to reduce
@@ -71,9 +69,7 @@ float adst_8[64] = {
0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
};
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
float dct_16[256] = {
0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
@@ -143,7 +139,6 @@ float adst_16[256] = {
0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761,
0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094
};
-#endif
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
@@ -400,7 +395,6 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8
}
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
void vp8_fht_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim) {
@@ -518,7 +512,6 @@ void vp8_fht_c(short *input, short *output, int pitch,
}
vp8_clear_system_state(); // Make it simd safe : __asm emms;
}
-#endif
void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
int i;
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index 180192bbb..4ad1fe85d 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -26,10 +26,8 @@
#endif
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
void vp8_fht_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim);
-#endif
#ifndef vp8_fdct_short16x16
#define vp8_fdct_short16x16 vp8_short_fdct16x16_c
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 619695c33..0910cfd35 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -120,8 +120,8 @@ static unsigned int tt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x) {
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
- act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer,
- x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
+ act = vp8_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0,
+ &sse);
act = act << 4;
/* If the region is flat, lower the activity some more. */
@@ -411,7 +411,6 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
}
-#if CONFIG_TX_SELECT
{
int segment_id = mbmi->segment_id;
if (!segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
@@ -421,7 +420,6 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
}
}
}
-#endif
if (cpi->common.frame_type == KEY_FRAME) {
// Restore the coding modes to that held in the coding context
@@ -1342,37 +1340,18 @@ static void encode_frame_internal(VP8_COMP *cpi) {
cpi->pred_filter_on_count = 0;
cpi->pred_filter_off_count = 0;
#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_zero(cpi->switchable_interp_count);
-#endif
-
-#if 0
- // Experimental code
- cpi->frame_distortion = 0;
- cpi->last_mb_distortion = 0;
-#endif
xd->mode_info_context = cm->mi;
xd->prev_mode_info_context = cm->prev_mi;
-#if CONFIG_NEWMVENTROPY
vp8_zero(cpi->NMVcount);
-#else
- vp8_zero(cpi->MVcount);
- vp8_zero(cpi->MVcount_hp);
-#endif
vp8_zero(cpi->coef_counts);
-#if CONFIG_HYBRIDTRANSFORM
vp8_zero(cpi->hybrid_coef_counts);
-#endif
vp8_zero(cpi->coef_counts_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_zero(cpi->hybrid_coef_counts_8x8);
-#endif
vp8_zero(cpi->coef_counts_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_zero(cpi->hybrid_coef_counts_16x16);
-#endif
vp8cx_frame_init_quantizer(cpi);
@@ -1393,11 +1372,9 @@ static void encode_frame_internal(VP8_COMP *cpi) {
vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count));
vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count));
-#if CONFIG_TX_SELECT
vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count));
vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p));
vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
-#endif
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -1457,7 +1434,6 @@ static int check_dual_ref_flags(VP8_COMP *cpi) {
}
}
-#if CONFIG_TX_SELECT
static void reset_skip_txfm_size(VP8_COMP *cpi, TX_SIZE txfm_max) {
VP8_COMMON *cm = &cpi->common;
int mb_row, mb_col, mis = cm->mode_info_stride;
@@ -1481,7 +1457,6 @@ static void reset_skip_txfm_size(VP8_COMP *cpi, TX_SIZE txfm_max) {
}
}
}
-#endif
void vp8_encode_frame(VP8_COMP *cpi) {
if (cpi->sf.RD) {
@@ -1527,7 +1502,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
txfm_type = ONLY_4X4;
} else
#endif
-#if CONFIG_TX_SELECT
/* FIXME (rbultje)
* this is a hack (no really), basically to work around the complete
* nonsense coefficient cost prediction for keyframes. The probabilities
@@ -1575,16 +1549,11 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_16X16 : TX_MODE_SELECT;
#endif
-#else
- txfm_type = ALLOW_16X16;
-#endif // CONFIG_TX_SELECT
cpi->common.txfm_mode = txfm_type;
-#if CONFIG_TX_SELECT
if (txfm_type != TX_MODE_SELECT) {
cpi->common.prob_tx[0] = 128;
cpi->common.prob_tx[1] = 128;
}
-#endif
cpi->common.comp_pred_mode = pred_type;
encode_frame_internal(cpi);
@@ -1594,7 +1563,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
}
-#if CONFIG_TX_SELECT
for (i = 0; i < NB_TXFM_MODES; ++i) {
int64_t pd = cpi->rd_tx_select_diff[i];
int diff;
@@ -1604,7 +1572,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->rd_tx_select_threshes[frame_type][i] += diff;
cpi->rd_tx_select_threshes[frame_type][i] /= 2;
}
-#endif
if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
int single_count_zero = 0;
@@ -1622,7 +1589,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
}
}
-#if CONFIG_TX_SELECT
if (cpi->common.txfm_mode == TX_MODE_SELECT) {
const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4];
const int count8x8 = cpi->txfm_count[TX_8X8];
@@ -1639,7 +1605,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->common.txfm_mode = ALLOW_16X16;
}
}
-#endif
} else {
encode_frame_internal(cpi);
}
@@ -1957,15 +1922,12 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
}
if (output_enabled) {
-#if CONFIG_TX_SELECT
int segment_id = mbmi->segment_id;
-#endif
// Tokenize
sum_intra_stats(cpi, x);
vp8_tokenize_mb(cpi, &x->e_mbd, t, 0);
-#if CONFIG_TX_SELECT
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
!((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
(segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) &&
@@ -1975,9 +1937,7 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
} else if (mbmi->mode == I8X8_PRED) {
cpi->txfm_count_8x8p[mbmi->txfm_size]++;
}
- } else
-#endif
- if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) {
+ } else if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) {
mbmi->txfm_size = TX_16X16;
} else
if (cpi->common.txfm_mode >= ALLOW_8X8 && mbmi->mode != B_PRED) {
@@ -2012,9 +1972,7 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
assert(!xd->mode_info_context->mbmi.encoded_as_sb);
#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_setup_interp_filters(xd, mbmi->interp_filter, cm);
-#endif
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
// Adjust the zbin based on this MB rate.
adjust_act_zbin(cpi, x);
@@ -2158,7 +2116,6 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
}
if (output_enabled) {
-#if CONFIG_TX_SELECT
int segment_id = mbmi->segment_id;
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
!((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
@@ -2167,16 +2124,18 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
cpi->txfm_count[mbmi->txfm_size]++;
- } else if (mbmi->mode == I8X8_PRED) {
+ } else if (mbmi->mode == I8X8_PRED ||
+ (mbmi->mode == SPLITMV &&
+ mbmi->partitioning != PARTITIONING_4X4)) {
cpi->txfm_count_8x8p[mbmi->txfm_size]++;
}
- } else
-#endif
- if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
+ } else if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) {
mbmi->txfm_size = TX_16X16;
- } else if (mbmi->mode != B_PRED && mbmi->mode != SPLITMV &&
- cpi->common.txfm_mode >= ALLOW_8X8) {
+ } else if (mbmi->mode != B_PRED &&
+ !(mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4) &&
+ cpi->common.txfm_mode >= ALLOW_8X8) {
mbmi->txfm_size = TX_8X8;
} else {
mbmi->txfm_size = TX_4X4;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 9076780d9..f44df22ea 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -48,7 +48,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
}
}
- intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
+ intra_pred_var = vp8_get_mb_ss(x->src_diff);
return intra_pred_var;
}
@@ -57,9 +57,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *x, int ib) {
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
@@ -74,15 +72,12 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
-#if CONFIG_HYBRIDTRANSFORM
tx_type = get_tx_type(&x->e_mbd, b);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4);
vp8_ht_quantize_b_4x4(be, b, tx_type);
vp8_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4);
- } else
-#endif
- {
+ } else {
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32) ;
x->quantize_b_4x4(be, b) ;
vp8_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32) ;
@@ -103,9 +98,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
if (xd->mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1))
@@ -120,7 +113,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
xd->predictor, b->src_stride);
if (tx_size == TX_16X16) {
-#if CONFIG_HYBRIDTRANSFORM16X16
BLOCKD *bd = &xd->block[0];
tx_type = get_tx_type(xd, bd);
if (tx_type != DCT_DCT) {
@@ -129,9 +121,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
if (x->optimize)
vp8_optimize_mby_16x16(x, rtcd);
vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16);
- } else
-#endif
- {
+ } else {
vp8_transform_mby_16x16(x);
vp8_quantize_mby_16x16(x);
if (x->optimize)
@@ -196,9 +186,7 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
BLOCK *be = &x->block[ib];
const int iblock[4] = {0, 1, 4, 5};
int i;
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
@@ -217,7 +205,6 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
// generate residual blocks
vp8_subtract_4b_c(be, b, 16);
-#if CONFIG_HYBRIDTRANSFORM8X8
tx_type = get_tx_type(xd, xd->block + idx);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
@@ -226,13 +213,10 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
tx_type, 8);
} else {
-#endif
x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
vp8_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
-#if CONFIG_HYBRIDTRANSFORM8X8
}
-#endif
} else {
for (i = 0; i < 4; i++) {
b = &xd->block[ib + iblock[i]];
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index dc54d05a2..d3bd0f1dd 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -304,7 +304,6 @@ void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
scan = vp8_default_zig_zag1d;
bands = vp8_coef_bands;
default_eob = 16;
-#if CONFIG_HYBRIDTRANSFORM
// TODO: this isn't called (for intra4x4 modes), but will be left in
// since it could be used later
{
@@ -327,7 +326,6 @@ void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
scan = vp8_default_zig_zag1d;
}
}
-#endif
break;
case TX_8X8:
scan = vp8_default_zig_zag1d_8x8;
@@ -638,6 +636,7 @@ void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
+ int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV;
if (!x->e_mbd.above_context || !x->e_mbd.left_context)
return;
@@ -647,18 +646,21 @@ void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
- type = PLANE_TYPE_Y_NO_DC;
+ type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
for (b = 0; b < 16; b += 4) {
optimize_b(x, b, type,
ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b],
rtcd, TX_8X8);
- *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]);
- *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]);
+ ta[vp8_block2above_8x8[b] + 1] = ta[vp8_block2above_8x8[b]];
+ tl[vp8_block2left_8x8[b] + 1] = tl[vp8_block2left_8x8[b]];
}
  // the 2nd order (Haar) block is only present when has_2nd_order is set
- check_reset_8x8_2nd_coeffs(&x->e_mbd,
- ta + vp8_block2above_8x8[24], tl + vp8_block2left_8x8[24]);
+ if (has_2nd_order) {
+ check_reset_8x8_2nd_coeffs(&x->e_mbd,
+ ta + vp8_block2above_8x8[24],
+ tl + vp8_block2left_8x8[24]);
+ }
}
void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
@@ -680,8 +682,8 @@ void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
optimize_b(x, b, PLANE_TYPE_UV,
ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b],
rtcd, TX_8X8);
- *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]);
- *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]);
+ ta[vp8_block2above_8x8[b] + 1] = ta[vp8_block2above_8x8[b]];
+ tl[vp8_block2left_8x8[b] + 1] = tl[vp8_block2left_8x8[b]];
}
}
@@ -898,11 +900,25 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
optimize_mb_16x16(x, rtcd);
vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), xd);
} else if (tx_size == TX_8X8) {
- vp8_transform_mb_8x8(x);
- vp8_quantize_mb_8x8(x);
- if (x->optimize)
- optimize_mb_8x8(x, rtcd);
- vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd);
+ if (xd->mode_info_context->mbmi.mode == SPLITMV) {
+ assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
+ vp8_transform_mby_8x8(x);
+ vp8_transform_mbuv_4x4(x);
+ vp8_quantize_mby_8x8(x);
+ vp8_quantize_mbuv_4x4(x);
+ if (x->optimize) {
+ vp8_optimize_mby_8x8(x, rtcd);
+ vp8_optimize_mbuv_4x4(x, rtcd);
+ }
+ vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd);
+ vp8_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd);
+ } else {
+ vp8_transform_mb_8x8(x);
+ vp8_quantize_mb_8x8(x);
+ if (x->optimize)
+ optimize_mb_8x8(x, rtcd);
+ vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd);
+ }
} else {
transform_mb_4x4(x);
vp8_quantize_mb_4x4(x);
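
For reference, the new SPLITMV branch in vp8_encode_inter16x16() keeps the planes on different transform sizes; a condensed summary of the calls above:

  /* SPLITMV with TX_8X8 (partitioning coarser than 4x4, per the assert):
   *   luma:   vp8_transform_mby_8x8 / vp8_quantize_mby_8x8
   *           -- no 2nd-order block; the optimize path above treats the Y
   *              blocks as PLANE_TYPE_Y_WITH_DC and skips
   *              check_reset_8x8_2nd_coeffs().
   *   chroma: vp8_transform_mbuv_4x4 / vp8_quantize_mbuv_4x4
   * Every other 8x8 macroblock keeps the whole-MB helpers
   * (vp8_transform_mb_8x8 etc.), which include the 2nd-order block. */
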
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index d520d995a..75dad2f9b 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -22,8 +22,6 @@ extern unsigned int active_section;
//extern int final_packing;
-#if CONFIG_NEWMVENTROPY
-
#ifdef NMV_STATS
nmv_context_counts tnmvcounts;
#endif
@@ -549,593 +547,3 @@ void vp8_build_nmv_cost_table(int *mvjoint,
if (mvc_flag_h)
build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
}
-
-#else /* CONFIG_NEWMVENTROPY */
-
-static void encode_mvcomponent(
- vp8_writer *const bc,
- const int v,
- const struct mv_context *mvc
-) {
- const vp8_prob *p = mvc->prob;
- const int x = v < 0 ? -v : v;
-
- if (x < mvnum_short) { // Small
- vp8_write(bc, 0, p[mvpis_short]);
- vp8_treed_write(bc, vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
- if (!x)
- return; // no sign bit
- } else { // Large
- int i = 0;
-
- vp8_write(bc, 1, p[mvpis_short]);
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits + i]);
-
- while (++i < mvnum_short_bits);
-
- i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits + i]);
-
- while (--i > mvnum_short_bits);
-
- if (x & ~((2 << mvnum_short_bits) - 1))
- vp8_write(bc, (x >> mvnum_short_bits) & 1, p[MVPbits + mvnum_short_bits]);
- }
-
- vp8_write(bc, v < 0, p[MVPsign]);
-}
-
-void vp8_encode_motion_vector(vp8_writer* const bc,
- const MV* const mv,
- const MV_CONTEXT* const mvc) {
- encode_mvcomponent(bc, mv->row >> 1, &mvc[0]);
- encode_mvcomponent(bc, mv->col >> 1, &mvc[1]);
-}
-
-
-static unsigned int cost_mvcomponent(const int v,
- const struct mv_context* const mvc) {
- const vp8_prob *p = mvc->prob;
- const int x = v; // v<0? -v:v;
- unsigned int cost;
-
- if (x < mvnum_short) {
- cost = vp8_cost_zero(p [mvpis_short])
- + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
-
- if (!x)
- return cost;
- } else {
- int i = 0;
- cost = vp8_cost_one(p [mvpis_short]);
-
- do
- cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
-
- while (++i < mvnum_short_bits);
-
- i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
-
- while (--i > mvnum_short_bits);
-
- if (x & ~((2 << mvnum_short_bits) - 1))
- cost += vp8_cost_bit(p [MVPbits + mvnum_short_bits], (x >> mvnum_short_bits) & 1);
- }
-
- return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
-}
-
-void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc,
- const int mvc_flag[2]) {
- int i = 1; // -mv_max;
- unsigned int cost0 = 0;
- unsigned int cost1 = 0;
-
- vp8_clear_system_state();
-
- i = 1;
-
- if (mvc_flag[0]) {
- mvcost [0] [0] = cost_mvcomponent(0, &mvc[0]);
-
- do {
- // mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]);
- cost0 = cost_mvcomponent(i, &mvc[0]);
-
- mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]);
- mvcost [0] [-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign]);
- } while (++i <= mv_max);
- }
-
- i = 1;
-
- if (mvc_flag[1]) {
- mvcost [1] [0] = cost_mvcomponent(0, &mvc[1]);
-
- do {
- // mvcost [1] [i] = cost_mvcomponent( i, mvc[1]);
- cost1 = cost_mvcomponent(i, &mvc[1]);
-
- mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]);
- mvcost [1] [-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign]);
- } while (++i <= mv_max);
- }
-}
-
-
-// Motion vector probability table update depends on benefit.
-// Small correction allows for the fact that an update to an MV probability
-// may have benefit in subsequent frames as well as the current one.
-
-#define MV_PROB_UPDATE_CORRECTION -1
-
-
-__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2]) {
- const unsigned int tot = ct[0] + ct[1];
-
- if (tot) {
- const vp8_prob x = ((ct[0] * 255) / tot) & -2;
- *p = x ? x : 1;
- }
-}
-
-static void update(
- vp8_writer *const bc,
- const unsigned int ct[2],
- vp8_prob *const cur_p,
- const vp8_prob new_p,
- const vp8_prob update_p,
- int *updated
-) {
- const int cur_b = vp8_cost_branch(ct, *cur_p);
- const int new_b = vp8_cost_branch(ct, new_p);
- const int cost = 7 + MV_PROB_UPDATE_CORRECTION + ((vp8_cost_one(update_p) - vp8_cost_zero(update_p) + 128) >> 8);
-
- if (cur_b - new_b > cost) {
- *cur_p = new_p;
- vp8_write(bc, 1, update_p);
- vp8_write_literal(bc, new_p >> 1, 7);
- *updated = 1;
-
- } else
- vp8_write(bc, 0, update_p);
-}
-
-static void write_component_probs(
- vp8_writer *const bc,
- struct mv_context *cur_mvc,
- const struct mv_context *default_mvc_,
- const struct mv_context *update_mvc,
- const unsigned int events [MVvals],
- unsigned int rc,
- int *updated
-) {
- vp8_prob *Pcur = cur_mvc->prob;
- const vp8_prob *default_mvc = default_mvc_->prob;
- const vp8_prob *Pupdate = update_mvc->prob;
- unsigned int is_short_ct[2], sign_ct[2];
-
- unsigned int bit_ct [mvlong_width] [2];
-
- unsigned int short_ct [mvnum_short];
- unsigned int short_bct [mvnum_short - 1] [2];
-
- vp8_prob Pnew [MVPcount];
-
- (void) rc;
- vp8_copy_array(Pnew, default_mvc, MVPcount);
-
- vp8_zero(is_short_ct)
- vp8_zero(sign_ct)
- vp8_zero(bit_ct)
- vp8_zero(short_ct)
- vp8_zero(short_bct)
-
-
- // j=0
- {
- const int c = events [mv_max];
-
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
- }
-
- // j: 1 ~ mv_max (1023)
- {
- int j = 1;
-
- do {
- const int c1 = events [mv_max + j]; // positive
- const int c2 = events [mv_max - j]; // negative
- const int c = c1 + c2;
- int a = j;
-
- sign_ct [0] += c1;
- sign_ct [1] += c2;
-
- if (a < mvnum_short) {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- } else {
- int k = mvlong_width - 1;
- is_short_ct [1] += c; // Long vector
-
- /* bit 3 not always encoded. */
- do
- bit_ct [k] [(a >> k) & 1] += c;
-
- while (--k >= 0);
- }
- } while (++j <= mv_max);
- }
-
- calc_prob(Pnew + mvpis_short, is_short_ct);
-
- calc_prob(Pnew + MVPsign, sign_ct);
-
- {
- vp8_prob p [mvnum_short - 1]; /* actually only need branch ct */
- int j = 0;
-
- vp8_tree_probs_from_distribution(
- mvnum_short, vp8_small_mvencodings, vp8_small_mvtree,
- p, short_bct, short_ct,
- 256, 1
- );
-
- do
- calc_prob(Pnew + MVPshort + j, short_bct[j]);
-
- while (++j < mvnum_short - 1);
- }
-
- {
- int j = 0;
-
- do
- calc_prob(Pnew + MVPbits + j, bit_ct[j]);
-
- while (++j < mvlong_width);
- }
-
- update(bc, is_short_ct, Pcur + mvpis_short, Pnew[mvpis_short],
- *Pupdate++, updated);
-
- update(bc, sign_ct, Pcur + MVPsign, Pnew[MVPsign],
- *Pupdate++, updated);
-
- {
- const vp8_prob *const new_p = Pnew + MVPshort;
- vp8_prob *const cur_p = Pcur + MVPshort;
-
- int j = 0;
-
- do
-
- update(bc, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvnum_short - 1);
- }
-
- {
- const vp8_prob *const new_p = Pnew + MVPbits;
- vp8_prob *const cur_p = Pcur + MVPbits;
-
- int j = 0;
-
- do
-
- update(bc, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvlong_width);
- }
-}
-
-void vp8_write_mvprobs(VP8_COMP* const cpi, vp8_writer* const bc) {
- MV_CONTEXT *mvc = cpi->common.fc.mvc;
- int flags[2] = {0, 0};
-#ifdef ENTROPY_STATS
- active_section = 4;
-#endif
- write_component_probs(
- bc, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0],
- cpi->MVcount[0], 0, &flags[0]);
-
- write_component_probs(
- bc, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1],
- cpi->MVcount[1], 1, &flags[1]);
-
- if (flags[0] || flags[1])
- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags);
-
-#ifdef ENTROPY_STATS
- active_section = 5;
-#endif
-}
-
-
-static void encode_mvcomponent_hp(
- vp8_writer *const bc,
- const int v,
- const struct mv_context_hp *mvc
-) {
- const vp8_prob *p = mvc->prob;
- const int x = v < 0 ? -v : v;
-
- if (x < mvnum_short_hp) { // Small
- vp8_write(bc, 0, p[mvpis_short_hp]);
- vp8_treed_write(bc, vp8_small_mvtree_hp, p + MVPshort_hp, x,
- mvnum_short_bits_hp);
- if (!x)
- return; // no sign bit
- } else { // Large
- int i = 0;
-
- vp8_write(bc, 1, p[mvpis_short_hp]);
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits_hp + i]);
-
- while (++i < mvnum_short_bits_hp);
-
- i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits_hp + i]);
-
- while (--i > mvnum_short_bits_hp);
-
- if (x & ~((2 << mvnum_short_bits_hp) - 1))
- vp8_write(bc, (x >> mvnum_short_bits_hp) & 1,
- p[MVPbits_hp + mvnum_short_bits_hp]);
- }
-
- vp8_write(bc, v < 0, p[MVPsign_hp]);
-}
-
-void vp8_encode_motion_vector_hp(vp8_writer *bc, const MV *mv,
- const MV_CONTEXT_HP *mvc) {
-
- encode_mvcomponent_hp(bc, mv->row, &mvc[0]);
- encode_mvcomponent_hp(bc, mv->col, &mvc[1]);
-}
-
-
-static unsigned int cost_mvcomponent_hp(const int v,
- const struct mv_context_hp *mvc) {
- const vp8_prob *p = mvc->prob;
- const int x = v; // v<0? -v:v;
- unsigned int cost;
-
- if (x < mvnum_short_hp) {
- cost = vp8_cost_zero(p [mvpis_short_hp])
- + vp8_treed_cost(vp8_small_mvtree_hp, p + MVPshort_hp, x,
- mvnum_short_bits_hp);
-
- if (!x)
- return cost;
- } else {
- int i = 0;
- cost = vp8_cost_one(p [mvpis_short_hp]);
-
- do
- cost += vp8_cost_bit(p [MVPbits_hp + i], (x >> i) & 1);
-
- while (++i < mvnum_short_bits_hp);
-
- i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- cost += vp8_cost_bit(p [MVPbits_hp + i], (x >> i) & 1);
-
- while (--i > mvnum_short_bits_hp);
-
- if (x & ~((2 << mvnum_short_bits_hp) - 1))
- cost += vp8_cost_bit(p [MVPbits_hp + mvnum_short_bits_hp],
- (x >> mvnum_short_bits_hp) & 1);
- }
-
- return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
-}
-
-void vp8_build_component_cost_table_hp(int *mvcost[2],
- const MV_CONTEXT_HP *mvc,
- const int mvc_flag[2]) {
- int i = 1; // -mv_max;
- unsigned int cost0 = 0;
- unsigned int cost1 = 0;
-
- vp8_clear_system_state();
-
- i = 1;
-
- if (mvc_flag[0]) {
- mvcost [0] [0] = cost_mvcomponent_hp(0, &mvc[0]);
-
- do {
- // mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]);
- cost0 = cost_mvcomponent_hp(i, &mvc[0]);
-
- mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign_hp]);
- mvcost [0] [-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign_hp]);
- } while (++i <= mv_max_hp);
- }
-
- i = 1;
-
- if (mvc_flag[1]) {
- mvcost [1] [0] = cost_mvcomponent_hp(0, &mvc[1]);
-
- do {
- // mvcost [1] [i] = cost_mvcomponent( i, mvc[1]);
- cost1 = cost_mvcomponent_hp(i, &mvc[1]);
-
- mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign_hp]);
- mvcost [1] [-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign_hp]);
- } while (++i <= mv_max_hp);
- }
-}
-
-
-static void write_component_probs_hp(
- vp8_writer *const bc,
- struct mv_context_hp *cur_mvc,
- const struct mv_context_hp *default_mvc_,
- const struct mv_context_hp *update_mvc,
- const unsigned int events [MVvals_hp],
- unsigned int rc,
- int *updated
-) {
- vp8_prob *Pcur = cur_mvc->prob;
- const vp8_prob *default_mvc = default_mvc_->prob;
- const vp8_prob *Pupdate = update_mvc->prob;
- unsigned int is_short_ct[2], sign_ct[2];
-
- unsigned int bit_ct [mvlong_width_hp] [2];
-
- unsigned int short_ct [mvnum_short_hp];
- unsigned int short_bct [mvnum_short_hp - 1] [2];
-
- vp8_prob Pnew [MVPcount_hp];
-
- (void) rc;
- vp8_copy_array(Pnew, default_mvc, MVPcount_hp);
-
- vp8_zero(is_short_ct)
- vp8_zero(sign_ct)
- vp8_zero(bit_ct)
- vp8_zero(short_ct)
- vp8_zero(short_bct)
-
-
- // j=0
- {
- const int c = events [mv_max_hp];
-
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
- }
-
- // j: 1 ~ mv_max (1023)
- {
- int j = 1;
-
- do {
- const int c1 = events [mv_max_hp + j]; // positive
- const int c2 = events [mv_max_hp - j]; // negative
- const int c = c1 + c2;
- int a = j;
-
- sign_ct [0] += c1;
- sign_ct [1] += c2;
-
- if (a < mvnum_short_hp) {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- } else {
- int k = mvlong_width_hp - 1;
- is_short_ct [1] += c; // Long vector
-
- /* bit 3 not always encoded. */
- do
- bit_ct [k] [(a >> k) & 1] += c;
-
- while (--k >= 0);
- }
- } while (++j <= mv_max_hp);
- }
-
- calc_prob(Pnew + mvpis_short_hp, is_short_ct);
-
- calc_prob(Pnew + MVPsign_hp, sign_ct);
-
- {
- vp8_prob p [mvnum_short_hp - 1]; /* actually only need branch ct */
- int j = 0;
-
- vp8_tree_probs_from_distribution(
- mvnum_short_hp, vp8_small_mvencodings_hp, vp8_small_mvtree_hp,
- p, short_bct, short_ct,
- 256, 1
- );
-
- do
- calc_prob(Pnew + MVPshort_hp + j, short_bct[j]);
-
- while (++j < mvnum_short_hp - 1);
- }
-
- {
- int j = 0;
-
- do
- calc_prob(Pnew + MVPbits_hp + j, bit_ct[j]);
-
- while (++j < mvlong_width_hp);
- }
-
- update(bc, is_short_ct, Pcur + mvpis_short_hp, Pnew[mvpis_short_hp],
- *Pupdate++, updated);
-
- update(bc, sign_ct, Pcur + MVPsign_hp, Pnew[MVPsign_hp], *Pupdate++,
- updated);
-
- {
- const vp8_prob *const new_p = Pnew + MVPshort_hp;
- vp8_prob *const cur_p = Pcur + MVPshort_hp;
-
- int j = 0;
-
- do
-
- update(bc, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvnum_short_hp - 1);
- }
-
- {
- const vp8_prob *const new_p = Pnew + MVPbits_hp;
- vp8_prob *const cur_p = Pcur + MVPbits_hp;
-
- int j = 0;
-
- do
-
- update(bc, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvlong_width_hp);
- }
-}
-
-void vp8_write_mvprobs_hp(VP8_COMP* const cpi, vp8_writer* const bc) {
- MV_CONTEXT_HP *mvc = cpi->common.fc.mvc_hp;
- int flags[2] = {0, 0};
-#ifdef ENTROPY_STATS
- active_section = 4;
-#endif
- write_component_probs_hp(
- bc, &mvc[0], &vp8_default_mv_context_hp[0], &vp8_mv_update_probs_hp[0],
- cpi->MVcount_hp[0], 0, &flags[0]
- );
- write_component_probs_hp(
- bc, &mvc[1], &vp8_default_mv_context_hp[1], &vp8_mv_update_probs_hp[1],
- cpi->MVcount_hp[1], 1, &flags[1]
- );
-
- if (flags[0] || flags[1])
- vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp,
- (const MV_CONTEXT_HP *)
- cpi->common.fc.mvc_hp, flags);
-#ifdef ENTROPY_STATS
- active_section = 5;
-#endif
-}
-
-#endif /* CONFIG_NEWMVENTROPY */
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index c06831cb2..254536580 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -14,7 +14,6 @@
#include "onyx_int.h"
-#if CONFIG_NEWMVENTROPY
void vp8_write_nmvprobs(VP8_COMP* const, int usehp, vp8_writer* const);
void vp8_encode_nmv(vp8_writer* const w, const MV* const mv,
const MV* const ref, const nmv_context* const mvctx);
@@ -27,19 +26,5 @@ void vp8_build_nmv_cost_table(int *mvjoint,
int usehp,
int mvc_flag_v,
int mvc_flag_h);
-#else /* CONFIG_NEWMVENTROPY */
-void vp8_write_mvprobs(VP8_COMP* const, vp8_writer* const);
-void vp8_encode_motion_vector(vp8_writer* const, const MV* const,
- const MV_CONTEXT* const);
-void vp8_build_component_cost_table(int *mvcost[2],
- const MV_CONTEXT*,
- const int mvc_flag[2]);
-void vp8_write_mvprobs_hp(VP8_COMP* const, vp8_writer* const);
-void vp8_encode_motion_vector_hp(vp8_writer* const, const MV* const,
- const MV_CONTEXT_HP* const);
-void vp8_build_component_cost_table_hp(int *mvcost[2],
- const MV_CONTEXT_HP*,
- const int mvc_flag[2]);
-#endif /* CONFIG_NEWMVENTROPY */
#endif
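
With the legacy half removed, encodemv.h is down to the nmv entry points. For readability, the cost-table builder with its elided middle parameters written out; the names and order of those two are assumptions inferred from the function body shown in encodemv.c above, not confirmed by this hunk:

  /* Assumed full prototype; only mvjoint, usehp and the two component flags
   * are visible in this diff, the mvcost/mvctx parameters are inferred. */
  void vp8_build_nmv_cost_table(int *mvjoint,
                                int *mvcost[2],            /* inferred */
                                const nmv_context *mvctx,  /* inferred */
                                int usehp,
                                int mvc_flag_v,
                                int mvc_flag_h);
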
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 95f6d97d3..09d5a762e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -364,7 +364,8 @@ static void zz_motion_search(VP8_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r
ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre);
- VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16)(src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
+ vp8_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
+ (unsigned int *)(best_motion_err));
}
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -387,7 +388,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
int new_mv_mode_penalty = 256;
// override the default variance function to use MSE
- v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
+ v_fn_ptr.vf = vp8_mse16x16;
// Set up pointers for this macro block recon buffer
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
@@ -492,12 +493,7 @@ void vp8_first_pass(VP8_COMP *cpi) {
{
int flag[2] = {1, 1};
vp8_init_mv_probs(cm);
-#if CONFIG_NEWMVENTROPY
vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
-#else
- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
- vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cm->fc.mvc_hp, flag);
-#endif
}
// for each macroblock row in image
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 356e32c3f..44e83fdc7 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -23,80 +23,6 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER
void vp8_cmachine_specific_config(VP8_COMP *cpi) {
#if CONFIG_RUNTIME_CPU_DETECT
cpi->rtcd.common = &cpi->common.rtcd;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32 = vp8_sad32x32_c;
-#endif
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
-
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32x3 = vp8_sad32x32x3_c;
-#endif
- cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_c;
- cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_c;
- cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_c;
- cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
- cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
-
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32x8 = vp8_sad32x32x8_c;
-#endif
- cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
- cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
- cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
- cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
- cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
-
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32x4d = vp8_sad32x32x4d_c;
-#endif
- cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
- cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
- cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
- cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_c;
- cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_c;
-#if ARCH_X86 || ARCH_X86_64
- cpi->rtcd.variance.copy32xn = vp8_copy32xn_c;
-#endif
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.var32x32 = vp8_variance32x32_c;
-#endif
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.subpixvar32x32 = vp8_sub_pixel_variance32x32_c;
-#endif
- cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.halfpixvar32x32_h = vp8_variance_halfpixvar32x32_h_c;
-#endif
- cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.halfpixvar32x32_v = vp8_variance_halfpixvar32x32_v_c;
-#endif
- cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.halfpixvar32x32_hv = vp8_variance_halfpixvar32x32_hv_c;
-#endif
- cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.subpixmse32x32 = vp8_sub_pixel_mse32x32_c;
-#endif
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c;
cpi->rtcd.fdct.short16x16 = vp8_short_fdct16x16_c;
@@ -118,16 +44,11 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) {
cpi->rtcd.search.refining_search = vp8_refining_search_sad;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
- cpi->rtcd.variance.satd16x16 = vp8_satd16x16_c;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
-#if CONFIG_INTERNAL_STATS
- cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c;
- cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c;
-#endif
#endif
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c
index 180ee5870..2eecfcdad 100644
--- a/vp8/encoder/mbgraph.c
+++ b/vp8/encoder/mbgraph.c
@@ -83,10 +83,8 @@ static unsigned int do_16x16_motion_iteration
vp8_set_mbmode_and_mvs(x, NEWMV, dst_mv);
vp8_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
- // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- best_err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (xd->dst.y_buffer, xd->dst.y_stride,
- xd->predictor, 16, INT_MAX);
+ best_err = vp8_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
+ xd->predictor, 16, INT_MAX);
/* restore UMV window */
x->mv_col_min = tmp_col_min;
@@ -130,11 +128,8 @@ static int do_16x16_motion_search
// FIXME should really use something like near/nearest MV and/or MV prediction
xd->pre.y_buffer = ref->y_buffer + mb_y_offset;
xd->pre.y_stride = ref->y_stride;
- // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (ref->y_buffer + mb_y_offset,
- ref->y_stride, xd->dst.y_buffer,
- xd->dst.y_stride, INT_MAX);
+ err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
dst_mv->as_int = 0;
// Test last reference frame using the previous best mv as the
@@ -193,10 +188,8 @@ static int do_16x16_zerozero_search
xd->pre.y_buffer = ref->y_buffer + mb_y_offset;
xd->pre.y_stride = ref->y_stride;
// VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (ref->y_buffer + mb_y_offset,
- ref->y_stride, xd->dst.y_buffer,
- xd->dst.y_stride, INT_MAX);
+ err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
dst_mv->as_int = 0;
@@ -221,11 +214,8 @@ static int find_best_16x16_intra
xd->mode_info_context->mbmi.mode = mode;
vp8_build_intra_predictors_mby(xd);
- // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (xd->predictor, 16,
- buf->y_buffer + mb_y_offset,
- buf->y_stride, best_err);
+ err = vp8_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset,
+ buf->y_stride, best_err);
// find best
if (err < best_err) {
best_err = err;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index a6cf2f18b..210887491 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -47,15 +47,9 @@ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
MV v;
v.row = (mv->as_mv.row - ref->as_mv.row);
v.col = (mv->as_mv.col - ref->as_mv.col);
-#if CONFIG_NEWMVENTROPY
return ((mvjcost[vp8_get_mv_joint(v)] +
mvcost[0][v.row] + mvcost[1][v.col]) *
Weight) >> 7;
-#else
- return ((mvcost[0][v.row >> (ishp == 0)] +
- mvcost[1][v.col >> (ishp == 0)])
- * Weight) >> 7;
-#endif
}
static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
@@ -64,14 +58,9 @@ static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
MV v;
v.row = (mv->as_mv.row - ref->as_mv.row);
v.col = (mv->as_mv.col - ref->as_mv.col);
-#if CONFIG_NEWMVENTROPY
return ((mvjcost[vp8_get_mv_joint(v)] +
mvcost[0][v.row] + mvcost[1][v.col]) *
error_per_bit + 128) >> 8;
-#else
- return ((mvcost[0][v.row >> (ishp == 0)] +
- mvcost[1][v.col >> (ishp == 0)]) * error_per_bit + 128) >> 8;
-#endif
}
return 0;
}
@@ -83,14 +72,9 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, DEC_MVSADCOSTS,
MV v;
v.row = (mv->as_mv.row - ref->as_mv.row);
v.col = (mv->as_mv.col - ref->as_mv.col);
-#if CONFIG_NEWMVENTROPY
return ((mvjsadcost[vp8_get_mv_joint(v)] +
mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
error_per_bit + 128) >> 8;
-#else
- return ((mvsadcost[0][v.row] + mvsadcost[1][v.col])
- * error_per_bit + 128) >> 8;
-#endif
}
return 0;
}
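
With CONFIG_NEWMVENTROPY now unconditional, every candidate vector is priced the same way: a joint-class cost plus two per-component table lookups, scaled by error_per_bit. A restatement of the expression used by mv_err_cost() and mvsad_err_cost() above:

  /* Restates the cost expression above; MV/int_mv and vp8_get_mv_joint()
   * come from the existing encoder headers. */
  static int nmv_err_cost_sketch(const int_mv *mv, const int_mv *ref,
                                 int *mvjcost, int *mvcost[2],
                                 int error_per_bit) {
    MV v;
    v.row = mv->as_mv.row - ref->as_mv.row;
    v.col = mv->as_mv.col - ref->as_mv.col;
    return ((mvjcost[vp8_get_mv_joint(v)] +
             mvcost[0][v.row] + mvcost[1][v.col]) * error_per_bit + 128) >> 8;
  }

Negative row/col deltas index the tables directly because the nmvcost pointers are biased to the table centre (see the &...nmvcosts[i][MV_MAX] assignments in onyx_if.c below).
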
@@ -220,35 +204,42 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
* could reduce the area.
*/
-#if CONFIG_NEWMVENTROPY
/* estimated cost of a motion vector (r,c) */
-#define MVC(r,c) \
- (mvcost ? \
- ((mvjcost[((r)!=rr)*2 + ((c)!=rc)] + \
- mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * error_per_bit + 128 )>>8 : 0)
-#else
-#define MVC(r,c) \
- (mvcost ? \
- ((mvcost[0][((r)-rr)>>(xd->allow_high_precision_mv==0)] + \
- mvcost[1][((c)-rc)>>(xd->allow_high_precision_mv==0)]) * \
- error_per_bit + 128 )>>8 : 0)
-#endif /* CONFIG_NEWMVENTROPY */
+#define MVC(r, c) \
+ (mvcost ? \
+ ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
+ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
+ error_per_bit + 128) >> 8 : 0)
-#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset
+ // for svf calc
-#define IFMVCV(r,c,s,e) \
- if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
+#define IFMVCV(r, c, s, e) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
+ s \
+ else \
+ e;
/* pointer to predictor base of a motionvector */
-#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset)))
+#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
/* returns subpixel variance error function */
-#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
-
-/* checks if (r,c) has better score than previous best */
-#define CHECK_BETTER(v,r,c) \
- IFMVCV(r,c,{thismse = (DIST(r,c)); if((v = MVC(r,c)+thismse) < besterr) \
- { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)
+#define DIST(r, c) \
+ vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+ IFMVCV(r, c, { \
+ thismse = (DIST(r, c)); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ }, \
+ v = INT_MAX;)
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
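
The reformatted macros are easier to audit when expanded by hand; one probe of CHECK_BETTER(v, r, c) unrolls to the following, where the locals (rr/rc, minr/maxr/minc/maxc, br/bc, besterr, thismse, sse, y, z, offset, y_stride, b, vfp) all belong to the enclosing sub-pixel search function:

  /* Hand expansion of CHECK_BETTER(v, r, c) using the IFMVCV/DIST/PRE/SP
   * definitions above; MVC(r, c) is the rate term defined just before. */
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {
    thismse = vfp->svf(y + ((r >> 3) * y_stride + (c >> 3) - offset),  /* PRE  */
                       y_stride,
                       (c & 7) << 1, (r & 7) << 1,                     /* SP   */
                       z, b->src_stride, &sse);                        /* DIST */
    if ((v = MVC(r, c) + thismse) < besterr) {
      besterr = v;            /* new best score */
      br = r;                 /* remember best position */
      bc = c;
      *distortion = thismse;
      *sse1 = sse;
    }
  } else {
    v = INT_MAX;              /* candidate outside the allowed MV range */
  }
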
@@ -307,17 +298,10 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
br = bestmv->as_mv.row << 3;
bc = bestmv->as_mv.col << 3;
hstep = 4;
-#if CONFIG_NEWMVENTROPY
minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
-#else
- minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
- maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
- minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
- maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
-#endif
tr = br;
tc = bc;
@@ -403,13 +387,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
tc = bc;
}
-#if CONFIG_NEWMVENTROPY
if (xd->allow_high_precision_mv) {
usehp = vp8_use_nmv_hp(&ref_mv->as_mv);
} else {
usehp = 0;
}
-#endif
if (usehp) {
hstep >>= 1;
@@ -771,13 +753,11 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
-#if CONFIG_NEWMVENTROPY
if (x->e_mbd.allow_high_precision_mv) {
usehp = vp8_use_nmv_hp(&ref_mv->as_mv);
} else {
usehp = 0;
}
-#endif
if (!usehp)
return bestmse;
@@ -1304,16 +1284,8 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1423,16 +1395,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1479,7 +1443,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
for (t = 0; t < 4; t++)
block_offset[t] = ss[i + t].offset + best_address;
- fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
+ sad_array);
for (t = 0; t < 4; t++, i++) {
if (sad_array[t] < bestsad) {
@@ -1631,16 +1596,8 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_max = ref_col + distance;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1735,16 +1692,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned int sad_array[3];
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1872,16 +1821,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned int sad_array[3];
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -2022,16 +1963,8 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -2106,16 +2039,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index afca58084..f09106927 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -15,21 +15,12 @@
#include "block.h"
#include "variance.h"
-#if CONFIG_NEWMVENTROPY
#define MVCOSTS mvjcost, mvcost
#define MVSADCOSTS mvjsadcost, mvsadcost
#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
#define DEC_MVSADCOSTS int *mvjsadcost, int *mvsadcost[2]
#define NULLMVCOST NULL, NULL
#define XMVCOST x->nmvjointcost, (x->e_mbd.allow_high_precision_mv?x->nmvcost_hp:x->nmvcost)
-#else
-#define MVCOSTS mvcost
-#define MVSADCOSTS mvsadcost
-#define DEC_MVCOSTS int *mvcost[2]
-#define DEC_MVSADCOSTS int *mvsadcost[2]
-#define NULLMVCOST NULL
-#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
-#endif /* CONFIG_NEWMVENTROPY */
#ifdef ENTROPY_STATS
extern void init_mv_ref_counts();
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index b1abd1e2a..23b9973c3 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -46,14 +46,11 @@ void vp8_init_mode_costs(VP8_COMP *c) {
vp8_cost_tokens(c->mb.i8x8_mode_costs,
x->fc.i8x8_mode_prob, vp8_i8x8_mode_tree);
-#if CONFIG_SWITCHABLE_INTERP
{
int i;
for (i = 0; i <= VP8_SWITCHABLE_FILTERS; ++i)
- //for (i = 0; i <= 0; ++i)
vp8_cost_tokens((int *)c->mb.switchable_interp_costs[i],
x->fc.switchable_interp_prob[i],
vp8_switchable_interp_tree);
}
-#endif
}
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 14e9e784a..f11ff5936 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -77,7 +77,7 @@ extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFF
extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
#endif
-int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
+int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance);
@@ -85,6 +85,7 @@ static void set_default_lf_deltas(VP8_COMP *cpi);
extern const int vp8_gf_interval_table[101];
+#define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */
#define SEARCH_BEST_FILTER 0 /* to search exhaustively for
best filter */
#define RESET_FOREACH_FILTER 0 /* whether to reset the encoder state
@@ -101,25 +102,14 @@ extern const int vp8_gf_interval_table[101];
#if CONFIG_INTERNAL_STATS
#include "math.h"
-extern double vp8_calc_ssim
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- int lumamask,
- double *weight,
- const vp8_variance_rtcd_vtable_t *rtcd
-);
+extern double vp8_calc_ssim(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, int lumamask,
+ double *weight);
-extern double vp8_calc_ssimg
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v,
- const vp8_variance_rtcd_vtable_t *rtcd
-);
+extern double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, double *ssim_y,
+ double *ssim_u, double *ssim_v);
#endif
@@ -149,12 +139,10 @@ extern int skip_false_count;
extern int intra_mode_stats[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
#endif
-#if CONFIG_NEWMVENTROPY
#ifdef NMV_STATS
extern void init_nmvstats();
extern void print_nmvstats();
#endif
-#endif
#ifdef SPEEDSTATS
unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -1630,7 +1618,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) {
cpi->cq_target_quality = cpi->oxcf.cq_level;
if (!cm->use_bilinear_mc_filter)
- cm->mcomp_filter_type = EIGHTTAP;
+ cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
else
cm->mcomp_filter_type = BILINEAR;
@@ -1700,8 +1688,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) {
#define M_LOG2_E 0.693147180559945309417
#define log2f(x) (log (x) / (float) M_LOG2_E)
-#if CONFIG_NEWMVENTROPY
-
static void cal_nmvjointsadcost(int *mvjointsadcost) {
mvjointsadcost[0] = 600;
mvjointsadcost[1] = 300;
@@ -1739,40 +1725,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX);
}
-#else
-
-static void cal_mvsadcosts(int *mvsadcost[2]) {
- int i = 1;
-
- mvsadcost [0] [0] = 300;
- mvsadcost [1] [0] = 300;
-
- do {
- double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost [0][i] = (int) z;
- mvsadcost [1][i] = (int) z;
- mvsadcost [0][-i] = (int) z;
- mvsadcost [1][-i] = (int) z;
- } while (++i <= mvfp_max);
-}
-
-static void cal_mvsadcosts_hp(int *mvsadcost[2]) {
- int i = 1;
-
- mvsadcost [0] [0] = 300;
- mvsadcost [1] [0] = 300;
-
- do {
- double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost [0][i] = (int) z;
- mvsadcost [1][i] = (int) z;
- mvsadcost [0][-i] = (int) z;
- mvsadcost [1][-i] = (int) z;
- } while (++i <= mvfp_max_hp);
-}
-
-#endif /* CONFIG_NEWMVENTROPY */
-
VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
int i;
volatile union {
@@ -1824,10 +1776,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
#endif
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
cm->prob_comppred[i] = 128;
-#if CONFIG_TX_SELECT
for (i = 0; i < TX_SIZE_MAX - 1; i++)
cm->prob_tx[i] = 128;
-#endif
  // Prime the recent reference frame usage counters.
// Hereafter they will be maintained as a sort of moving average
@@ -1888,11 +1838,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
vp8_zero(inter_uv_modes);
vp8_zero(inter_b_modes);
#endif
-#if CONFIG_NEWMVENTROPY
#ifdef NMV_STATS
init_nmvstats();
#endif
-#endif
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
@@ -1958,7 +1906,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
-#if CONFIG_NEWMVENTROPY
cal_nmvjointsadcost(cpi->mb.nmvjointsadcost);
cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX];
cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX];
@@ -1971,19 +1918,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX];
cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX];
cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp);
-#else
- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max + 1];
- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max + 1];
- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max + 1];
- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max + 1];
- cal_mvsadcosts(cpi->mb.mvsadcost);
-
- cpi->mb.mvcost_hp[0] = &cpi->mb.mvcosts_hp[0][mv_max_hp + 1];
- cpi->mb.mvcost_hp[1] = &cpi->mb.mvcosts_hp[1][mv_max_hp + 1];
- cpi->mb.mvsadcost_hp[0] = &cpi->mb.mvsadcosts_hp[0][mvfp_max_hp + 1];
- cpi->mb.mvsadcost_hp[1] = &cpi->mb.mvsadcosts_hp[1][mvfp_max_hp + 1];
- cal_mvsadcosts_hp(cpi->mb.mvsadcost_hp);
-#endif /* CONFIG_NEWMVENTROPY */
for (i = 0; i < KEY_FRAME_CONTEXT; i++) {
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
@@ -2027,74 +1961,48 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
init_mv_ref_counts();
#endif
+#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
+ cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
+ cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF;
+
+
#if CONFIG_SUPERBLOCKS
- cpi->fn_ptr[BLOCK_32X32].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32);
- cpi->fn_ptr[BLOCK_32X32].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32);
- cpi->fn_ptr[BLOCK_32X32].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar32x32);
- cpi->fn_ptr[BLOCK_32X32].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_h);
- cpi->fn_ptr[BLOCK_32X32].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_v);
- cpi->fn_ptr[BLOCK_32X32].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_hv);
- cpi->fn_ptr[BLOCK_32X32].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x3);
- cpi->fn_ptr[BLOCK_32X32].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x8);
- cpi->fn_ptr[BLOCK_32X32].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x4d);
+ BFP(BLOCK_32X32, vp8_sad32x32, vp8_variance32x32, vp8_sub_pixel_variance32x32,
+ vp8_variance_halfpixvar32x32_h, vp8_variance_halfpixvar32x32_v,
+ vp8_variance_halfpixvar32x32_hv, vp8_sad32x32x3, vp8_sad32x32x8,
+ vp8_sad32x32x4d)
#endif
- cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
- cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
- cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
- cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
- cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
- cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
- cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
- cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
- cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
-
- cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
- cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
- cpi->fn_ptr[BLOCK_16X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
- cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
- cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
- cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
-
- cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
- cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
- cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
- cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
- cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
- cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
-
- cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
- cpi->fn_ptr[BLOCK_8X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
- cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
- cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
- cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
- cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
-
- cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
- cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
- cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
- cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
- cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
- cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
+ BFP(BLOCK_16X16, vp8_sad16x16, vp8_variance16x16, vp8_sub_pixel_variance16x16,
+ vp8_variance_halfpixvar16x16_h, vp8_variance_halfpixvar16x16_v,
+ vp8_variance_halfpixvar16x16_hv, vp8_sad16x16x3, vp8_sad16x16x8,
+ vp8_sad16x16x4d)
+
+ BFP(BLOCK_16X8, vp8_sad16x8, vp8_variance16x8, vp8_sub_pixel_variance16x8,
+ NULL, NULL, NULL, vp8_sad16x8x3, vp8_sad16x8x8, vp8_sad16x8x4d)
+
+ BFP(BLOCK_8X16, vp8_sad8x16, vp8_variance8x16, vp8_sub_pixel_variance8x16,
+ NULL, NULL, NULL, vp8_sad8x16x3, vp8_sad8x16x8, vp8_sad8x16x4d)
+
+ BFP(BLOCK_8X8, vp8_sad8x8, vp8_variance8x8, vp8_sub_pixel_variance8x8,
+ NULL, NULL, NULL, vp8_sad8x8x3, vp8_sad8x8x8, vp8_sad8x8x4d)
+
+ BFP(BLOCK_4X4, vp8_sad4x4, vp8_variance4x4, vp8_sub_pixel_variance4x4,
+ NULL, NULL, NULL, vp8_sad4x4x3, vp8_sad4x4x8, vp8_sad4x4x4d)
#if ARCH_X86 || ARCH_X86_64
- cpi->fn_ptr[BLOCK_16X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_16X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_8X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_8X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_4X4].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
+ cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_8X8].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_4X4].copymem = vp8_copy32xn;
#endif
cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
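
The BFP macro above packs the nine per-block-size function-pointer assignments into one statement; expanding the BLOCK_16X8 invocation gives back exactly the code it replaces:

  /* What BFP(BLOCK_16X8, ...) above expands to: */
  cpi->fn_ptr[BLOCK_16X8].sdf            = vp8_sad16x8;
  cpi->fn_ptr[BLOCK_16X8].vf             = vp8_variance16x8;
  cpi->fn_ptr[BLOCK_16X8].svf            = vp8_sub_pixel_variance16x8;
  cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;
  cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
  cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
  cpi->fn_ptr[BLOCK_16X8].sdx3f          = vp8_sad16x8x3;
  cpi->fn_ptr[BLOCK_16X8].sdx8f          = vp8_sad16x8x8;
  cpi->fn_ptr[BLOCK_16X8].sdx4df         = vp8_sad16x8x4d;
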
@@ -2136,12 +2044,10 @@ void vp8_remove_compressor(VP8_PTR *ptr) {
print_mode_context();
}
#endif
-#if CONFIG_NEWMVENTROPY
#ifdef NMV_STATS
if (cpi->pass != 1)
print_nmvstats();
#endif
-#endif
#if CONFIG_INTERNAL_STATS
@@ -2370,8 +2276,7 @@ void vp8_remove_compressor(VP8_PTR *ptr) {
static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
unsigned char *recon, int recon_stride,
- unsigned int cols, unsigned int rows,
- vp8_variance_rtcd_vtable_t *rtcd) {
+ unsigned int cols, unsigned int rows) {
unsigned int row, col;
uint64_t total_sse = 0;
int diff;
@@ -2380,9 +2285,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
for (col = 0; col + 16 <= cols; col += 16) {
unsigned int sse;
- VARIANCE_INVOKE(rtcd, mse16x16)(orig + col, orig_stride,
- recon + col, recon_stride,
- &sse);
+ vp8_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse);
total_sse += sse;
}
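
The per-plane SSE accumulated above feeds the PSNR packet below; for reference, the usual SSE-to-PSNR conversion (illustrative helper, not part of this change):

  #include <math.h>

  /* Illustrative only: standard PSNR from summed squared error over
   * `samples` pixels with peak value 255; the 99 dB clamp for a perfect
   * match is a common convention, not taken from this diff. */
  static double sse_to_psnr(double samples, double peak, double sse) {
    if (sse == 0.0)
      return 99.0;
    return 10.0 * log10(peak * peak * samples / sse);
  }
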
@@ -2434,8 +2337,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) {
pkt.kind = VPX_CODEC_PSNR_PKT;
sse = calc_plane_error(orig->y_buffer, orig->y_stride,
recon->y_buffer, recon->y_stride,
- width, height,
- IF_RTCD(&cpi->rtcd.variance));
+ width, height);
pkt.data.psnr.sse[0] = sse;
pkt.data.psnr.sse[1] = sse;
pkt.data.psnr.samples[0] = width * height;
@@ -2446,8 +2348,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) {
sse = calc_plane_error(orig->u_buffer, orig->uv_stride,
recon->u_buffer, recon->uv_stride,
- width, height,
- IF_RTCD(&cpi->rtcd.variance));
+ width, height);
pkt.data.psnr.sse[0] += sse;
pkt.data.psnr.sse[2] = sse;
pkt.data.psnr.samples[0] += width * height;
@@ -2455,8 +2356,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) {
sse = calc_plane_error(orig->v_buffer, orig->uv_stride,
recon->v_buffer, recon->uv_stride,
- width, height,
- IF_RTCD(&cpi->rtcd.variance));
+ width, height);
pkt.data.psnr.sse[0] += sse;
pkt.data.psnr.sse[3] = sse;
pkt.data.psnr.samples[0] += width * height;
@@ -3034,13 +2934,10 @@ static void encode_frame_to_data_rate
/* list of filters to search over */
int mcomp_filters_to_search[] = {
-#if CONFIG_SWITCHABLE_INTERP
EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE
-#else
- EIGHTTAP, EIGHTTAP_SHARP, SIXTAP,
-#endif
};
- int mcomp_filters = sizeof(mcomp_filters_to_search) / sizeof(*mcomp_filters_to_search);
+ int mcomp_filters = sizeof(mcomp_filters_to_search) /
+ sizeof(*mcomp_filters_to_search);
int mcomp_filter_index = 0;
INT64 mcomp_filter_cost[4];
@@ -3265,12 +3162,7 @@ static void encode_frame_to_data_rate
cm->mcomp_filter_type = mcomp_filters_to_search[0];
mcomp_filter_index = 0;
} else {
-#if CONFIG_SWITCHABLE_INTERP
- cm->mcomp_filter_type = SWITCHABLE;
-#else
- cm->mcomp_filter_type =
- (Q < SHARP_FILTER_QTHRESH ? EIGHTTAP_SHARP : EIGHTTAP);
-#endif
+ cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
}
/* TODO: Decide this more intelligently */
xd->allow_high_precision_mv = (Q < HIGH_PRECISION_MV_QTHRESH);
@@ -3428,8 +3320,7 @@ static void encode_frame_to_data_rate
if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
int last_q = Q;
int kf_err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
int high_err_target = cpi->ambient_err;
int low_err_target = (cpi->ambient_err >> 1);
@@ -3584,7 +3475,6 @@ static void encode_frame_to_data_rate
if (cpi->is_src_frame_alt_ref)
Loop = FALSE;
-#if CONFIG_SWITCHABLE_INTERP
if (cm->frame_type != KEY_FRAME &&
!sf->search_best_filter &&
cm->mcomp_filter_type == SWITCHABLE) {
@@ -3610,19 +3500,16 @@ static void encode_frame_to_data_rate
if (count[i]) {
cm->mcomp_filter_type = vp8_switchable_interp[i];
Loop = TRUE; /* Make sure to loop since the filter changed */
- //loop_count = -1;
break;
}
}
}
}
-#endif
if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) {
if (mcomp_filter_index < mcomp_filters) {
INT64 err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
INT64 rate = cpi->projected_frame_size << 8;
mcomp_filter_cost[mcomp_filter_index] =
(RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err));
@@ -3684,8 +3571,7 @@ static void encode_frame_to_data_rate
// the force key frame
if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) {
cpi->ambient_err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
}
// This frame's MVs are saved and will be used in next frame's MV
@@ -3758,18 +3644,12 @@ static void encode_frame_to_data_rate
update_reference_frames(cm);
vp8_copy(cpi->common.fc.coef_counts, cpi->coef_counts);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cpi->common.fc.hybrid_coef_counts, cpi->hybrid_coef_counts);
-#endif
vp8_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cpi->common.fc.hybrid_coef_counts_8x8, cpi->hybrid_coef_counts_8x8);
-#endif
vp8_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cpi->common.fc.hybrid_coef_counts_16x16,
cpi->hybrid_coef_counts_16x16);
-#endif
vp8_adapt_coef_probs(&cpi->common);
if (cpi->common.frame_type != KEY_FRAME) {
vp8_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
@@ -3780,14 +3660,8 @@ static void encode_frame_to_data_rate
vp8_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
vp8_adapt_mode_probs(&cpi->common);
-#if CONFIG_NEWMVENTROPY
cpi->common.fc.NMVcount = cpi->NMVcount;
vp8_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
-#else
- vp8_copy(cpi->common.fc.MVcount, cpi->MVcount);
- vp8_copy(cpi->common.fc.MVcount_hp, cpi->MVcount_hp);
- vp8_adapt_mv_probs(&cpi->common);
-#endif /* CONFIG_NEWMVENTROPY */
vp8_update_mode_context(&cpi->common);
}
@@ -3903,8 +3777,7 @@ static void encode_frame_to_data_rate
vp8_clear_system_state(); // __asm emms;
recon_err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
if (cpi->twopass.total_left_stats->coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
@@ -4390,16 +4263,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
int64_t sq_error;
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
- recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height,
- IF_RTCD(&cpi->rtcd.variance));
+ recon->y_buffer, recon->y_stride, orig->y_width,
+ orig->y_height);
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
- recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ recon->u_buffer, recon->uv_stride, orig->uv_width,
+ orig->uv_height);
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
- recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ recon->v_buffer, recon->uv_stride, orig->uv_width,
+ orig->uv_height);
sq_error = ye + ue + ve;
@@ -4419,16 +4292,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
vp8_clear_system_state();
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
- pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height,
- IF_RTCD(&cpi->rtcd.variance));
+ pp->y_buffer, pp->y_stride, orig->y_width,
+ orig->y_height);
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
- pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ pp->u_buffer, pp->uv_stride, orig->uv_width,
+ orig->uv_height);
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
- pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ pp->v_buffer, pp->uv_stride, orig->uv_width,
+ orig->uv_height);
sq_error = ye + ue + ve;
@@ -4441,8 +4314,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
cpi->totalp += frame_psnr2;
frame_ssim2 = vp8_calc_ssim(cpi->Source,
- &cm->post_proc_buffer, 1, &weight,
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->post_proc_buffer, 1, &weight);
cpi->summed_quality += frame_ssim2 * weight;
cpi->summed_weights += weight;
@@ -4461,7 +4333,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (cpi->b_calculate_ssimg) {
double y, u, v, frame_all;
frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show,
- &y, &u, &v, IF_RTCD(&cpi->rtcd.variance));
+ &y, &u, &v);
cpi->total_ssimg_y += y;
cpi->total_ssimg_u += u;
cpi->total_ssimg_v += v;
@@ -4604,19 +4476,19 @@ int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert
-int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd) {
+int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {
int i, j;
int Total = 0;
unsigned char *src = source->y_buffer;
unsigned char *dst = dest->y_buffer;
- (void)rtcd;
// Loop through the Y plane raw and reconstruction data summing (square differences)
for (i = 0; i < source->y_height; i += 16) {
for (j = 0; j < source->y_width; j += 16) {
unsigned int sse;
- Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+ Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
+ &sse);
}
src += 16 * source->y_stride;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 01151280c..ab6802509 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -60,17 +60,10 @@
#define VP8_TEMPORAL_ALT_REF 1
typedef struct {
-#if CONFIG_NEWMVENTROPY
nmv_context nmvc;
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
-#else
- MV_CONTEXT mvc[2];
- int mvcosts[2][MVvals + 1];
- MV_CONTEXT_HP mvc_hp[2];
- int mvcosts_hp[2][MVvals_hp + 1];
-#endif
#ifdef MODE_STATS
// Stats
@@ -97,24 +90,18 @@ typedef struct {
vp8_prob coef_probs[BLOCK_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM
vp8_prob hybrid_coef_probs[BLOCK_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
vp8_prob coef_probs_8x8[BLOCK_TYPES_8X8]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_prob hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
vp8_prob coef_probs_16x16[BLOCK_TYPES_16X16]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
vp8_prob ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */
vp8_prob uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1];
@@ -123,10 +110,8 @@ typedef struct {
vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1];
vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1];
-#if CONFIG_SWITCHABLE_INTERP
vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS + 1]
[VP8_SWITCHABLE_FILTERS - 1];
-#endif
int mv_ref_ct[6][4][2];
int mode_context[6][4];
@@ -365,7 +350,6 @@ typedef struct {
typedef struct VP8_ENCODER_RTCD {
VP8_COMMON_RTCD *common;
- vp8_variance_rtcd_vtable_t variance;
vp8_fdct_rtcd_vtable_t fdct;
vp8_encodemb_rtcd_vtable_t encodemb;
vp8_search_rtcd_vtable_t search;
@@ -373,10 +357,10 @@ typedef struct VP8_ENCODER_RTCD {
} VP8_ENCODER_RTCD;
enum {
- BLOCK_16X8,
- BLOCK_8X16,
- BLOCK_8X8,
- BLOCK_4X4,
+ BLOCK_16X8 = PARTITIONING_16X8,
+ BLOCK_8X16 = PARTITIONING_8X16,
+ BLOCK_8X8 = PARTITIONING_8X8,
+ BLOCK_4X4 = PARTITIONING_4X4,
BLOCK_16X16,
BLOCK_MAX_SEGMENTS,
BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
@@ -465,13 +449,11 @@ typedef struct VP8_COMP {
int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
int comp_pred_count[COMP_PRED_CONTEXTS];
int single_pred_count[COMP_PRED_CONTEXTS];
-#if CONFIG_TX_SELECT
// FIXME contextualize
int txfm_count[TX_SIZE_MAX];
int txfm_count_8x8p[TX_SIZE_MAX - 1];
int64_t rd_tx_select_diff[NB_TXFM_MODES];
int rd_tx_select_threshes[4][NB_TXFM_MODES];
-#endif
int RDMULT;
int RDDIV;
@@ -563,39 +545,28 @@ typedef struct VP8_COMP {
// int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */
int y_uv_mode_count[VP8_YMODES][VP8_UV_MODES];
-#if CONFIG_NEWMVENTROPY
nmv_context_counts NMVcount;
-#else
- unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */
- unsigned int MVcount_hp [2] [MVvals_hp]; /* (row,col) MV cts this frame */
-#endif
unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM
unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#endif
unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM8X8
unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#endif
unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#endif
int gfu_boost;
int last_boost;
@@ -780,10 +751,8 @@ typedef struct VP8_COMP {
int pred_filter_on_count;
int pred_filter_off_count;
#endif
-#if CONFIG_SWITCHABLE_INTERP
- unsigned int switchable_interp_count[VP8_SWITCHABLE_FILTERS+1]
+ unsigned int switchable_interp_count[VP8_SWITCHABLE_FILTERS + 1]
[VP8_SWITCHABLE_FILTERS];
-#endif
#if CONFIG_NEW_MVREF
unsigned int best_ref_index_counts[MAX_MV_REFS];
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 954997889..57bd41468 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -21,7 +21,8 @@
#include "vpx_ports/arm.h"
#endif
-extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
+extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest);
#if HAVE_ARMV7
extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
#endif
@@ -71,7 +72,8 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst
vpx_memcpy(dst_y, src_y, ystride * (linestocopy + 16));
}
-static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd) {
+static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, int Fraction) {
int i, j;
int Total = 0;
int srcoffset, dstoffset;
@@ -79,7 +81,6 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
unsigned char *dst = dest->y_buffer;
int linestocopy = (source->y_height >> (Fraction + 4));
- (void)rtcd;
if (linestocopy < 1)
linestocopy = 1;
@@ -97,7 +98,8 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
for (i = 0; i < linestocopy; i += 16) {
for (j = 0; j < source->y_width; j += 16) {
unsigned int sse;
- Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+ Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
+ &sse);
}
src += 16 * source->y_stride;
@@ -179,7 +181,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
// Get the err using the previous frame's filter value.
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
- best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3);
// Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
@@ -192,7 +194,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame
- filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3);
// Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
@@ -221,7 +223,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame
- filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3);
// Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
@@ -308,7 +310,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme
vp8cx_set_alt_lf_level(cpi, filt_mid);
vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_mid, segment);
- best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ best_err = vp8_calc_ss_err(sd, cm->frame_to_show);
filt_best = filt_mid;
// Re-instate the unfiltered frame
@@ -348,7 +350,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_low, segment);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
@@ -383,7 +385,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme
vp8cx_set_alt_lf_level(cpi, filt_high);
vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_high, segment);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
@@ -517,7 +519,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8cx_set_alt_lf_level(cpi, filt_mid);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
- best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ best_err = vp8_calc_ss_err(sd, cm->frame_to_show);
filt_best = filt_mid;
// Re-instate the unfiltered frame
@@ -557,7 +559,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
@@ -592,7 +594,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8cx_set_alt_lf_level(cpi, filt_high);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index b6a1f27f8..16b4e6e1d 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -21,7 +21,6 @@
extern int enc_debug;
#endif
-#if CONFIG_HYBRIDTRANSFORM
void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
int i, rc, eob;
int zbin;
@@ -85,7 +84,6 @@ void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
d->eob = eob + 1;
}
-#endif
void vp8_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
int i, rc, eob;
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 1375ed0b0..e39433fc3 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -30,11 +30,9 @@
#include "arm/quantize_arm.h"
#endif
-#if CONFIG_HYBRIDTRANSFORM
#define prototype_quantize_block_type(sym) \
void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
extern prototype_quantize_block_type(vp8_ht_quantize_b_4x4);
-#endif
#ifndef vp8_quantize_quantb_4x4
#define vp8_quantize_quantb_4x4 vp8_regular_quantize_b_4x4
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 570bedfe9..cc3c82e74 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -132,17 +132,10 @@ void vp8_save_coding_context(VP8_COMP *cpi) {
// intended for use in a re-code loop in vp8_compress_frame where the
// quantizer value is adjusted between loop iterations.
-#if CONFIG_NEWMVENTROPY
cc->nmvc = cm->fc.nmvc;
vp8_copy(cc->nmvjointcost, cpi->mb.nmvjointcost);
vp8_copy(cc->nmvcosts, cpi->mb.nmvcosts);
vp8_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp);
-#else
- vp8_copy(cc->mvc, cm->fc.mvc);
- vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
- vp8_copy(cc->mvc_hp, cm->fc.mvc_hp);
- vp8_copy(cc->mvcosts_hp, cpi->mb.mvcosts_hp);
-#endif
vp8_copy(cc->mv_ref_ct, cm->fc.mv_ref_ct);
vp8_copy(cc->mode_context, cm->fc.mode_context);
@@ -178,20 +171,12 @@ void vp8_save_coding_context(VP8_COMP *cpi) {
vp8_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas);
vp8_copy(cc->coef_probs, cm->fc.coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cc->hybrid_coef_probs, cm->fc.hybrid_coef_probs);
-#endif
vp8_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
-#endif
vp8_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
-#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
-#endif
}
void vp8_restore_coding_context(VP8_COMP *cpi) {
@@ -202,17 +187,10 @@ void vp8_restore_coding_context(VP8_COMP *cpi) {
// Restore key state variables to the snapshot state stored in the
// previous call to vp8_save_coding_context.
-#if CONFIG_NEWMVENTROPY
cm->fc.nmvc = cc->nmvc;
vp8_copy(cpi->mb.nmvjointcost, cc->nmvjointcost);
vp8_copy(cpi->mb.nmvcosts, cc->nmvcosts);
vp8_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp);
-#else
- vp8_copy(cm->fc.mvc, cc->mvc);
- vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
- vp8_copy(cm->fc.mvc_hp, cc->mvc_hp);
- vp8_copy(cpi->mb.mvcosts_hp, cc->mvcosts_hp);
-#endif
vp8_copy(cm->fc.mv_ref_ct, cc->mv_ref_ct);
vp8_copy(cm->fc.mode_context, cc->mode_context);
@@ -249,20 +227,12 @@ void vp8_restore_coding_context(VP8_COMP *cpi) {
vp8_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas);
vp8_copy(cm->fc.coef_probs, cc->coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cm->fc.hybrid_coef_probs, cc->hybrid_coef_probs);
-#endif
vp8_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
-#endif
vp8_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
-#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
-#endif
}
@@ -275,16 +245,6 @@ void vp8_setup_key_frame(VP8_COMP *cpi) {
vp8_default_bmode_probs(cm->fc.bmode_prob);
vp8_init_mv_probs(& cpi->common);
-#if CONFIG_NEWMVENTROPY == 0
- /* this is not really required */
- {
- int flag[2] = {1, 1};
- vp8_build_component_cost_table(
- cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
- vp8_build_component_cost_table_hp(
- cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag);
- }
-#endif
// cpi->common.filter_level = 0; // Reset every key frame.
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 4b9e90725..e919de36f 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -60,10 +60,8 @@ extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
#define INVALID_MV 0x80008000
-#if CONFIG_SWITCHABLE_INTERP
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
-#endif
static const int auto_speed_thresh[17] = {
1000,
@@ -355,37 +353,31 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) {
cpi->mb.token_costs[TX_4X4],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs,
BLOCK_TYPES);
-#if CONFIG_HYBRIDTRANSFORM
fill_token_costs(
cpi->mb.hybrid_token_costs[TX_4X4],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11])
cpi->common.fc.hybrid_coef_probs,
BLOCK_TYPES);
-#endif
fill_token_costs(
cpi->mb.token_costs[TX_8X8],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8,
BLOCK_TYPES_8X8);
-#if CONFIG_HYBRIDTRANSFORM8X8
fill_token_costs(
cpi->mb.hybrid_token_costs[TX_8X8],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11])
cpi->common.fc.hybrid_coef_probs_8x8,
BLOCK_TYPES_8X8);
-#endif
fill_token_costs(
cpi->mb.token_costs[TX_16X16],
(const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16,
BLOCK_TYPES_16X16);
-#if CONFIG_HYBRIDTRANSFORM16X16
fill_token_costs(
cpi->mb.hybrid_token_costs[TX_16X16],
(const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11])
cpi->common.fc.hybrid_coef_probs_16x16,
BLOCK_TYPES_16X16);
-#endif
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
@@ -393,14 +385,12 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) {
if (cpi->common.frame_type != KEY_FRAME)
{
-#if CONFIG_NEWMVENTROPY
vp8_build_nmv_cost_table(
cpi->mb.nmvjointcost,
cpi->mb.e_mbd.allow_high_precision_mv ?
cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
&cpi->common.fc.nmvc,
cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
-#endif
}
}
@@ -409,19 +399,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) {
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
-#if 0
-
- if (0) {
- FILE *f;
-
- f = fopen("speed.stt", "a");
- fprintf(f, " %8ld %10ld %10ld %10ld\n",
- cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
- fclose(f);
- }
-
-#endif
-
/*
// this is done during parameter valid check
if( cpi->oxcf.cpu_used > 16)
@@ -520,7 +497,7 @@ int vp8_mbuverror_c(MACROBLOCK *mb) {
return error;
}
-int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) {
+int vp8_uvsse(MACROBLOCK *x) {
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
@@ -551,16 +528,14 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) {
vptr = x->e_mbd.pre.v_buffer + offset;
if ((mv_row | mv_col) & 7) {
- VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
- (mv_col & 7) << 1, (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
- VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
- (mv_col & 7) << 1, (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
+ vp8_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
+ vp8_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
sse2 += sse1;
} else {
- VARIANCE_INVOKE(rtcd, var8x8)(uptr, pre_stride,
- upred_ptr, uv_stride, &sse2);
- VARIANCE_INVOKE(rtcd, var8x8)(vptr, pre_stride,
- vpred_ptr, uv_stride, &sse1);
+ vp8_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
+ vp8_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
sse2 += sse1;
}
return sse2;
@@ -607,9 +582,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
short *qcoeff_ptr = b->qcoeff;
MACROBLOCKD *xd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi;
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = DCT_DCT;
-#endif
int segment_id = mbmi->segment_id;
switch (tx_size) {
@@ -617,7 +590,6 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
scan = vp8_default_zig_zag1d;
band = vp8_coef_bands;
default_eob = 16;
-#if CONFIG_HYBRIDTRANSFORM
if (type == PLANE_TYPE_Y_WITH_DC) {
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
@@ -636,14 +608,12 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
}
}
}
-#endif
break;
case TX_8X8:
scan = vp8_default_zig_zag1d_8x8;
band = vp8_coef_bands_8x8;
default_eob = 64;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (type == PLANE_TYPE_Y_WITH_DC) {
BLOCKD *bb;
int ib = (b - xd->block);
@@ -653,17 +623,14 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
tx_type = get_tx_type_8x8(xd, bb);
}
}
-#endif
break;
case TX_16X16:
scan = vp8_default_zig_zag1d_16x16;
band = vp8_coef_bands_16x16;
default_eob = 256;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (type == PLANE_TYPE_Y_WITH_DC) {
tx_type = get_tx_type_16x16(xd, b);
}
-#endif
break;
default:
break;
@@ -675,7 +642,6 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT) {
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
@@ -687,9 +653,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
if (c < seg_eob)
cost += mb->hybrid_token_costs[tx_size][type][band[c]]
[pt][DCT_EOB_TOKEN];
- } else
-#endif
- {
+ } else {
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp8_dct_value_tokens_ptr[v].Token;
@@ -870,9 +834,7 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
MACROBLOCKD *xd = &mb->e_mbd;
BLOCKD *b = &mb->e_mbd.block[0];
BLOCK *be = &mb->block[0];
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type;
-#endif
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(
mb->src_diff,
@@ -880,24 +842,18 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
mb->e_mbd.predictor,
mb->block[0].src_stride);
-#if CONFIG_HYBRIDTRANSFORM16X16
tx_type = get_tx_type_16x16(xd, b);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 16);
} else
vp8_transform_mby_16x16(mb);
-#else
- vp8_transform_mby_16x16(mb);
-#endif
vp8_quantize_mby_16x16(mb);
-#if CONFIG_HYBRIDTRANSFORM16X16
// TODO(jingning) is it possible to quickly determine whether to force
// trailing coefficients to be zero, instead of running trellis
// optimization in the rate-distortion optimization loop?
if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
vp8_optimize_mby_16x16(mb, rtcd);
-#endif
d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0);
@@ -913,8 +869,6 @@ static void macro_block_yrd(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
VP8_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
-#if CONFIG_TX_SELECT
-
MACROBLOCKD *xd = &x->e_mbd;
int can_skip = cm->mb_no_coeff_skip;
vp8_prob skip_prob = can_skip ? get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
@@ -1022,25 +976,6 @@ static void macro_block_yrd(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
else
txfm_cache[TX_MODE_SELECT] = rd4x4s < rd8x8s ? rd4x4s : rd8x8s;
-#else /* CONFIG_TX_SELECT */
-
- switch (cpi->common.txfm_mode) {
- case ALLOW_16X16:
- macro_block_yrd_16x16(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable);
- mbmi->txfm_size = TX_16X16;
- break;
- case ALLOW_8X8:
- macro_block_yrd_8x8(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable);
- mbmi->txfm_size = TX_8X8;
- break;
- default:
- case ONLY_4X4:
- macro_block_yrd_4x4(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable);
- mbmi->txfm_size = TX_4X4;
- break;
- }
-
-#endif /* CONFIG_TX_SELECT */
}
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
@@ -1155,10 +1090,8 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
ENTROPY_CONTEXT ta = *a, tempa = *a;
ENTROPY_CONTEXT tl = *l, templ = *l;
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type = DCT_DCT;
TX_TYPE best_tx_type = DCT_DCT;
-#endif
/*
* The predictor buffer is a 2d buffer with a stride of 16. Create
* a temp buffer that meets the stride requirements, but we are only
@@ -1191,7 +1124,6 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
b->bmi.as_mode.first = mode;
-#if CONFIG_HYBRIDTRANSFORM
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4);
@@ -1200,10 +1132,6 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
}
-#else
- x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
-#endif
tempa = ta;
templ = tl;
@@ -1221,9 +1149,7 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
-#if CONFIG_HYBRIDTRANSFORM
best_tx_type = tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
*best_second_mode = mode2;
@@ -1242,17 +1168,12 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
#endif
-#if CONFIG_HYBRIDTRANSFORM
// inverse transform
if (best_tx_type != DCT_DCT)
vp8_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4);
else
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
best_dqcoeff, b->diff, 32);
-#else
- IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
- best_dqcoeff, b->diff, 32);
-#endif
vp8_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1405,11 +1326,9 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
int64_t this_rd;
MACROBLOCKD *xd = &x->e_mbd;
-#if CONFIG_TX_SELECT
int i;
for (i = 0; i < NB_TXFM_MODES; i++)
txfm_cache[i] = INT64_MAX;
-#endif
// Y Search for 16x16 intra prediction mode
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
@@ -1452,7 +1371,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
*skippable = skip;
}
-#if CONFIG_TX_SELECT
for (i = 0; i < NB_TXFM_MODES; i++) {
int64_t adj_rd = this_rd + local_txfm_cache[i] -
local_txfm_cache[cpi->common.txfm_mode];
@@ -1460,7 +1378,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
txfm_cache[i] = adj_rd;
}
}
-#endif
#if CONFIG_COMP_INTRA_PRED
}
@@ -1535,22 +1452,18 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
vp8_subtract_4b_c(be, b, 16);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type = get_tx_type_8x8(xd, b);
if (tx_type != DCT_DCT)
vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, tx_type, 8);
else
x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
-#else
- x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
-#endif
x->quantize_b_8x8(x->block + idx, xd->block + idx);
// compute quantization mse of 8x8 block
distortion = vp8_block_error_c((x->block + idx)->coeff,
(xd->block + idx)->dqcoeff, 64);
- ta0 = *(a + vp8_block2above_8x8[idx]);
- tl0 = *(l + vp8_block2left_8x8 [idx]);
+ ta0 = a[vp8_block2above_8x8[idx]];
+ tl0 = l[vp8_block2left_8x8[idx]];
rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
&ta0, &tl0, TX_8X8);
@@ -1576,10 +1489,10 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
distortion += vp8_block_error_c((x->block + ib + 5)->coeff,
(xd->block + ib + 5)->dqcoeff, 16);
- ta0 = *(a + vp8_block2above[ib]);
- ta1 = *(a + vp8_block2above[ib + 1]);
- tl0 = *(l + vp8_block2above[ib]);
- tl1 = *(l + vp8_block2above[ib + 4]);
+ ta0 = a[vp8_block2above[ib]];
+ ta1 = a[vp8_block2above[ib + 1]];
+ tl0 = l[vp8_block2left[ib]];
+ tl1 = l[vp8_block2left[ib + 4]];
rate_t = cost_coeffs(x, xd->block + ib, PLANE_TYPE_Y_WITH_DC,
&ta0, &tl0, TX_4X4);
rate_t += cost_coeffs(x, xd->block + ib + 1, PLANE_TYPE_Y_WITH_DC,
@@ -1621,15 +1534,15 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- *(a + vp8_block2above_8x8[idx]) = besta0;
- *(a + vp8_block2above_8x8[idx] + 1) = besta1;
- *(l + vp8_block2left_8x8 [idx]) = bestl0;
- *(l + vp8_block2left_8x8 [idx] + 1) = bestl1;
+ a[vp8_block2above_8x8[idx]] = besta0;
+ a[vp8_block2above_8x8[idx] + 1] = besta1;
+ l[vp8_block2left_8x8[idx]] = bestl0;
+ l[vp8_block2left_8x8[idx] + 1] = bestl1;
} else {
- *(a + vp8_block2above[ib]) = besta0;
- *(a + vp8_block2above[ib + 1]) = besta1;
- *(l + vp8_block2above[ib]) = bestl0;
- *(l + vp8_block2above[ib + 4]) = bestl1;
+ a[vp8_block2above[ib]] = besta0;
+ a[vp8_block2above[ib + 1]] = besta1;
+ l[vp8_block2left[ib]] = bestl0;
+ l[vp8_block2left[ib + 4]] = bestl1;
}
return best_rd;
@@ -2223,12 +2136,22 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
int which_label,
int *labelyrate,
int *distortion,
+ int64_t *otherrd,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl,
const VP8_ENCODER_RTCD *rtcd) {
int i, j;
MACROBLOCKD *xd = &x->e_mbd;
const int iblock[4] = { 0, 1, 4, 5 };
+ int othercost = 0, otherdist = 0;
+ ENTROPY_CONTEXT_PLANES tac, tlc;
+ ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac,
+ *tlcp = (ENTROPY_CONTEXT *) &tlc;
+
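+      /* When otherrd is non-NULL, this function also rates the alternate
+       * transform size using private copies of the entropy contexts, so the
+       * caller can compare 4x4 against 8x8 without disturbing the real contexts. */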
+ if (otherrd) {
+ memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES));
+ memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES));
+ }
*distortion = 0;
*labelyrate = 0;
@@ -2236,8 +2159,9 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
int ib = vp8_i8x8_block[i];
if (labels[ib] == which_label) {
- BLOCKD *bd = &xd->block[ib];
- BLOCK *be = &x->block[ib];
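+      /* Map the 8x8 sub-block's first 4x4 index (0/2/8/10) to the coefficient
+       * block index used by the 8x8 transform (0/4/8/12). */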
+ int idx = (ib & 8) + ((ib & 2) << 1);
+ BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
+ BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
int thisdistortion;
vp8_build_inter_predictors4b(xd, bd, 16);
@@ -2245,24 +2169,66 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
vp8_build_2nd_inter_predictors4b(xd, bd, 16);
vp8_subtract_4b_c(be, bd, 16);
- for (j = 0; j < 4; j += 2) {
- bd = &xd->block[ib + iblock[j]];
- be = &x->block[ib + iblock[j]];
- x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
- thisdistortion = vp8_block_error_c(be->coeff, bd->dqcoeff, 32);
+ if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
+ if (otherrd) {
+ x->vp8_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(be2, bd2);
+ thisdistortion = vp8_block_error_c(be2->coeff, bd2->dqcoeff, 64);
+ otherdist += thisdistortion;
+ othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp8_block2above_8x8[idx],
+ tlcp + vp8_block2left_8x8[idx], TX_8X8);
+ }
+ for (j = 0; j < 4; j += 2) {
+ bd = &xd->block[ib + iblock[j]];
+ be = &x->block[ib + iblock[j]];
+ x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ thisdistortion = vp8_block_error_c(be->coeff, bd->dqcoeff, 32);
+ *distortion += thisdistortion;
+ *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ ta + vp8_block2above[ib + iblock[j]],
+ tl + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
+ ta + vp8_block2above[ib + iblock[j] + 1],
+ tl + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ }
+ } else /* 8x8 */ {
+ if (otherrd) {
+ for (j = 0; j < 4; j += 2) {
+ BLOCKD *bd3 = &xd->block[ib + iblock[j]];
+ BLOCK *be3 = &x->block[ib + iblock[j]];
+ x->vp8_short_fdct8x4(be3->src_diff, be3->coeff, 32);
+ x->quantize_b_4x4_pair(be3, be3 + 1, bd3, bd3 + 1);
+ thisdistortion = vp8_block_error_c(be3->coeff, bd3->dqcoeff, 32);
+ otherdist += thisdistortion;
+ othercost += cost_coeffs(x, bd3, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp8_block2above[ib + iblock[j]],
+ tlcp + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ othercost += cost_coeffs(x, bd3 + 1, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp8_block2above[ib + iblock[j] + 1],
+ tlcp + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ }
+ }
+ x->vp8_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(be2, bd2);
+ thisdistortion = vp8_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
- ta + vp8_block2above[ib + iblock[j]],
- tl + vp8_block2left[ib + iblock[j]], TX_4X4);
- *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
- ta + vp8_block2above[ib + iblock[j] + 1],
- tl + vp8_block2left[ib + iblock[j]],
- TX_4X4);
+ *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ ta + vp8_block2above_8x8[idx],
+ tl + vp8_block2left_8x8[idx], TX_8X8);
}
}
}
*distortion >>= 2;
+ if (otherrd) {
+ othercost >>= 2;
+ *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist);
+ }
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
@@ -2274,7 +2240,8 @@ typedef struct {
int_mv mvp;
int64_t segment_rd;
- int segment_num;
+ SPLITMV_PARTITIONING_TYPE segment_num;
+ TX_SIZE txfm_size;
int r;
int d;
int segment_yrate;
@@ -2300,9 +2267,14 @@ int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
return r;
}
-static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
- BEST_SEG_INFO *bsi, unsigned int segmentation,
- int_mv seg_mvs[16 /* n_blocks */][MAX_REF_FRAMES - 1]) {
+static void rd_check_segment_txsize(VP8_COMP *cpi, MACROBLOCK *x,
+ BEST_SEG_INFO *bsi,
+ SPLITMV_PARTITIONING_TYPE segmentation,
+ TX_SIZE tx_size, int64_t *otherrds,
+ int64_t *rds, int *completed,
+ /* 16 = n_blocks */
+ int_mv seg_mvs[16 /* n_blocks */]
+ [MAX_REF_FRAMES - 1]) {
int i, j;
int const *labels;
int br = 0, bd = 0;
@@ -2310,12 +2282,12 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
int label_count;
- int64_t this_segment_rd = 0;
+ int64_t this_segment_rd = 0, other_segment_rd;
int label_mv_thresh;
int rate = 0;
int sbr = 0, sbd = 0;
int segmentyrate = 0;
- uint8_t best_eobs[16];
+ uint8_t best_eobs[16] = { 0 };
vp8_variance_fn_ptr_t *v_fn_ptr;
@@ -2343,20 +2315,23 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
- rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
+ rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
+ vp8_mbsplit_encodings + segmentation);
rate += vp8_cost_mv_ref(cpi, SPLITMV, bsi->mdcounts);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
+ other_segment_rd = this_segment_rd;
- for (i = 0; i < label_count; i++) {
+ mbmi->txfm_size = tx_size;
+ for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
- int64_t best_label_rd = INT64_MAX;
+ int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
B_PREDICTION_MODE mode_selected = ZERO4X4;
int bestlabelyrate = 0;
// search for the best motion vector on this segment
for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
- int64_t this_rd;
+ int64_t this_rd, other_rd;
int distortion;
int labelyrate;
ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
@@ -2378,21 +2353,23 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
BLOCK *c;
BLOCKD *e;
- // Is the best so far sufficiently good that we cant justify doing and new motion search.
+        /* Is the best so far sufficiently good that we can't justify doing
+         * a new motion search? */
if (best_label_rd < label_mv_thresh)
break;
if (cpi->compressor_speed) {
- if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
+ if (segmentation == PARTITIONING_8X16 ||
+ segmentation == PARTITIONING_16X8) {
bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
- if (i == 1 && segmentation == BLOCK_16X8)
+ if (i == 1 && segmentation == PARTITIONING_16X8)
bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
step_param = bsi->sv_istep[i];
}
// use previous block's result as next block's MV predictor.
- if (segmentation == BLOCK_4X4 && i > 0) {
+ if (segmentation == PARTITIONING_4X4 && i > 0) {
bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
if (i == 4 || i == 8 || i == 12)
bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
@@ -2424,7 +2401,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
/* Check if mvp_full is within the range. */
- vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, c, e, &mvp_full,
sadpb, 16, v_fn_ptr,
@@ -2434,7 +2412,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bestsme = thissme;
mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
} else {
- // The full search result is actually worse so re-instate the previous best vector
+            /* The full search result is actually worse, so reinstate the
+             * previous best vector */
e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
}
}
@@ -2444,15 +2423,16 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
int distortion;
unsigned int sse;
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
- bsi->ref_mv, x->errorperbit, v_fn_ptr, XMVCOST,
- &distortion, &sse);
+ bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ XMVCOST, &distortion, &sse);
// safe motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
}
} /* NEW4X4 */
else if (mbmi->second_ref_frame && this_mode == NEW4X4) {
- // motion search not completed? Then skip newmv for this block with comppred
+ /* motion search not completed? Then skip newmv for this block with
+ * comppred */
if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
continue;
@@ -2474,14 +2454,15 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
- if (segmentation == BLOCK_4X4) {
+ if (segmentation == PARTITIONING_4X4) {
this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
&distortion,
ta_s, tl_s, IF_RTCD(&cpi->rtcd));
+ other_rd = this_rd;
} else {
this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
- &distortion, ta_s, tl_s,
- IF_RTCD(&cpi->rtcd));
+ &distortion, &other_rd,
+ ta_s, tl_s, IF_RTCD(&cpi->rtcd));
}
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@@ -2492,9 +2473,20 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
- for (j = 0; j < 16; j++)
- if (labels[j] == i)
- best_eobs[j] = x->e_mbd.block[j].eob;
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
+ for (j = 0; j < 16; j++)
+ if (labels[j] == i)
+ best_eobs[j] = x->e_mbd.block[j].eob;
+ } else {
+ for (j = 0; j < 4; j++) {
+ int ib = vp8_i8x8_block[j], idx = j * 4;
+
+ if (labels[ib] == i)
+ best_eobs[idx] = x->e_mbd.block[idx].eob;
+ }
+ }
+ if (other_rd < best_other_rd)
+ best_other_rd = other_rd;
vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -2506,18 +2498,18 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
- &second_mode_mv[mode_selected], seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, XMVCOST);
+ &second_mode_mv[mode_selected], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, XMVCOST);
br += sbr;
bd += sbd;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
-
- if (this_segment_rd >= bsi->segment_rd) {
- break;
- }
-
-
+ other_segment_rd += best_other_rd;
+ if (rds)
+ rds[i] = this_segment_rd;
+ if (otherrds)
+      otherrds[i] = other_segment_rd;
} /* for each label */
if (this_segment_rd < bsi->segment_rd) {
@@ -2526,6 +2518,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
bsi->segment_num = segmentation;
+ bsi->txfm_size = mbmi->txfm_size;
// store everything needed to come back to this!!
for (i = 0; i < 16; i++) {
@@ -2538,6 +2531,105 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bsi->eobs[i] = best_eobs[i];
}
}
+
+ if (completed) {
+ *completed = i;
+ }
+}
+
+static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
+ BEST_SEG_INFO *bsi,
+ unsigned int segmentation,
+ /* 16 = n_blocks */
+ int_mv seg_mvs[16][MAX_REF_FRAMES - 1],
+ int64_t txfm_cache[NB_TXFM_MODES]) {
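+  /* c is the number of partitions for this split type; n reports how many
+   * of them the per-txsize search completed before terminating early. */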
+ int i, n, c = vp8_mbsplit_count[segmentation];
+
+ if (segmentation == PARTITIONING_4X4) {
+ int64_t rd[16];
+
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ if (rd[c - 1] < txfm_cache[i])
+ txfm_cache[i] = rd[c - 1];
+ }
+ }
+ } else {
+ int64_t diff, base_rd;
+ int cost4x4 = vp8_cost_bit(cpi->common.prob_tx[0], 0);
+ int cost8x8 = vp8_cost_bit(cpi->common.prob_tx[0], 1);
+
+ if (cpi->common.txfm_mode == TX_MODE_SELECT) {
+ int64_t rd4x4[4], rd8x8[4];
+ int n4x4, n8x8, nmin;
+ BEST_SEG_INFO bsi4x4, bsi8x8;
+
+ /* factor in cost of cost4x4/8x8 in decision */
+ vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi));
+ vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi));
+ rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation,
+ TX_4X4, NULL, rd4x4, &n4x4, seg_mvs);
+ rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation,
+ TX_8X8, NULL, rd8x8, &n8x8, seg_mvs);
+ if (bsi4x4.segment_num == segmentation) {
+ bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
+ if (bsi4x4.segment_rd < bsi->segment_rd)
+ vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi));
+ }
+ if (bsi8x8.segment_num == segmentation) {
+ bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
+ if (bsi8x8.segment_rd < bsi->segment_rd)
+ vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi));
+ }
+ n = n4x4 > n8x8 ? n4x4 : n8x8;
+ if (n == c) {
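+        /* Estimate the 8x8-vs-4x4 rd gap over the partitions both runs
+         * completed, then express the full cost relative to a 4x4 baseline. */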
+ nmin = n4x4 < n8x8 ? n4x4 : n8x8;
+ diff = rd8x8[nmin - 1] - rd4x4[nmin - 1];
+ if (n == n4x4) {
+ base_rd = rd4x4[c - 1];
+ } else {
+ base_rd = rd8x8[c - 1] - diff;
+ }
+ }
+ } else {
+ int64_t rd[4], otherrd[4];
+
+ if (cpi->common.txfm_mode == ONLY_4X4) {
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ base_rd = rd[c - 1];
+ diff = otherrd[c - 1] - rd[c - 1];
+ }
+ } else /* use 8x8 transform */ {
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ diff = rd[c - 1] - otherrd[c - 1];
+ base_rd = otherrd[c - 1];
+ }
+ }
+ }
+
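+    /* Fold this partitioning's rd into the whole-MB transform mode cache:
+     * ONLY_4X4 takes the 4x4 baseline, ALLOW_8X8/16X16 take baseline + diff,
+     * and TX_MODE_SELECT additionally pays the per-MB transform size bit. */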
+ if (n == c) {
+ if (base_rd < txfm_cache[ONLY_4X4]) {
+ txfm_cache[ONLY_4X4] = base_rd;
+ }
+    if (base_rd + diff < txfm_cache[ALLOW_8X8]) {
+ txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff;
+ }
+ if (diff < 0) {
+ base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
+ } else {
+ base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
+ }
+ if (base_rd < txfm_cache[TX_MODE_SELECT]) {
+ txfm_cache[TX_MODE_SELECT] = base_rd;
+ }
+ }
+ }
}
static __inline
@@ -2553,17 +2645,26 @@ void vp8_cal_step_param(int sr, int *sp) {
*sp = MAX_MVSEARCH_STEPS - 1 - step;
}
-static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv, int_mv *second_best_ref_mv, int64_t best_rd,
- int *mdcounts, int *returntotrate,
- int *returnyrate, int *returndistortion,
- int *skippable, int mvthresh,
- int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1]) {
+static int rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int64_t best_rd,
+ int *mdcounts,
+ int *returntotrate,
+ int *returnyrate,
+ int *returndistortion,
+ int *skippable, int mvthresh,
+ int_mv seg_mvs[NB_PARTITIONINGS]
+ [16 /* n_blocks */]
+ [MAX_REF_FRAMES - 1],
+ int64_t txfm_cache[NB_TXFM_MODES]) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
vpx_memset(&bsi, 0, sizeof(bsi));
+ for (i = 0; i < NB_TXFM_MODES; i++)
+ txfm_cache[i] = INT64_MAX;
bsi.segment_rd = best_rd;
bsi.ref_mv = best_ref_mv;
@@ -2571,6 +2672,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
bsi.mvp.as_int = best_ref_mv->as_int;
bsi.mvthresh = mvthresh;
bsi.mdcounts = mdcounts;
+ bsi.txfm_size = TX_4X4;
for (i = 0; i < 16; i++)
bsi.modes[i] = ZERO4X4;
@@ -2578,15 +2680,19 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
if (cpi->compressor_speed == 0) {
/* for now, we will keep the original segmentation order
when in best quality mode */
- rd_check_segment(cpi, x, &bsi, BLOCK_16X8, seg_mvs[BLOCK_16X8]);
- rd_check_segment(cpi, x, &bsi, BLOCK_8X16, seg_mvs[BLOCK_8X16]);
- rd_check_segment(cpi, x, &bsi, BLOCK_8X8, seg_mvs[BLOCK_8X8]);
- rd_check_segment(cpi, x, &bsi, BLOCK_4X4, seg_mvs[BLOCK_4X4]);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
+ seg_mvs[PARTITIONING_16X8], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
+ seg_mvs[PARTITIONING_8X16], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
+ seg_mvs[PARTITIONING_8X8], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
+ seg_mvs[PARTITIONING_4X4], txfm_cache);
} else {
int sr;
- rd_check_segment(cpi, x, &bsi, BLOCK_8X8, seg_mvs[BLOCK_8X8]);
-
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
+ seg_mvs[PARTITIONING_8X8], txfm_cache);
if (bsi.segment_rd < best_rd) {
int tmp_col_min = x->mv_col_min;
@@ -2602,34 +2708,40 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
- /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
+ /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
+ * according to the closeness of 2 MV. */
/* block 8X16 */
- {
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[0]);
+ sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[0]);
- sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[1]);
+ sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[1]);
- rd_check_segment(cpi, x, &bsi, BLOCK_8X16, seg_mvs[BLOCK_8X16]);
- }
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
+ seg_mvs[PARTITIONING_8X16], txfm_cache);
/* block 16X8 */
- {
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[0]);
+ sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[0]);
- sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[1]);
+ sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[1]);
- rd_check_segment(cpi, x, &bsi, BLOCK_16X8, seg_mvs[BLOCK_16X8]);
- }
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
+ seg_mvs[PARTITIONING_16X8], txfm_cache);
/* If 8x8 is better than 16x8/8x16, then do 4x4 search */
/* Not skip 4x4 if speed=0 (good quality) */
- if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) { /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
+ if (cpi->sf.no_skip_block4x4_search ||
+ bsi.segment_num == PARTITIONING_8X8) {
+ /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
- rd_check_segment(cpi, x, &bsi, BLOCK_4X4, seg_mvs[BLOCK_4X4]);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
+ seg_mvs[PARTITIONING_4X4], txfm_cache);
}
/* restore UMV window */
@@ -2653,9 +2765,12 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
*returntotrate = bsi.r;
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
- *skippable = mby_is_skippable_4x4(&x->e_mbd, 0);
+ *skippable = bsi.txfm_size == TX_4X4 ?
+ mby_is_skippable_4x4(&x->e_mbd, 0) :
+ mby_is_skippable_8x8(&x->e_mbd, 0);
/* save partitions */
+ mbmi->txfm_size = bsi.txfm_size;
mbmi->partitioning = bsi.segment_num;
x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
@@ -2901,9 +3016,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
-#if CONFIG_NEWMVENTROPY
MV mv;
-#endif
if (mbmi->mode == SPLITMV) {
int i;
@@ -2911,7 +3024,6 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < x->partition_info->count; i++) {
if (x->partition_info->bmi[i].mode == NEW4X4) {
if (x->e_mbd.allow_high_precision_mv) {
-#if CONFIG_NEWMVENTROPY
mv.row = (x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row);
mv.col = (x->partition_info->bmi[i].mv.as_mv.col
@@ -2925,20 +3037,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv,
&cpi->NMVcount, 1);
}
-#else
- cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.row
- - second_best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.col
- - second_best_ref_mv->as_mv.col)]++;
- }
-#endif
} else {
-#if CONFIG_NEWMVENTROPY
mv.row = (x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row);
mv.col = (x->partition_info->bmi[i].mv.as_mv.col
@@ -2952,24 +3051,11 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv,
&cpi->NMVcount, 0);
}
-#else
- cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.row
- - second_best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.col
- - second_best_ref_mv->as_mv.col) >> 1)]++;
- }
-#endif
}
}
}
} else if (mbmi->mode == NEWMV) {
if (x->e_mbd.allow_high_precision_mv) {
-#if CONFIG_NEWMVENTROPY
mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
@@ -2978,20 +3064,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1);
}
-#else
- cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[0].as_mv.row
- - best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[0].as_mv.col
- - best_ref_mv->as_mv.col)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row
- - second_best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col
- - second_best_ref_mv->as_mv.col)]++;
- }
-#endif
} else {
-#if CONFIG_NEWMVENTROPY
mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
@@ -3000,18 +3073,6 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0);
}
-#else
- cpi->MVcount[0][mv_max + ((mbmi->mv[0].as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((mbmi->mv[0].as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount[0][mv_max + ((mbmi->mv[1].as_mv.row
- - second_best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((mbmi->mv[1].as_mv.col
- - second_best_ref_mv->as_mv.col) >> 1)]++;
- }
-#endif
}
}
}
@@ -3185,9 +3246,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int hybrid_pred_diff,
int64_t txfm_size_diff[NB_TXFM_MODES]) {
MACROBLOCKD *xd = &x->e_mbd;
-#if CONFIG_TX_SELECT
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
-#endif
// Take a snapshot of the coding context so it can be
// restored if we decide to encode this way
@@ -3207,9 +3266,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
ctx->comp_pred_diff = comp_pred_diff;
ctx->hybrid_pred_diff = hybrid_pred_diff;
-#if CONFIG_TX_SELECT
memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
-#endif
}
static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode,
@@ -3326,9 +3383,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int_mv ref_mv[MAX_REF_FRAMES] = {{0}};
#endif
-#if CONFIG_SWITCHABLE_INTERP
int switchable_filter_index = 0;
-#endif
MB_PREDICTION_MODE uv_intra_mode;
MB_PREDICTION_MODE uv_intra_mode_8x8 = 0;
@@ -3344,7 +3399,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4];
unsigned int ref_costs[MAX_REF_FRAMES];
- int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1];
+ int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
vpx_memset(mode8x8, 0, sizeof(mode8x8));
vpx_memset(&frame_mv, 0, sizeof(frame_mv));
@@ -3359,7 +3414,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
for (i = 0; i < NB_TXFM_MODES; i++)
best_txfm_rd[i] = INT64_MAX;
- for (i = 0; i < BLOCK_MAX_SEGMENTS - 1; i++) {
+ for (i = 0; i < NB_PARTITIONINGS; i++) {
int j, k;
for (j = 0; j < 16; j++)
@@ -3425,12 +3480,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
// that depend on the current prediction etc.
vp8_estimate_ref_frame_costs(cpi, segment_id, ref_costs);
-#if CONFIG_SWITCHABLE_INTERP
for (mode_index = 0; mode_index < MAX_MODES;
mode_index += (!switchable_filter_index)) {
-#else
- for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
-#endif
int64_t this_rd = INT64_MAX;
int is_comp_pred;
int disable_skip = 0, skippable = 0;
@@ -3458,19 +3509,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
#if CONFIG_PRED_FILTER
mbmi->pred_filter_enabled = 0;
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE &&
this_mode >= NEARESTMV && this_mode <= SPLITMV) {
mbmi->interp_filter =
vp8_switchable_interp[switchable_filter_index++];
if (switchable_filter_index == VP8_SWITCHABLE_FILTERS)
switchable_filter_index = 0;
- //printf("Searching %d (%d)\n", this_mode, switchable_filter_index);
} else {
mbmi->interp_filter = cpi->common.mcomp_filter_type;
}
vp8_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-#endif
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index])
@@ -3612,11 +3660,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
break;
case I8X8_PRED: {
-#if CONFIG_TX_SELECT
int cost0 = vp8_cost_bit(cm->prob_tx[0], 0);
int cost1 = vp8_cost_bit(cm->prob_tx[0], 1);
int64_t tmp_rd_4x4s, tmp_rd_8x8s;
-#endif
int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
mbmi->txfm_size = TX_4X4;
@@ -3638,7 +3684,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
txfm_cache[ONLY_4X4] = tmp_rd_4x4;
txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
-#if CONFIG_TX_SELECT
tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s;
@@ -3667,9 +3712,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
mode8x8[1][3] = x->e_mbd.mode_info_context->bmi[10].as_mode.second;
#endif
}
- } else
-#endif
- if (cm->txfm_mode == ONLY_4X4) {
+ } else if (cm->txfm_mode == ONLY_4X4) {
rate = r4x4;
rate_y = tok4x4;
distortion = d4x4;
@@ -3725,21 +3768,19 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
(mbmi->ref_frame == GOLDEN_FRAME) ?
cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
- mbmi->txfm_size = TX_4X4; // FIXME use 8x8 in case of 8x8/8x16/16x8
- tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- second_ref, best_yrd, mdcounts,
- &rate, &rate_y, &distortion,
- &skippable,
- this_rd_thresh, seg_mvs);
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ this_rd_thresh, seg_mvs,
+ txfm_cache);
rate2 += rate;
distortion2 += distortion;
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
[get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
[vp8_switchable_interp_map[mbmi->interp_filter]];
-#endif
// If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV
if (tmp_rd < best_yrd) {
@@ -3877,13 +3918,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
rate2 += vp8_cost_bit(cpi->common.prob_pred_filter_off,
xd->mode_info_context->mbmi.pred_filter_enabled);
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
[get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
[vp8_switchable_interp_map[
x->e_mbd.mode_info_context->mbmi.interp_filter]];
-#endif
/* We don't include the cost of the second reference here, because there are only
* three options: Last/Golden, ARF/Last or Golden/ARF, or in other words if you
@@ -3908,8 +3947,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
- var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
- (*(b->base_src), b->src_stride,
+ var = vp8_variance16x16(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
if (sse < threshold) {
@@ -3919,7 +3957,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if ((sse - var < q2dc *q2dc >> 4) ||
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
- int sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
+ int sse2 = vp8_uvsse(x);
if (sse2 * 2 < threshold) {
x->skip = 1;
distortion2 = sse + sse2;
@@ -4127,7 +4165,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (!mode_excluded && this_rd != INT64_MAX) {
for (i = 0; i < NB_TXFM_MODES; i++) {
int64_t adj_rd;
- if (this_mode != B_PRED && this_mode != SPLITMV) {
+ if (this_mode != B_PRED) {
adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
} else {
adj_rd = this_rd;
@@ -4151,7 +4189,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
else
++cpi->pred_filter_off_count;
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE &&
best_mbmode.mode >= NEARESTMV &&
best_mbmode.mode <= SPLITMV) {
@@ -4159,7 +4196,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
[get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
[vp8_switchable_interp_map[best_mbmode.interp_filter]];
}
-#endif
// Reduce the activation RD thresholds for the best choice mode
if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
@@ -4185,11 +4221,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
mbmi->mode = ZEROMV;
-#if CONFIG_TX_SELECT
if (cm->txfm_mode != TX_MODE_SELECT)
mbmi->txfm_size = cm->txfm_mode;
else
-#endif
mbmi->txfm_size = TX_16X16;
mbmi->ref_frame = ALTREF_FRAME;
mbmi->mv[0].as_int = 0;
@@ -4239,7 +4273,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
best_pred_diff[i] = best_rd - best_pred_rd[i];
}
-#if CONFIG_TX_SELECT
if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
@@ -4250,7 +4283,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
} else {
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
}
-#endif
end:
store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition,
@@ -4381,10 +4413,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 1);
dist = dist16x16 + (distuv8x8 >> 2);
mbmi->txfm_size = txfm_size_16x16;
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
} else if (error8x8 > error16x16) {
if (error4x4 < error16x16) {
rate = rateuv;
@@ -4401,20 +4431,16 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
} else {
mbmi->txfm_size = txfm_size_16x16;
mbmi->mode = mode16x16;
rate = rate16x16 + rateuv8x8;
dist = dist16x16 + (distuv8x8 >> 2);
-#if CONFIG_TX_SELECT
for (i = 0; i < NB_TXFM_MODES; i++) {
x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i];
}
-#endif
}
if (cpi->common.mb_no_coeff_skip)
rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4434,10 +4460,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
} else {
// FIXME(rbultje) support transform-size selection
mbmi->mode = I8X8_PRED;
@@ -4445,10 +4469,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
set_i8x8_block_modes(x, mode8x8);
rate = rate8x8 + rateuv;
dist = dist8x8 + (distuv >> 2);
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
}
if (cpi->common.mb_no_coeff_skip)
rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4805,8 +4827,8 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
- var = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32)(*(b->base_src),
- b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ var = vp8_variance32x32(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
if (sse < threshold) {
unsigned int q2dc = xd->block[24].dequant[0];
@@ -4816,11 +4838,9 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
unsigned int sse2, sse3;
- var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
- (x->src.u_buffer, x->src.uv_stride,
+ var += vp8_variance16x16(x->src.u_buffer, x->src.uv_stride,
xd->dst.u_buffer, xd->dst.uv_stride, &sse2);
- var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
- (x->src.v_buffer, x->src.uv_stride,
+ var += vp8_variance16x16(x->src.v_buffer, x->src.uv_stride,
xd->dst.v_buffer, xd->dst.uv_stride, &sse3);
sse2 += sse3;
if (sse2 * 2 < threshold) {
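
For orientation, the encode-breakout early-skip decision exercised by the variance calls in both functions above reduces to the check sketched below. The helper name and argument packaging are illustrative, not code from the patch; only the expressions inside it are taken from the hunks:

/* Minimal sketch of the encode-breakout skip test used above.
 * Returns 1 if the macroblock may be skipped. */
static int encode_breakout_ok(unsigned int sse, unsigned int var,
                              unsigned int q2dc, unsigned int threshold,
                              unsigned int sse_uv) {
  if (sse >= threshold)
    return 0;
  /* Residual is essentially DC-only, or very small overall. */
  if ((sse - var < (q2dc * q2dc) >> 4) ||
      (sse / 2 > var && sse - var < 64)) {
    /* Chroma must also be quiet before skipping the macroblock. */
    return sse_uv * 2 < threshold;
  }
  return 0;
}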
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 2e86a16c0..f15e687c6 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -10,33 +10,10 @@
#include <stdlib.h>
+#include "vp8/common/sadmxn.h"
#include "vpx_ports/config.h"
#include "vpx/vpx_integer.h"
-static __inline
-unsigned int sad_mx_n_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int m,
- int n) {
-
- int r, c;
- unsigned int sad = 0;
-
- for (r = 0; r < n; r++) {
- for (c = 0; c < m; c++) {
- sad += abs(src_ptr[c] - ref_ptr[c]);
- }
-
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- return sad;
-}
-
unsigned int vp8_sad32x32_c(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
@@ -97,25 +74,6 @@ unsigned int vp8_sad4x4_c(
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
}
-#if CONFIG_NEWBESTREFMV
-unsigned int vp8_sad3x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad){
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16);
-}
-unsigned int vp8_sad16x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad){
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);
-}
-#endif
-
void vp8_sad32x32x3_c(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
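
The m-by-n helper removed here now lives in vp8/common/sadmxn.h (presumably so common-side code, such as findnearmv.c which this change also touches, can share it), and each per-size entry point stays a one-line wrapper. A sketch for the 16x16 case, mirroring the 4x4 wrapper visible above:

#include "vp8/common/sadmxn.h"

/* Sketch only: the plain-C SAD ignores max_sad, which is an early-out
 * hint used by some SIMD versions. */
unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
                            const unsigned char *ref_ptr, int ref_stride,
                            int max_sad) {
  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16);
}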
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index d3d9711dc..865496ae2 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -11,18 +11,10 @@
#include "onyx_int.h"
-void vp8_ssim_parms_16x16_c
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-) {
+void vp8_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r,
+ int rp, unsigned long *sum_s, unsigned long *sum_r,
+ unsigned long *sum_sq_s, unsigned long *sum_sq_r,
+ unsigned long *sum_sxr) {
int i, j;
for (i = 0; i < 16; i++, s += sp, r += rp) {
for (j = 0; j < 16; j++) {
@@ -34,18 +26,10 @@ void vp8_ssim_parms_16x16_c
}
}
}
-void vp8_ssim_parms_8x8_c
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-) {
+void vp8_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp,
+ unsigned long *sum_s, unsigned long *sum_r,
+ unsigned long *sum_sq_s, unsigned long *sum_sq_r,
+ unsigned long *sum_sxr) {
int i, j;
for (i = 0; i < 8; i++, s += sp, r += rp) {
for (j = 0; j < 8; j++) {
@@ -61,15 +45,9 @@ void vp8_ssim_parms_8x8_c
const static int64_t cc1 = 26634; // (64^2*(.01*255)^2)
const static int64_t cc2 = 239708; // (64^2*(.03*255)^2)
-static double similarity
-(
- unsigned long sum_s,
- unsigned long sum_r,
- unsigned long sum_sq_s,
- unsigned long sum_sq_r,
- unsigned long sum_sxr,
- int count
-) {
+static double similarity(unsigned long sum_s, unsigned long sum_r,
+ unsigned long sum_sq_s, unsigned long sum_sq_r,
+ unsigned long sum_sxr, int count) {
int64_t ssim_n, ssim_d;
int64_t c1, c2;
@@ -87,23 +65,22 @@ static double similarity
return ssim_n * 1.0 / ssim_d;
}
-static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp,
- const vp8_variance_rtcd_vtable_t *rtcd) {
+static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
}
-static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp,
- const vp8_variance_rtcd_vtable_t *rtcd) {
+static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- SSIMPF_INVOKE(rtcd, 8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
// TODO(jbb): tried to scale this function so that it can be used as a
// distortion metric in mode selection code (provided we do a reconstruction)
-long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
- const vp8_variance_rtcd_vtable_t *rtcd) {
+long dssim(unsigned char *s, int sp, unsigned char *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
int64_t ssim3;
int64_t ssim_n1, ssim_n2;
@@ -115,7 +92,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
c1 = cc1 * 16;
c2 = cc2 * 16;
- SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
ssim_n1 = (2 * sum_s * sum_r + c1);
ssim_n2 = ((int64_t) 2 * 256 * sum_sxr - (int64_t) 2 * sum_s * sum_r + c2);
@@ -137,16 +115,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
// We are using an 8x8 moving window, with each window's starting location
// on the 4x4 pixel grid. Such an arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
-double vp8_ssim2
-(
- unsigned char *img1,
- unsigned char *img2,
- int stride_img1,
- int stride_img2,
- int width,
- int height,
- const vp8_variance_rtcd_vtable_t *rtcd
-) {
+double vp8_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1,
+ int stride_img2, int width, int height) {
int i, j;
int samples = 0;
double ssim_total = 0;
@@ -154,7 +124,7 @@ double vp8_ssim2
  // sample points start at each 4x4 location
for (i = 0; i < height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
for (j = 0; j < width - 8; j += 4) {
- double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2, rtcd);
+ double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2);
ssim_total += v;
samples++;
}
@@ -162,28 +132,22 @@ double vp8_ssim2
ssim_total /= samples;
return ssim_total;
}
-double vp8_calc_ssim
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- int lumamask,
- double *weight,
- const vp8_variance_rtcd_vtable_t *rtcd
-) {
+double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+ int lumamask, double *weight) {
double a, b, c;
double ssimv;
a = vp8_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride, source->y_width,
- source->y_height, rtcd);
+ source->y_height);
b = vp8_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
c = vp8_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
ssimv = a * .8 + .1 * (b + c);
@@ -192,29 +156,22 @@ double vp8_calc_ssim
return ssimv;
}
-double vp8_calc_ssimg
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v,
- const vp8_variance_rtcd_vtable_t *rtcd
-) {
+double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+ double *ssim_y, double *ssim_u, double *ssim_v) {
double ssim_all = 0;
double a, b, c;
a = vp8_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride, source->y_width,
- source->y_height, rtcd);
+ source->y_height);
b = vp8_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
c = vp8_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
*ssim_y = a;
*ssim_u = b;
*ssim_v = c;
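
A note on the constants carried through the simplified SSIM code above: they are the standard SSIM stabilizers scaled by 64^2 and then truncated to integers, as the following standalone check confirms (this snippet is illustrative and not part of the patch):

#include <stdio.h>

/* Check the derivation of the SSIM stabilizing constants used above:
 *   cc1 = 64^2 * (0.01 * 255)^2 -> 26634  (truncated)
 *   cc2 = 64^2 * (0.03 * 255)^2 -> 239708 (truncated) */
int main(void) {
  const double cc1 = 64.0 * 64.0 * (0.01 * 255) * (0.01 * 255);
  const double cc2 = 64.0 * 64.0 * (0.03 * 255) * (0.03 * 255);
  printf("cc1 = %.2f -> %d\n", cc1, (int) cc1);  /* 26634.24  -> 26634  */
  printf("cc2 = %.2f -> %d\n", cc2, (int) cc2);  /* 239708.16 -> 239708 */
  return 0;
}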
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index d46637a3e..2ddae1cbd 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -26,38 +26,26 @@
#ifdef ENTROPY_STATS
INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM
INT64 hybrid_context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM8X8
INT64 hybrid_context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM16X16
INT64 hybrid_context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM
extern unsigned int hybrid_tree_update_hist[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
-#endif
extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM8X8
extern unsigned int hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#endif
extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM16X16
extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#endif
#endif /* ENTROPY_STATS */
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);
@@ -134,9 +122,7 @@ static void tokenize1st_order_b_16x16(MACROBLOCKD *xd,
const int eob = b->eob; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
const short *qcoeff_ptr = b->qcoeff;
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
int seg_eob = 256;
int segment_id = xd->mode_info_context->mbmi.segment_id;
@@ -162,22 +148,18 @@ static void tokenize1st_order_b_16x16(MACROBLOCKD *xd,
}
t->Token = x;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_16x16[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
(band > 1 && type == PLANE_TYPE_Y_NO_DC));
assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_16x16[type][band][pt][x];
else
-#endif
++cpi->coef_counts_16x16[type][band][pt][x];
}
pt = vp8_prev_token_class[x];
@@ -310,9 +292,7 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd,
int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; /* start at DC unless type 0 */
TOKENEXTRA *t = *tp; /* store tokens starting here */
const short *qcoeff_ptr = b->qcoeff;
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int eob = b->eob;
int seg_eob = 64;
int segment_id = xd->mode_info_context->mbmi.segment_id;
@@ -338,11 +318,9 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd,
x = DCT_EOB_TOKEN;
t->Token = x;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_8x8[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_8x8[type][band][pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
@@ -350,11 +328,9 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd,
assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_8x8[type][band][pt][x];
else
-#endif
++cpi->coef_counts_8x8[type][band][pt][x];
}
pt = vp8_prev_token_class[x];
@@ -451,7 +427,6 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
const int16_t *qcoeff_ptr = b->qcoeff;
int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type = get_tx_type(xd, &xd->block[block]);
switch (tx_type) {
case ADST_DCT:
@@ -464,7 +439,6 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
pt_scan = vp8_default_zig_zag1d;
break;
}
-#endif
a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block];
l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
@@ -485,22 +459,18 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
token = DCT_EOB_TOKEN;
t->Token = token;
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs[type][band][pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
(band > 1 && type == PLANE_TYPE_Y_NO_DC));
assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts[type][band][pt][token];
else
-#endif
++cpi->coef_counts[type][band][pt][token];
}
pt = vp8_prev_token_class[token];
@@ -619,7 +589,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd);
break;
case TX_8X8:
- if (xd->mode_info_context->mbmi.mode == I8X8_PRED)
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV)
xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8_4x4uv(xd, 0);
else
xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8(xd, has_y2_block);
@@ -668,17 +639,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
tokenize1st_order_b_16x16(xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
A, L, cpi, dry_run);
-      for (b = 1; b < 16; b++) {
-        *(A + vp8_block2above[b]) = *(A);
-        *(L + vp8_block2left[b] ) = *(L);
-      }
+      A[1] = A[2] = A[3] = A[0];
+      L[1] = L[2] = L[3] = L[0];
for (b = 16; b < 24; b += 4) {
tokenize1st_order_b_8x8(xd, xd->block + b, t, PLANE_TYPE_UV,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b], cpi, dry_run);
- *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
vpx_memset(&A[8], 0, sizeof(A[8]));
vpx_memset(&L[8], 0, sizeof(L[8]));
@@ -692,18 +661,19 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
- if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
tokenize1st_order_chroma_4x4(xd, t, cpi, dry_run);
} else {
for (b = 16; b < 24; b += 4) {
tokenize1st_order_b_8x8(xd, xd->block + b, t, PLANE_TYPE_UV,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b], cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
}
} else {
@@ -995,30 +965,24 @@ static __inline void stuff1st_order_b_8x8(MACROBLOCKD *xd,
int dry_run) {
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int band = vp8_coef_bands_8x8[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) b;
t->Token = DCT_EOB_TOKEN;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_8x8[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_8x8[type][band][pt];
// t->section = 8;
t->skip_eob_node = 0;
++t;
*tp = t;
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM8X8
    if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN];
else
-#endif
++cpi->coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN];
}
pt = 0; /* 0 <-> all coeff data is zero */
@@ -1074,8 +1038,8 @@ static void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *xd,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
for (b = 16; b < 24; b += 4) {
@@ -1083,8 +1047,8 @@ static void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *xd,
A + vp8_block2above[b],
L + vp8_block2left[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
if (dry_run)
*t = t_backup;
@@ -1100,29 +1064,23 @@ static __inline void stuff1st_order_b_16x16(MACROBLOCKD *xd,
int dry_run) {
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int band = vp8_coef_bands_16x16[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) b;
t->Token = DCT_EOB_TOKEN;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_16x16[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN];
else
-#endif
++cpi->coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN];
}
pt = 0; /* 0 <-> all coeff data is zero */
@@ -1138,17 +1096,15 @@ static void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *xd,
stuff1st_order_b_16x16(xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
A, L, cpi, dry_run);
- for (i = 1; i < 16; i++) {
- *(A + vp8_block2above[i]) = *(A);
- *(L + vp8_block2left[i]) = *(L);
- }
+ A[1] = A[2] = A[3] = A[0];
+ L[1] = L[2] = L[3] = L[0];
for (b = 16; b < 24; b += 4) {
stuff1st_order_buv_8x8(xd, xd->block + b, t,
A + vp8_block2above[b],
L + vp8_block2left[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
vpx_memset(&A[8], 0, sizeof(A[8]));
vpx_memset(&L[8], 0, sizeof(L[8]));
@@ -1189,28 +1145,22 @@ static __inline void stuff1st_order_b_4x4(MACROBLOCKD *xd,
int dry_run) {
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int band = vp8_coef_bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
t->Token = DCT_EOB_TOKEN;
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs[type][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts[type][band][pt][DCT_EOB_TOKEN];
else
-#endif
++cpi->coef_counts[type][band][pt][DCT_EOB_TOKEN];
}
pt = 0; /* 0 <-> all coeff data is zero */
@@ -1288,8 +1238,8 @@ static void vp8_stuff_mb_8x8_4x4uv(VP8_COMP *cpi, MACROBLOCKD *xd,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
for (b = 16; b < 24; b++)
@@ -1308,7 +1258,8 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
if (tx_size == TX_16X16) {
vp8_stuff_mb_16x16(cpi, xd, t, dry_run);
} else if (tx_size == TX_8X8) {
- if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
vp8_stuff_mb_8x8_4x4uv(cpi, xd, t, dry_run);
} else {
vp8_stuff_mb_8x8(cpi, xd, t, dry_run);
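
The functional change in this file is that SPLITMV macroblocks coded with an 8x8 transform now take the same path as I8X8_PRED: 8x8 tokens for luma, 4x4 tokens for chroma. The entropy-context rewrites are behaviour-preserving; a short sketch of why the 16x16 copy matches the removed loop (helper names are illustrative only):

/* For a 16x16 transform, vp8_block2above[]/vp8_block2left[] map all
 * sixteen 4x4 luma positions onto context slots 0..3, so copying slot 0
 * into slots 1..3 reproduces the removed loop exactly. */
static void propagate_16x16_context(ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
  A[1] = A[2] = A[3] = A[0];
  L[1] = L[2] = L[3] = L[0];
}

/* Mode test shared by the skip, tokenize and stuff paths after this change. */
static int uses_8x8_luma_with_4x4_chroma(MB_PREDICTION_MODE mode) {
  return mode == I8X8_PRED || mode == SPLITMV;
}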
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index a2fadfc4c..cdeb390c3 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -12,507 +12,73 @@
#ifndef VARIANCE_H
#define VARIANCE_H
-#include "vpx_config.h"
-
-#define prototype_sad(sym)\
- unsigned int (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- int max_sad\
- )
-
-#define prototype_sad_multi_same_address(sym)\
- void (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned int *sad_array\
- )
-
-#define prototype_sad_multi_same_address_1(sym)\
- void (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned short *sad_array\
- )
-
-#define prototype_sad_multi_dif_address(sym)\
- void (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- unsigned char *ref_ptr[4], \
- int ref_stride, \
- unsigned int *sad_array\
- )
-
-#define prototype_variance(sym) \
- unsigned int (sym) \
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned int *sse\
- )
-
-#define prototype_variance2(sym) \
- unsigned int (sym) \
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned int *sse,\
- int *sum\
- )
-
-#define prototype_subpixvariance(sym) \
- unsigned int (sym) \
- ( \
- const unsigned char *src_ptr, \
- int source_stride, \
- int xoffset, \
- int yoffset, \
- const unsigned char *ref_ptr, \
- int Refstride, \
- unsigned int *sse \
- );
-
-#define prototype_ssimpf(sym) \
- void (sym) \
- ( \
- unsigned char *s, \
- int sp, \
- unsigned char *r, \
- int rp, \
- unsigned long *sum_s, \
- unsigned long *sum_r, \
- unsigned long *sum_sq_s, \
- unsigned long *sum_sq_r, \
- unsigned long *sum_sxr \
- );
-
-#define prototype_getmbss(sym) unsigned int (sym)(const short *)
-
-#define prototype_get16x16prederror(sym)\
- unsigned int (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride \
- )
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/variance_x86.h"
-#endif
-
-#if ARCH_ARM
-#include "arm/variance_arm.h"
-#endif
-
-#ifndef vp8_variance_sad4x4
-#define vp8_variance_sad4x4 vp8_sad4x4_c
-#endif
-extern prototype_sad(vp8_variance_sad4x4);
-
-#ifndef vp8_variance_sad8x8
-#define vp8_variance_sad8x8 vp8_sad8x8_c
-#endif
-extern prototype_sad(vp8_variance_sad8x8);
-
-#ifndef vp8_variance_sad8x16
-#define vp8_variance_sad8x16 vp8_sad8x16_c
-#endif
-extern prototype_sad(vp8_variance_sad8x16);
-
-#ifndef vp8_variance_sad16x8
-#define vp8_variance_sad16x8 vp8_sad16x8_c
-#endif
-extern prototype_sad(vp8_variance_sad16x8);
-
-#ifndef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_c
-#endif
-extern prototype_sad(vp8_variance_sad16x16);
-
-#ifndef vp8_variance_sad32x32
-#define vp8_variance_sad32x32 vp8_sad32x32_c
-#endif
-extern prototype_sad(vp8_variance_sad32x32);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_sad32x32x3
-#define vp8_variance_sad32x32x3 vp8_sad32x32x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad32x32x3);
-
-#ifndef vp8_variance_sad16x16x3
-#define vp8_variance_sad16x16x3 vp8_sad16x16x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad16x16x3);
-
-#ifndef vp8_variance_sad16x8x3
-#define vp8_variance_sad16x8x3 vp8_sad16x8x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad16x8x3);
-
-#ifndef vp8_variance_sad8x8x3
-#define vp8_variance_sad8x8x3 vp8_sad8x8x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad8x8x3);
-
-#ifndef vp8_variance_sad8x16x3
-#define vp8_variance_sad8x16x3 vp8_sad8x16x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
-
-#ifndef vp8_variance_sad4x4x3
-#define vp8_variance_sad4x4x3 vp8_sad4x4x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
-
-#ifndef vp8_variance_sad32x32x8
-#define vp8_variance_sad32x32x8 vp8_sad32x32x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad32x32x8);
-
-#ifndef vp8_variance_sad16x16x8
-#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
-
-#ifndef vp8_variance_sad16x8x8
-#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
-
-#ifndef vp8_variance_sad8x8x8
-#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
-
-#ifndef vp8_variance_sad8x16x8
-#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
-
-#ifndef vp8_variance_sad4x4x8
-#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_sad32x32x4d
-#define vp8_variance_sad32x32x4d vp8_sad32x32x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad32x32x4d);
-
-#ifndef vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad16x16x4d);
-
-#ifndef vp8_variance_sad16x8x4d
-#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad16x8x4d);
-
-#ifndef vp8_variance_sad8x8x4d
-#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad8x8x4d);
-
-#ifndef vp8_variance_sad8x16x4d
-#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad8x16x4d);
-
-#ifndef vp8_variance_sad4x4x4d
-#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad4x4x4d);
-
-#if ARCH_X86 || ARCH_X86_64
-#ifndef vp8_variance_copy32xn
-#define vp8_variance_copy32xn vp8_copy32xn_c
-#endif
-extern prototype_sad(vp8_variance_copy32xn);
-#endif
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_var4x4
-#define vp8_variance_var4x4 vp8_variance4x4_c
-#endif
-extern prototype_variance(vp8_variance_var4x4);
-
-#ifndef vp8_variance_var8x8
-#define vp8_variance_var8x8 vp8_variance8x8_c
-#endif
-extern prototype_variance(vp8_variance_var8x8);
-
-#ifndef vp8_variance_var8x16
-#define vp8_variance_var8x16 vp8_variance8x16_c
-#endif
-extern prototype_variance(vp8_variance_var8x16);
-
-#ifndef vp8_variance_var16x8
-#define vp8_variance_var16x8 vp8_variance16x8_c
-#endif
-extern prototype_variance(vp8_variance_var16x8);
-
-#ifndef vp8_variance_var16x16
-#define vp8_variance_var16x16 vp8_variance16x16_c
-#endif
-extern prototype_variance(vp8_variance_var16x16);
-
-#ifndef vp8_variance_var32x32
-#define vp8_variance_var32x32 vp8_variance32x32_c
-#endif
-extern prototype_variance(vp8_variance_var32x32);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_subpixvar4x4
-#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar4x4);
-
-#ifndef vp8_variance_subpixvar8x8
-#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar8x8);
-
-#ifndef vp8_variance_subpixvar8x16
-#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar8x16);
-
-#ifndef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
-
-#ifndef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
-
-#ifndef vp8_variance_subpixvar32x32
-#define vp8_variance_subpixvar32x32 vp8_sub_pixel_variance32x32_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar32x32);
-
-#ifndef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar16x16_h);
-
-#ifndef vp8_variance_halfpixvar32x32_h
-#define vp8_variance_halfpixvar32x32_h vp8_variance_halfpixvar32x32_h_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar32x32_h);
-
-#ifndef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar16x16_v);
-
-#ifndef vp8_variance_halfpixvar32x32_v
-#define vp8_variance_halfpixvar32x32_v vp8_variance_halfpixvar32x32_v_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar32x32_v);
-
-#ifndef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
-
-#ifndef vp8_variance_halfpixvar32x32_hv
-#define vp8_variance_halfpixvar32x32_hv vp8_variance_halfpixvar32x32_hv_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar32x32_hv);
-
-#ifndef vp8_variance_subpixmse16x16
-#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixmse16x16);
-
-#ifndef vp8_variance_subpixmse32x32
-#define vp8_variance_subpixmse32x32 vp8_sub_pixel_mse32x32_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixmse32x32);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_getmbss
-#define vp8_variance_getmbss vp8_get_mb_ss_c
-#endif
-extern prototype_getmbss(vp8_variance_getmbss);
-
-#ifndef vp8_variance_mse16x16
-#define vp8_variance_mse16x16 vp8_mse16x16_c
-#endif
-extern prototype_variance(vp8_variance_mse16x16);
-
-#ifndef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf_8x8)
-
-#ifndef vp8_ssimpf_16x16
-#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf_16x16)
-
-#ifndef vp8_variance_satd16x16
-#define vp8_variance_satd16x16 vp8_satd16x16_c
-#endif
-extern prototype_variance(vp8_variance_satd16x16);
-
-typedef prototype_sad(*vp8_sad_fn_t);
-typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
-typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
-typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
-typedef prototype_variance(*vp8_variance_fn_t);
-typedef prototype_variance2(*vp8_variance2_fn_t);
-typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t);
-typedef prototype_getmbss(*vp8_getmbss_fn_t);
-typedef prototype_ssimpf(*vp8_ssimpf_fn_t);
-typedef prototype_get16x16prederror(*vp8_get16x16prederror_fn_t);
-
-typedef struct {
- vp8_sad_fn_t sad4x4;
- vp8_sad_fn_t sad8x8;
- vp8_sad_fn_t sad8x16;
- vp8_sad_fn_t sad16x8;
- vp8_sad_fn_t sad16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_sad_fn_t sad32x32;
-#endif
-
- vp8_variance_fn_t var4x4;
- vp8_variance_fn_t var8x8;
- vp8_variance_fn_t var8x16;
- vp8_variance_fn_t var16x8;
- vp8_variance_fn_t var16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_variance_fn_t var32x32;
-#endif
-
- vp8_subpixvariance_fn_t subpixvar4x4;
- vp8_subpixvariance_fn_t subpixvar8x8;
- vp8_subpixvariance_fn_t subpixvar8x16;
- vp8_subpixvariance_fn_t subpixvar16x8;
- vp8_subpixvariance_fn_t subpixvar16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_subpixvariance_fn_t subpixvar32x32;
-#endif
- vp8_variance_fn_t halfpixvar16x16_h;
- vp8_variance_fn_t halfpixvar32x32_h;
- vp8_variance_fn_t halfpixvar16x16_v;
-#if CONFIG_SUPERBLOCKS
- vp8_variance_fn_t halfpixvar32x32_v;
-#endif
- vp8_variance_fn_t halfpixvar16x16_hv;
-#if CONFIG_SUPERBLOCKS
- vp8_variance_fn_t halfpixvar32x32_hv;
-#endif
- vp8_subpixvariance_fn_t subpixmse16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_subpixvariance_fn_t subpixmse32x32;
-#endif
-
- vp8_getmbss_fn_t getmbss;
- vp8_variance_fn_t mse16x16;
-
-#if CONFIG_SUPERBLOCKS
- vp8_sad_multi_fn_t sad32x32x3;
-#endif
- vp8_sad_multi_fn_t sad16x16x3;
- vp8_sad_multi_fn_t sad16x8x3;
- vp8_sad_multi_fn_t sad8x16x3;
- vp8_sad_multi_fn_t sad8x8x3;
- vp8_sad_multi_fn_t sad4x4x3;
-
-#if CONFIG_SUPERBLOCKS
- vp8_sad_multi1_fn_t sad32x32x8;
-#endif
- vp8_sad_multi1_fn_t sad16x16x8;
- vp8_sad_multi1_fn_t sad16x8x8;
- vp8_sad_multi1_fn_t sad8x16x8;
- vp8_sad_multi1_fn_t sad8x8x8;
- vp8_sad_multi1_fn_t sad4x4x8;
-
-#if CONFIG_SUPERBLOCKS
- vp8_sad_multi_d_fn_t sad32x32x4d;
-#endif
- vp8_sad_multi_d_fn_t sad16x16x4d;
- vp8_sad_multi_d_fn_t sad16x8x4d;
- vp8_sad_multi_d_fn_t sad8x16x4d;
- vp8_sad_multi_d_fn_t sad8x8x4d;
- vp8_sad_multi_d_fn_t sad4x4x4d;
-
-#if ARCH_X86 || ARCH_X86_64
- vp8_sad_fn_t copy32xn;
-#endif
-
-#if CONFIG_INTERNAL_STATS
- vp8_ssimpf_fn_t ssimpf_8x8;
- vp8_ssimpf_fn_t ssimpf_16x16;
-#endif
-
- vp8_variance_fn_t satd16x16;
-} vp8_variance_rtcd_vtable_t;
-
-typedef struct {
- vp8_sad_fn_t sdf;
- vp8_variance_fn_t vf;
- vp8_subpixvariance_fn_t svf;
- vp8_variance_fn_t svf_halfpix_h;
- vp8_variance_fn_t svf_halfpix_v;
- vp8_variance_fn_t svf_halfpix_hv;
- vp8_sad_multi_fn_t sdx3f;
- vp8_sad_multi1_fn_t sdx8f;
- vp8_sad_multi_d_fn_t sdx4df;
-#if ARCH_X86 || ARCH_X86_64
- vp8_sad_fn_t copymem;
-#endif
+typedef unsigned int(*vp8_sad_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad);
+
+typedef void (*vp8_copy32xn_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int n);
+
+typedef void (*vp8_sad_multi_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sad_array);
+
+typedef void (*vp8_sad_multi1_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array);
+
+typedef void (*vp8_sad_multi_d_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char * const ref_ptr[],
+ int ref_stride, unsigned int *sad_array);
+
+typedef unsigned int (*vp8_variance_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sse);
+
+typedef unsigned int (*vp8_subpixvariance_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ int xoffset,
+ int yoffset,
+ const unsigned char *ref_ptr,
+ int Refstride,
+ unsigned int *sse);
+
+typedef void (*vp8_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r,
+ int rp, unsigned long *sum_s,
+ unsigned long *sum_r, unsigned long *sum_sq_s,
+ unsigned long *sum_sq_r,
+ unsigned long *sum_sxr);
+
+typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
+
+typedef unsigned int (*vp8_get16x16prederror_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride);
+
+typedef struct variance_vtable {
+ vp8_sad_fn_t sdf;
+ vp8_variance_fn_t vf;
+ vp8_subpixvariance_fn_t svf;
+ vp8_variance_fn_t svf_halfpix_h;
+ vp8_variance_fn_t svf_halfpix_v;
+ vp8_variance_fn_t svf_halfpix_hv;
+ vp8_sad_multi_fn_t sdx3f;
+ vp8_sad_multi1_fn_t sdx8f;
+ vp8_sad_multi_d_fn_t sdx4df;
+ vp8_copy32xn_fn_t copymem;
} vp8_variance_fn_ptr_t;
-#if CONFIG_RUNTIME_CPU_DETECT
-#define VARIANCE_INVOKE(ctx,fn) (ctx)->fn
-#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn
-#else
-#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
-#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn
-#endif
-
-#if CONFIG_NEWBESTREFMV
-unsigned int vp8_sad2x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad);
-unsigned int vp8_sad16x2_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad);
-#endif
-
#endif
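
With the macro prototypes and the variance rtcd vtable gone, callers use the generated RTCD symbols directly and only the per-block-size fn_ptr table remains. Below is a hedged sketch of how the encoder might fill its 16x16 entry; BLOCK_16X16 and every vp8_* symbol on the right-hand side are assumptions based on the defaults removed above and may differ from what rtcd_defs.sh actually generates.

/* Hedged sketch only: symbol names below are assumptions, see note above. */
static void setup_16x16_fn_ptrs(VP8_COMP *cpi) {
  vp8_variance_fn_ptr_t *fn = &cpi->fn_ptr[BLOCK_16X16];
  fn->sdf            = vp8_sad16x16;
  fn->vf             = vp8_variance16x16;
  fn->svf            = vp8_sub_pixel_variance16x16;
  fn->svf_halfpix_h  = vp8_variance_halfpixvar16x16_h;
  fn->svf_halfpix_v  = vp8_variance_halfpixvar16x16_v;
  fn->svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv;
  fn->sdx3f          = vp8_sad16x16x3;
  fn->sdx8f          = vp8_sad16x16x8;
  fn->sdx4df         = vp8_sad16x16x4d;
  fn->copymem        = vp8_copy32xn;
}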
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
deleted file mode 100644
index 0971f11b0..000000000
--- a/vp8/encoder/x86/variance_x86.h
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VARIANCE_X86_H
-#define VARIANCE_X86_H
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-extern prototype_sad(vp8_sad4x4_mmx);
-extern prototype_sad(vp8_sad8x8_mmx);
-extern prototype_sad(vp8_sad8x16_mmx);
-extern prototype_sad(vp8_sad16x8_mmx);
-extern prototype_sad(vp8_sad16x16_mmx);
-extern prototype_variance(vp8_variance4x4_mmx);
-extern prototype_variance(vp8_variance8x8_mmx);
-extern prototype_variance(vp8_variance8x16_mmx);
-extern prototype_variance(vp8_variance16x8_mmx);
-extern prototype_variance(vp8_variance16x16_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx);
-extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx);
-extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx);
-extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
-extern prototype_getmbss(vp8_get_mb_ss_mmx);
-extern prototype_variance(vp8_mse16x16_mmx);
-extern prototype_variance2(vp8_get8x8var_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad4x4
-#define vp8_variance_sad4x4 vp8_sad4x4_mmx
-
-#undef vp8_variance_sad8x8
-#define vp8_variance_sad8x8 vp8_sad8x8_mmx
-
-#undef vp8_variance_sad8x16
-#define vp8_variance_sad8x16 vp8_sad8x16_mmx
-
-#undef vp8_variance_sad16x8
-#define vp8_variance_sad16x8 vp8_sad16x8_mmx
-
-#undef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_mmx
-
-#undef vp8_variance_var4x4
-#define vp8_variance_var4x4 vp8_variance4x4_mmx
-
-#undef vp8_variance_var8x8
-#define vp8_variance_var8x8 vp8_variance8x8_mmx
-
-#undef vp8_variance_var8x16
-#define vp8_variance_var8x16 vp8_variance8x16_mmx
-
-#undef vp8_variance_var16x8
-#define vp8_variance_var16x8 vp8_variance16x8_mmx
-
-#undef vp8_variance_var16x16
-#define vp8_variance_var16x16 vp8_variance16x16_mmx
-
-#undef vp8_variance_subpixvar4x4
-#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_mmx
-
-#undef vp8_variance_subpixvar8x8
-#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_mmx
-
-#undef vp8_variance_subpixvar8x16
-#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_mmx
-
-#undef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_mmx
-
-#undef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx
-
-#undef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx
-
-#undef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx
-
-#undef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx
-
-#undef vp8_variance_subpixmse16x16
-#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx
-
-#undef vp8_variance_getmbss
-#define vp8_variance_getmbss vp8_get_mb_ss_mmx
-
-#undef vp8_variance_mse16x16
-#define vp8_variance_mse16x16 vp8_mse16x16_mmx
-
-#endif
-#endif
-
-
-#if HAVE_SSE2
-extern prototype_sad(vp8_sad4x4_wmt);
-extern prototype_sad(vp8_sad8x8_wmt);
-extern prototype_sad(vp8_sad8x16_wmt);
-extern prototype_sad(vp8_sad16x8_wmt);
-extern prototype_sad(vp8_sad16x16_wmt);
-extern prototype_sad(vp8_copy32xn_sse2);
-extern prototype_variance(vp8_variance4x4_wmt);
-extern prototype_variance(vp8_variance8x8_wmt);
-extern prototype_variance(vp8_variance8x16_wmt);
-extern prototype_variance(vp8_variance16x8_wmt);
-extern prototype_variance(vp8_variance16x16_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt);
-extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
-extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
-extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
-extern prototype_getmbss(vp8_get_mb_ss_sse2);
-extern prototype_variance(vp8_mse16x16_wmt);
-extern prototype_variance2(vp8_get8x8var_sse2);
-extern prototype_variance2(vp8_get16x16var_sse2);
-extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
-extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad4x4
-#define vp8_variance_sad4x4 vp8_sad4x4_wmt
-
-#undef vp8_variance_sad8x8
-#define vp8_variance_sad8x8 vp8_sad8x8_wmt
-
-#undef vp8_variance_sad8x16
-#define vp8_variance_sad8x16 vp8_sad8x16_wmt
-
-#undef vp8_variance_sad16x8
-#define vp8_variance_sad16x8 vp8_sad16x8_wmt
-
-#undef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_wmt
-
-#undef vp8_variance_copy32xn
-#define vp8_variance_copy32xn vp8_copy32xn_sse2
-
-#undef vp8_variance_var4x4
-#define vp8_variance_var4x4 vp8_variance4x4_wmt
-
-#undef vp8_variance_var8x8
-#define vp8_variance_var8x8 vp8_variance8x8_wmt
-
-#undef vp8_variance_var8x16
-#define vp8_variance_var8x16 vp8_variance8x16_wmt
-
-#undef vp8_variance_var16x8
-#define vp8_variance_var16x8 vp8_variance16x8_wmt
-
-#undef vp8_variance_var16x16
-#define vp8_variance_var16x16 vp8_variance16x16_wmt
-
-#undef vp8_variance_subpixvar4x4
-#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_wmt
-
-#undef vp8_variance_subpixvar8x8
-#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_wmt
-
-#undef vp8_variance_subpixvar8x16
-#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_wmt
-
-#undef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_wmt
-
-#undef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt
-
-#undef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt
-
-#undef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt
-
-#undef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt
-
-#undef vp8_variance_subpixmse16x16
-#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt
-
-#undef vp8_variance_getmbss
-#define vp8_variance_getmbss vp8_get_mb_ss_sse2
-
-#undef vp8_variance_mse16x16
-#define vp8_variance_mse16x16 vp8_mse16x16_wmt
-
-#if ARCH_X86_64
-#undef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2
-
-#undef vp8_ssimpf_16x16
-#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2
-#endif
-
-#endif
-#endif
-
-
-#if HAVE_SSE3
-extern prototype_sad(vp8_sad16x16_sse3);
-extern prototype_sad(vp8_sad16x8_sse3);
-extern prototype_sad_multi_same_address(vp8_sad16x16x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad16x8x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad8x16x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad8x8x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad4x4x3_sse3);
-
-extern prototype_sad_multi_dif_address(vp8_sad16x16x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad16x8x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad8x16x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad8x8x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
-extern prototype_sad(vp8_copy32xn_sse3);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_sse3
-
-#undef vp8_variance_sad16x16x3
-#define vp8_variance_sad16x16x3 vp8_sad16x16x3_sse3
-
-#undef vp8_variance_sad16x8x3
-#define vp8_variance_sad16x8x3 vp8_sad16x8x3_sse3
-
-#undef vp8_variance_sad8x16x3
-#define vp8_variance_sad8x16x3 vp8_sad8x16x3_sse3
-
-#undef vp8_variance_sad8x8x3
-#define vp8_variance_sad8x8x3 vp8_sad8x8x3_sse3
-
-#undef vp8_variance_sad4x4x3
-#define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3
-
-#undef vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3
-
-#undef vp8_variance_sad16x8x4d
-#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3
-
-#undef vp8_variance_sad8x16x4d
-#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_sse3
-
-#undef vp8_variance_sad8x8x4d
-#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_sse3
-
-#undef vp8_variance_sad4x4x4d
-#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_sse3
-
-#undef vp8_variance_copy32xn
-#define vp8_variance_copy32xn vp8_copy32xn_sse3
-
-#endif
-#endif
-
-
-#if HAVE_SSSE3
-extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
-extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad16x16x3
-#define vp8_variance_sad16x16x3 vp8_sad16x16x3_ssse3
-
-#undef vp8_variance_sad16x8x3
-#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
-
-#undef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
-
-#undef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
-
-#endif
-#endif
-
-
-#if HAVE_SSE4_1
-extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad16x16x8
-#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
-
-#undef vp8_variance_sad16x8x8
-#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
-
-#undef vp8_variance_sad8x16x8
-#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
-
-#undef vp8_variance_sad8x8x8
-#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
-
-#undef vp8_variance_sad4x4x8
-#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
-
-#endif
-#endif
-
-#endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 71c51c14f..a169b493e 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -90,31 +90,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) {
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
if (flags & HAS_MMX) {
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_mmx;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_mmx;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_mmx;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_mmx;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_mmx;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_mmx;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_mmx;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_mmx;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_mmx;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_mmx;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx;
- cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
- cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx;
- cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx;
- cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx;
-
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
@@ -126,32 +101,6 @@

#if HAVE_SSE2
if (flags & HAS_SSE2) {
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt;
- cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse2;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_wmt;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_wmt;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_wmt;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_wmt;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_wmt;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt;
- cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
- cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt;
- cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt;
- cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2;
-
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
@@ -160,54 +109,20 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) {
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
- cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2;
- cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2;
-#endif
-#endif
}
#endif
#if HAVE_SSE3
if (flags & HAS_SSE3) {
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
- cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3;
- cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_sse3;
- cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
- cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
- cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
cpi->rtcd.search.full_search = vp8_full_search_sadx3;
- cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
- cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
- cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
- cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3;
- cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
- cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse3;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
cpi->rtcd.search.refining_search = vp8_refining_search_sadx4;
}
#endif
-#if HAVE_SSSE3
- if (flags & HAS_SSSE3) {
- cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
- cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
-
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
- }
-#endif
-
-
#if HAVE_SSE4_1
if (flags & HAS_SSE4_1) {
- cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
- cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
- cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
- cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
- cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
}
#endif
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 25c4fe210..fbbdec145 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -53,6 +53,7 @@ VP8_COMMON_SRCS-yes += common/reconintra.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
VP8_COMMON_SRCS-yes += common/rtcd.c
VP8_COMMON_SRCS-yes += common/rtcd_defs.sh
+VP8_COMMON_SRCS-yes += common/sadmxn.h
VP8_COMMON_SRCS-yes += common/seg_common.h
VP8_COMMON_SRCS-yes += common/seg_common.c
VP8_COMMON_SRCS-yes += common/setupintrarecon.h
@@ -119,6 +120,8 @@ endif
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/filter_sse2.c
ifeq ($(HAVE_SSE2),yes)
vp8/common/x86/filter_sse2.c.o: CFLAGS += -msse2
+vp8/common/x86/loopfilter_x86.c.o: CFLAGS += -msse2
+vp8/common/loopfilter_filters.c.o: CFLAGS += -msse2
endif

VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 7058e316b..6d2f18080 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -92,7 +92,6 @@ VP8_CX_SRCS-yes += encoder/mbgraph.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
-VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c