diff options
Diffstat (limited to 'vp8')
57 files changed, 1374 insertions, 5153 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 3ab4cc3a9..cb546e74b 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -86,9 +86,7 @@ typedef enum BILINEAR = 1, EIGHTTAP = 2, EIGHTTAP_SHARP = 3, -#if CONFIG_SWITCHABLE_INTERP SWITCHABLE /* should be the last one */ -#endif } INTERPOLATIONFILTERTYPE; typedef enum @@ -135,14 +133,12 @@ typedef enum { TX_SIZE_MAX // Number of different transforms available } TX_SIZE; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 typedef enum { DCT_DCT = 0, // DCT in both horizontal and vertical - ADST_DCT = 1, // ADST in horizontal, DCT in vertical - DCT_ADST = 2, // DCT in horizontal, ADST in vertical + ADST_DCT = 1, // ADST in vertical, DCT in horizontal + DCT_ADST = 2, // DCT in vertical, ADST in horizontal ADST_ADST = 3 // ADST in both directions } TX_TYPE; -#endif #define VP8_YMODES (B_PRED + 1) #define VP8_UV_MODES (TM_PRED + 1) @@ -177,6 +173,14 @@ typedef enum { #define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */ #define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4) +typedef enum { + PARTITIONING_16X8 = 0, + PARTITIONING_8X16, + PARTITIONING_8X8, + PARTITIONING_4X4, + NB_PARTITIONINGS, +} SPLITMV_PARTITIONING_TYPE; + /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there is a single probability table. 
*/ @@ -184,9 +188,7 @@ typedef enum { union b_mode_info { struct { B_PREDICTION_MODE first; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type; -#endif #if CONFIG_COMP_INTRA_PRED B_PREDICTION_MODE second; @@ -220,7 +222,7 @@ typedef struct { int mv_ref_index[MAX_REF_FRAMES]; #endif - unsigned char partitioning; + SPLITMV_PARTITIONING_TYPE partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; unsigned char need_to_clamp_secondmv; @@ -239,9 +241,7 @@ typedef struct { // Flag to turn prediction signal filter on(1)/off(0 ) at the MB level unsigned int pred_filter_enabled; #endif -#if CONFIG_SWITCHABLE_INTERP INTERPOLATIONFILTERTYPE interp_filter; -#endif #if CONFIG_SUPERBLOCKS // FIXME need a SB array of 4 MB_MODE_INFOs that @@ -388,17 +388,11 @@ typedef struct MacroBlockD { } MACROBLOCKD; -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 #define ACTIVE_HT 110 // quantization stepsize threshold -#endif -#if CONFIG_HYBRIDTRANSFORM8X8 #define ACTIVE_HT8 300 -#endif -#if CONFIG_HYBRIDTRANSFORM16X16 #define ACTIVE_HT16 300 -#endif // convert MB_PREDICTION_MODE to B_PREDICTION_MODE static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { @@ -442,7 +436,6 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { return b_mode; } -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 // transform mapping static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { // map transform type @@ -470,9 +463,7 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { } return tx_type; } -#endif -#if CONFIG_HYBRIDTRANSFORM static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; if (xd->mode_info_context->mbmi.mode == B_PRED && @@ -481,9 +472,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { } return tx_type; 
} -#endif -#if CONFIG_HYBRIDTRANSFORM8X8 static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; if (xd->mode_info_context->mbmi.mode == I8X8_PRED && @@ -492,9 +481,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { } return tx_type; } -#endif -#if CONFIG_HYBRIDTRANSFORM16X16 static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; if (xd->mode_info_context->mbmi.mode < I8X8_PRED && @@ -503,34 +490,24 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { } return tx_type; } -#endif -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || \ - CONFIG_HYBRIDTRANSFORM16X16 static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; int ib = (b - xd->block); if (ib >= 16) return tx_type; -#if CONFIG_HYBRIDTRANSFORM16X16 if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) { tx_type = get_tx_type_16x16(xd, b); } -#endif -#if CONFIG_HYBRIDTRANSFORM8X8 if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { ib = (ib & 8) + ((ib & 4) >> 1); tx_type = get_tx_type_8x8(xd, &xd->block[ib]); } -#endif -#if CONFIG_HYBRIDTRANSFORM if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { tx_type = get_tx_type_4x4(xd, b); } -#endif return tx_type; } -#endif extern void vp8_build_block_doffsets(MACROBLOCKD *xd); extern void vp8_setup_block_dptrs(MACROBLOCKD *xd); diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h index 5e21195ee..bd1f795d0 100644 --- a/vp8/common/default_coef_probs.h +++ b/vp8/common/default_coef_probs.h @@ -13,9 +13,9 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES] -[COEF_BANDS] -[PREV_COEF_CONTEXTS] -[ENTROPY_NODES] = { + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { { /* Block Type ( 0 ) */ { @@ -254,11 +254,10 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES] } }; -#if CONFIG_HYBRIDTRANSFORM static const vp8_prob 
default_hybrid_coef_probs [BLOCK_TYPES] -[COEF_BANDS] -[PREV_COEF_CONTEXTS] -[ENTROPY_NODES] = { + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { { /* Block Type ( 0 ) */ { @@ -496,7 +495,6 @@ static const vp8_prob default_hybrid_coef_probs [BLOCK_TYPES] } } }; -#endif static const vp8_prob default_coef_probs_8x8[BLOCK_TYPES_8X8] @@ -731,12 +729,11 @@ default_coef_probs_8x8[BLOCK_TYPES_8X8] } }; -#if CONFIG_HYBRIDTRANSFORM8X8 static const vp8_prob default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = { + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { { /* block Type 0 */ { @@ -964,7 +961,6 @@ default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] } } }; -#endif static const vp8_prob default_coef_probs_16x16[BLOCK_TYPES_16X16] @@ -1173,7 +1169,6 @@ static const vp8_prob } }; -#if CONFIG_HYBRIDTRANSFORM16X16 static const vp8_prob default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] @@ -1380,4 +1375,3 @@ static const vp8_prob } } }; -#endif diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index 90f7a52c2..a3f731a3c 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -64,8 +64,6 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = { 7, 11, 14, 15, }; - -#if CONFIG_HYBRIDTRANSFORM DECLARE_ALIGNED(16, const int, vp8_col_scan[16]) = { 0, 4, 8, 12, 1, 5, 9, 13, @@ -78,7 +76,6 @@ DECLARE_ALIGNED(16, const int, vp8_row_scan[16]) = { 8, 9, 10, 11, 12, 13, 14, 15 }; -#endif DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, @@ -208,25 +205,19 @@ vp8_extra_bit_struct vp8_extra_bits[12] = { void vp8_default_coef_probs(VP8_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(pc->fc.coef_probs)); -#if CONFIG_HYBRIDTRANSFORM vpx_memcpy(pc->fc.hybrid_coef_probs, default_hybrid_coef_probs, sizeof(pc->fc.hybrid_coef_probs)); -#endif vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8, sizeof(pc->fc.coef_probs_8x8)); -#if 
CONFIG_HYBRIDTRANSFORM8X8 vpx_memcpy(pc->fc.hybrid_coef_probs_8x8, default_hybrid_coef_probs_8x8, sizeof(pc->fc.hybrid_coef_probs_8x8)); -#endif vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16, sizeof(pc->fc.coef_probs_16x16)); -#if CONFIG_HYBRIDTRANSFORM16X16 vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, default_hybrid_coef_probs_16x16, sizeof(pc->fc.hybrid_coef_probs_16x16)); -#endif } void vp8_coef_tree_initialize() { @@ -344,7 +335,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } -#if CONFIG_HYBRIDTRANSFORM for (i = 0; i < BLOCK_TYPES; ++i) for (j = 0; j < COEF_BANDS; ++j) for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -366,7 +356,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { else cm->fc.hybrid_coef_probs[i][j][k][t] = prob; } } -#endif for (i = 0; i < BLOCK_TYPES_8X8; ++i) for (j = 0; j < COEF_BANDS; ++j) @@ -390,7 +379,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } -#if CONFIG_HYBRIDTRANSFORM8X8 for (i = 0; i < BLOCK_TYPES_8X8; ++i) for (j = 0; j < COEF_BANDS; ++j) for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -413,7 +401,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { else cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = prob; } } -#endif for (i = 0; i < BLOCK_TYPES_16X16; ++i) for (j = 0; j < COEF_BANDS; ++j) @@ -437,7 +424,6 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } -#if CONFIG_HYBRIDTRANSFORM16X16 for (i = 0; i < BLOCK_TYPES_16X16; ++i) for (j = 0; j < COEF_BANDS; ++j) for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -458,5 +444,4 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob; } } -#endif } diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index b9dfb344f..48a100ac6 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -104,10 +104,8 @@ struct VP8Common; void vp8_default_coef_probs(struct VP8Common *); extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); -#if CONFIG_HYBRIDTRANSFORM extern DECLARE_ALIGNED(16, const int, vp8_col_scan[16]); extern 
DECLARE_ALIGNED(16, const int, vp8_row_scan[16]); -#endif extern short vp8_default_zig_zag_mask[16]; extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c index 5627aa43a..bcd9f3707 100644 --- a/vp8/common/entropymode.c +++ b/vp8/common/entropymode.c @@ -215,9 +215,9 @@ const vp8_tree_index vp8_uv_mode_tree[VP8_UV_MODES * 2 - 2] = { }; const vp8_tree_index vp8_mbsplit_tree[6] = { - -3, 2, - -2, 4, - -0, -1 + -PARTITIONING_4X4, 2, + -PARTITIONING_8X8, 4, + -PARTITIONING_16X8, -PARTITIONING_8X16, }; const vp8_tree_index vp8_mv_ref_tree[8] = { @@ -301,11 +301,8 @@ void vp8_init_mbmode_probs(VP8_COMMON *x) { vpx_memcpy(x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_prob2, sizeof(vp8_sub_mv_ref_prob2)); vpx_memcpy(x->fc.mbsplit_prob, vp8_mbsplit_probs, sizeof(vp8_mbsplit_probs)); -#if CONFIG_SWITCHABLE_INTERP vpx_memcpy(x->fc.switchable_interp_prob, vp8_switchable_interp_prob, sizeof(vp8_switchable_interp_prob)); -#endif - } @@ -338,7 +335,6 @@ void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [ } while (++i < VP8_BINTRAMODES); } -#if CONFIG_SWITCHABLE_INTERP #if VP8_SWITCHABLE_FILTERS == 3 const vp8_tree_index vp8_switchable_interp_tree[VP8_SWITCHABLE_FILTERS*2-2] = { -0, 2, @@ -363,19 +359,10 @@ const vp8_prob vp8_switchable_interp_prob [VP8_SWITCHABLE_FILTERS+1] { 64}, {192}, }; -//#define SWITCHABLE_86 -#ifdef SWITCHABLE_86 -const INTERPOLATIONFILTERTYPE vp8_switchable_interp[VP8_SWITCHABLE_FILTERS] = { - EIGHTTAP, SIXTAP}; -const int vp8_switchable_interp_map[SWITCHABLE+1] = {1, -1, 0, -1, -1}; //8, 6 -#else const INTERPOLATIONFILTERTYPE vp8_switchable_interp[VP8_SWITCHABLE_FILTERS] = { EIGHTTAP, EIGHTTAP_SHARP}; const int vp8_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1}; //8, 8s #endif -#endif -#endif - void vp8_entropy_mode_init() { vp8_tokens_from_tree(vp8_bmode_encodings, vp8_bmode_tree); @@ -387,10 +374,8 @@ void vp8_entropy_mode_init() { 
vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree); vp8_tokens_from_tree(vp8_i8x8_mode_encodings, vp8_i8x8_mode_tree); vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree); -#if CONFIG_SWITCHABLE_INTERP vp8_tokens_from_tree(vp8_switchable_interp_encodings, vp8_switchable_interp_tree); -#endif vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array, vp8_mv_ref_tree, NEARESTMV); diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h index 430c949a6..debb5659e 100644 --- a/vp8/common/entropymode.h +++ b/vp8/common/entropymode.h @@ -76,16 +76,14 @@ void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES void vp8_adapt_mode_probs(struct VP8Common *); -#if CONFIG_SWITCHABLE_INTERP #define VP8_SWITCHABLE_FILTERS 2 /* number of switchable filters */ extern const INTERPOLATIONFILTERTYPE vp8_switchable_interp [VP8_SWITCHABLE_FILTERS]; -extern const int vp8_switchable_interp_map[SWITCHABLE+1]; +extern const int vp8_switchable_interp_map[SWITCHABLE + 1]; extern const vp8_tree_index vp8_switchable_interp_tree - [2*(VP8_SWITCHABLE_FILTERS-1)]; + [2*(VP8_SWITCHABLE_FILTERS - 1)]; extern struct vp8_token_struct vp8_switchable_interp_encodings [VP8_SWITCHABLE_FILTERS]; extern const vp8_prob vp8_switchable_interp_prob - [VP8_SWITCHABLE_FILTERS+1][VP8_SWITCHABLE_FILTERS-1]; -#endif + [VP8_SWITCHABLE_FILTERS + 1][VP8_SWITCHABLE_FILTERS - 1]; #endif diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c index 6c31236ec..a442a2438 100644 --- a/vp8/common/entropymv.c +++ b/vp8/common/entropymv.c @@ -14,8 +14,6 @@ //#define MV_COUNT_TESTING -#if CONFIG_NEWMVENTROPY - #define MV_COUNT_SAT 16 #define MV_MAX_UPDATE_FACTOR 160 @@ -450,413 +448,13 @@ void vp8_adapt_nmv_probs(VP8_COMMON *cm, int usehp) { } } -#else /* CONFIG_NEWMVENTROPY */ - -#define MV_COUNT_SAT 16 -#define MV_MAX_UPDATE_FACTOR 128 - -const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] = { - {{ - 237, - 246, - 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 
254, 254, - 254, 254, 254, 254, 254, 250, 250, 252, 254, 254, 254 - } - }, - {{ - 231, - 243, - 245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 251, 251, 254, 254, 254, 254 - } - } -}; -const MV_CONTEXT_HP vp8_default_mv_context_hp[2] = { - {{ - /* row */ - 162, /* is short */ - 128, /* sign */ - 220, 204, 180, 192, 192, 119, 192, 192, 180, 140, 192, 192, 224, 224, 224, /* short tree */ - 128, 129, 132, 75, 145, 178, 206, 239, 254, 254, 254 /* long bits */ - } - }, - {{ - /* same for column */ - 164, /* is short */ - 128, - 220, 204, 180, 192, 192, 119, 192, 192, 180, 140, 192, 192, 224, 224, 224, /* short tree */ - 128, 130, 130, 74, 148, 180, 203, 236, 254, 254, 254 /* long bits */ - } - } -}; - -const MV_CONTEXT vp8_mv_update_probs[2] = { - {{ - 237, - 246, - 253, 253, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 - } - }, - {{ - 231, - 243, - 245, 253, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 - } - } -}; -const MV_CONTEXT vp8_default_mv_context[2] = { - {{ - /* row */ - 162, /* is short */ - 128, /* sign */ - 225, 146, 172, 147, 214, 39, 156, /* short tree */ - 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ - } - }, - {{ - /* same for column */ - 164, /* is short */ - 128, - 204, 170, 119, 235, 140, 230, 228, - 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ - } - } -}; - -const vp8_tree_index vp8_small_mvtree_hp [30] = { - 2, 16, - 4, 10, - 6, 8, - -0, -1, - -2, -3, - 12, 14, - -4, -5, - -6, -7, - 18, 24, - 20, 22, - -8, -9, - -10, -11, - 26, 28, - -12, -13, - -14, -15 -}; -struct vp8_token_struct vp8_small_mvencodings_hp [16]; - -const vp8_tree_index vp8_small_mvtree [14] = { - 2, 8, - 4, 6, - -0, -1, - -2, -3, - 10, 12, - -4, -5, - -6, -7 -}; -struct vp8_token_struct vp8_small_mvencodings [8]; - -__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2], int pbits) { - const unsigned int tot = 
ct[0] + ct[1]; - if (tot) { - const vp8_prob x = ((ct[0] * 255) / tot) & -(1 << (8 - pbits)); - *p = x ? x : 1; - } else { - *p = 128; - } -} - -static void compute_component_probs( - const unsigned int events [MVvals], - vp8_prob Pnew [MVPcount], - unsigned int is_short_ct[2], - unsigned int sign_ct[2], - unsigned int bit_ct [mvlong_width] [2], - unsigned int short_ct [mvnum_short], - unsigned int short_bct [mvnum_short - 1] [2] -) { - is_short_ct[0] = is_short_ct[1] = 0; - sign_ct[0] = sign_ct[1] = 0; - vpx_memset(bit_ct, 0, sizeof(unsigned int)*mvlong_width * 2); - vpx_memset(short_ct, 0, sizeof(unsigned int)*mvnum_short); - vpx_memset(short_bct, 0, sizeof(unsigned int) * (mvnum_short - 1) * 2); - - { - const int c = events [mv_max]; - is_short_ct [0] += c; // Short vector - short_ct [0] += c; // Magnitude distribution - } - { - int j = 1; - do { - const int c1 = events [mv_max + j]; // positive - const int c2 = events [mv_max - j]; // negative - const int c = c1 + c2; - int a = j; - - sign_ct [0] += c1; - sign_ct [1] += c2; - - if (a < mvnum_short) { - is_short_ct [0] += c; // Short vector - short_ct [a] += c; // Magnitude distribution - } else { - int k = mvlong_width - 1; - is_short_ct [1] += c; // Long vector - - do - bit_ct [k] [(a >> k) & 1] += c; - - while (--k >= 0); - } - } while (++j <= mv_max); - } - calc_prob(Pnew + mvpis_short, is_short_ct, 8); - - calc_prob(Pnew + MVPsign, sign_ct, 8); - - { - vp8_prob p [mvnum_short - 1]; /* actually only need branch ct */ - int j = 0; - - vp8_tree_probs_from_distribution( - mvnum_short, vp8_small_mvencodings, vp8_small_mvtree, - p, short_bct, short_ct, - 256, 1 - ); - - do - calc_prob(Pnew + MVPshort + j, short_bct[j], 8); - while (++j < mvnum_short - 1); - } - - { - int j = 0; - do - calc_prob(Pnew + MVPbits + j, bit_ct[j], 8); - while (++j < mvlong_width); - } -} - -static void compute_component_probs_hp( - const unsigned int events [MVvals_hp], - vp8_prob Pnew [MVPcount_hp], - unsigned int is_short_ct[2], - 
unsigned int sign_ct[2], - unsigned int bit_ct [mvlong_width_hp] [2], - unsigned int short_ct [mvnum_short_hp], - unsigned int short_bct [mvnum_short_hp - 1] [2] -) { - is_short_ct[0] = is_short_ct[1] = 0; - sign_ct[0] = sign_ct[1] = 0; - vpx_memset(bit_ct, 0, sizeof(unsigned int)*mvlong_width_hp * 2); - vpx_memset(short_ct, 0, sizeof(unsigned int)*mvnum_short_hp); - vpx_memset(short_bct, 0, sizeof(unsigned int) * (mvnum_short_hp - 1) * 2); - - { - const int c = events [mv_max_hp]; - is_short_ct [0] += c; // Short vector - short_ct [0] += c; // Magnitude distribution - } - { - int j = 1; - do { - const int c1 = events [mv_max_hp + j]; // positive - const int c2 = events [mv_max_hp - j]; // negative - const int c = c1 + c2; - int a = j; - - sign_ct [0] += c1; - sign_ct [1] += c2; - - if (a < mvnum_short_hp) { - is_short_ct [0] += c; // Short vector - short_ct [a] += c; // Magnitude distribution - } else { - int k = mvlong_width_hp - 1; - is_short_ct [1] += c; // Long vector - - do - bit_ct [k] [(a >> k) & 1] += c; - - while (--k >= 0); - } - } while (++j <= mv_max_hp); - } - calc_prob(Pnew + mvpis_short_hp, is_short_ct, 8); - - calc_prob(Pnew + MVPsign_hp, sign_ct, 8); - - { - vp8_prob p [mvnum_short_hp - 1]; /* actually only need branch ct */ - int j = 0; - - vp8_tree_probs_from_distribution( - mvnum_short_hp, vp8_small_mvencodings_hp, vp8_small_mvtree_hp, - p, short_bct, short_ct, - 256, 1 - ); - - do - calc_prob(Pnew + MVPshort_hp + j, short_bct[j], 8); - while (++j < mvnum_short_hp - 1); - } - - { - int j = 0; - do - calc_prob(Pnew + MVPbits_hp + j, bit_ct[j], 8); - while (++j < mvlong_width_hp); - } -} - -void vp8_adapt_mv_probs(VP8_COMMON *cm) { - int i, t, count, factor; -#ifdef MV_COUNT_TESTING - printf("static const unsigned int\nMVcount[2][MVvals]={\n"); - for (i = 0; i < 2; ++i) { - printf(" { "); - for (t = 0; t < MVvals; t++) { - printf("%d, ", cm->fc.MVcount[i][t]); - if (t % 16 == 15 && t != MVvals - 1) printf("\n "); - } - printf("},\n"); - } - 
printf("};\n"); - printf("static const unsigned int\nMVcount_hp[2][MVvals_hp]={\n"); - for (i = 0; i < 2; ++i) { - printf(" { "); - for (t = 0; t < MVvals_hp; t++) { - printf("%d, ", cm->fc.MVcount_hp[i][t]); - if (t % 16 == 15 && t != MVvals_hp - 1) printf("\n "); - } - printf("},\n"); - } - printf("};\n"); -#endif /* MV_COUNT_TESTING */ - - for (i = 0; i < 2; ++i) { - int prob; - unsigned int is_short_ct[2]; - unsigned int sign_ct[2]; - unsigned int bit_ct [mvlong_width] [2]; - unsigned int short_ct [mvnum_short]; - unsigned int short_bct [mvnum_short - 1] [2]; - vp8_prob Pnew [MVPcount]; - compute_component_probs(cm->fc.MVcount[i], Pnew, - is_short_ct, sign_ct, - bit_ct, short_ct, short_bct); - count = is_short_ct[0] + is_short_ct[1]; - count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc[i].prob[mvpis_short] * (256 - factor) + - (int)Pnew[mvpis_short] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc[i].prob[mvpis_short] = 1; - else if (prob > 255) cm->fc.mvc[i].prob[mvpis_short] = 255; - else cm->fc.mvc[i].prob[mvpis_short] = prob; - - count = sign_ct[0] + sign_ct[1]; - count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc[i].prob[MVPsign] * (256 - factor) + - (int)Pnew[MVPsign] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc[i].prob[MVPsign] = 1; - else if (prob > 255) cm->fc.mvc[i].prob[MVPsign] = 255; - else cm->fc.mvc[i].prob[MVPsign] = prob; - - for (t = 0; t < mvnum_short - 1; ++t) { - count = short_bct[t][0] + short_bct[t][1]; - count = count > MV_COUNT_SAT ? 
MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc[i].prob[MVPshort + t] * (256 - factor) + - (int)Pnew[MVPshort + t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc[i].prob[MVPshort + t] = 1; - else if (prob > 255) cm->fc.mvc[i].prob[MVPshort + t] = 255; - else cm->fc.mvc[i].prob[MVPshort + t] = prob; - } - for (t = 0; t < mvlong_width; ++t) { - count = bit_ct[t][0] + bit_ct[t][1]; - count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc[i].prob[MVPbits + t] * (256 - factor) + - (int)Pnew[MVPbits + t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc[i].prob[MVPbits + t] = 1; - else if (prob > 255) cm->fc.mvc[i].prob[MVPbits + t] = 255; - else cm->fc.mvc[i].prob[MVPbits + t] = prob; - } - } - for (i = 0; i < 2; ++i) { - int prob; - unsigned int is_short_ct[2]; - unsigned int sign_ct[2]; - unsigned int bit_ct [mvlong_width_hp] [2]; - unsigned int short_ct [mvnum_short_hp]; - unsigned int short_bct [mvnum_short_hp - 1] [2]; - vp8_prob Pnew [MVPcount_hp]; - compute_component_probs_hp(cm->fc.MVcount_hp[i], Pnew, - is_short_ct, sign_ct, - bit_ct, short_ct, short_bct); - count = is_short_ct[0] + is_short_ct[1]; - count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc_hp[i].prob[mvpis_short_hp] * (256 - factor) + - (int)Pnew[mvpis_short_hp] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc_hp[i].prob[mvpis_short_hp] = 1; - else if (prob > 255) cm->fc.mvc_hp[i].prob[mvpis_short_hp] = 255; - else cm->fc.mvc_hp[i].prob[mvpis_short_hp] = prob; - - count = sign_ct[0] + sign_ct[1]; - count = count > MV_COUNT_SAT ? 
MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc_hp[i].prob[MVPsign_hp] * (256 - factor) + - (int)Pnew[MVPsign_hp] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc_hp[i].prob[MVPsign_hp] = 1; - else if (prob > 255) cm->fc.mvc_hp[i].prob[MVPsign_hp] = 255; - else cm->fc.mvc_hp[i].prob[MVPsign_hp] = prob; - - for (t = 0; t < mvnum_short_hp - 1; ++t) { - count = short_bct[t][0] + short_bct[t][1]; - count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc_hp[i].prob[MVPshort_hp + t] * (256 - factor) + - (int)Pnew[MVPshort_hp + t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc_hp[i].prob[MVPshort_hp + t] = 1; - else if (prob > 255) cm->fc.mvc_hp[i].prob[MVPshort_hp + t] = 255; - else cm->fc.mvc_hp[i].prob[MVPshort_hp + t] = prob; - } - for (t = 0; t < mvlong_width_hp; ++t) { - count = bit_ct[t][0] + bit_ct[t][1]; - count = count > MV_COUNT_SAT ? 
MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)cm->fc.pre_mvc_hp[i].prob[MVPbits_hp + t] * (256 - factor) + - (int)Pnew[MVPbits_hp + t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.mvc_hp[i].prob[MVPbits_hp + t] = 1; - else if (prob > 255) cm->fc.mvc_hp[i].prob[MVPbits_hp + t] = 255; - else cm->fc.mvc_hp[i].prob[MVPbits_hp + t] = prob; - } - } -} - -#endif /* CONFIG_NEWMVENTROPY */ - void vp8_entropy_mv_init() { -#if CONFIG_NEWMVENTROPY vp8_tokens_from_tree(vp8_mv_joint_encodings, vp8_mv_joint_tree); vp8_tokens_from_tree(vp8_mv_class_encodings, vp8_mv_class_tree); vp8_tokens_from_tree(vp8_mv_class0_encodings, vp8_mv_class0_tree); vp8_tokens_from_tree(vp8_mv_fp_encodings, vp8_mv_fp_tree); -#else - vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); - vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp); -#endif } void vp8_init_mv_probs(VP8_COMMON *cm) { -#if CONFIG_NEWMVENTROPY vpx_memcpy(&cm->fc.nmvc, &vp8_default_nmv_context, sizeof(nmv_context)); -#else - vpx_memcpy(cm->fc.mvc, - vp8_default_mv_context, sizeof(vp8_default_mv_context)); - vpx_memcpy(cm->fc.mvc_hp, - vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp)); -#endif } diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h index 1a193b172..80540a54c 100644 --- a/vp8/common/entropymv.h +++ b/vp8/common/entropymv.h @@ -22,7 +22,6 @@ void vp8_entropy_mv_init(); void vp8_init_mv_probs(struct VP8Common *cm); void vp8_adapt_mv_probs(struct VP8Common *cm); -#if CONFIG_NEWMVENTROPY void vp8_adapt_nmv_probs(struct VP8Common *cm, int usehp); void vp8_lower_mv_precision(MV *mv); int vp8_use_nmv_hp(const MV *ref); @@ -129,65 +128,4 @@ void vp8_counts_to_nmv_context( unsigned int (*branch_ct_class0_hp)[2], unsigned int (*branch_ct_hp)[2]); -#else /* CONFIG_NEWMVENTROPY */ - -enum { - mv_max = 1023, /* max absolute value of a MV component */ - MVvals = (2 * mv_max) + 1, /* # possible values "" */ - mvlong_width = 10, /* Large MVs 
have 9 bit magnitudes */ - mvnum_short = 8, /* magnitudes 0 through 7 */ - mvnum_short_bits = 3, /* number of bits for short mvs */ - - mvfp_max = 255, /* max absolute value of a full pixel MV component */ - MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */ - - /* probability offsets for coding each MV component */ - - mvpis_short = 0, /* short (<= 7) vs long (>= 8) */ - MVPsign, /* sign for non-zero */ - MVPshort, /* 8 short values = 7-position tree */ - - MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */ - MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */ -}; - -typedef struct mv_context { - vp8_prob prob[MVPcount]; /* often come in row, col pairs */ -} MV_CONTEXT; - -extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2]; - -enum { - mv_max_hp = 2047, /* max absolute value of a MV component */ - MVvals_hp = (2 * mv_max_hp) + 1, /* # possible values "" */ - mvlong_width_hp = 11, /* Large MVs have 9 bit magnitudes */ - mvnum_short_hp = 16, /* magnitudes 0 through 15 */ - mvnum_short_bits_hp = 4, /* number of bits for short mvs */ - - mvfp_max_hp = 255, /* max absolute value of a full pixel MV component */ - MVfpvals_hp = (2 * mvfp_max_hp) + 1, /* # possible full pixel MV values */ - - /* probability offsets for coding each MV component */ - - mvpis_short_hp = 0, /* short (<= 7) vs long (>= 8) */ - MVPsign_hp, /* sign for non-zero */ - MVPshort_hp, /* 8 short values = 7-position tree */ - - MVPbits_hp = MVPshort_hp + mvnum_short_hp - 1, /* mvlong_width long value bits */ - MVPcount_hp = MVPbits_hp + mvlong_width_hp /* (with independent probabilities) */ -}; - -typedef struct mv_context_hp { - vp8_prob prob[MVPcount_hp]; /* often come in row, col pairs */ -} MV_CONTEXT_HP; - -extern const MV_CONTEXT_HP vp8_mv_update_probs_hp[2], vp8_default_mv_context_hp[2]; - -extern const vp8_tree_index vp8_small_mvtree[]; -extern struct vp8_token_struct vp8_small_mvencodings [8]; -extern const 
vp8_tree_index vp8_small_mvtree_hp[]; -extern struct vp8_token_struct vp8_small_mvencodings_hp [16]; - -#endif /* CONFIG_NEWMVENTROPY */ - #endif diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c index 7c9ea1066..5fc135090 100644 --- a/vp8/common/findnearmv.c +++ b/vp8/common/findnearmv.c @@ -10,7 +10,7 @@ #include "findnearmv.h" -#include "vp8/encoder/variance.h" +#include "vp8/common/sadmxn.h" #include <limits.h> const unsigned char vp8_mbsplit_offset[4][16] = { @@ -22,11 +22,7 @@ const unsigned char vp8_mbsplit_offset[4][16] = { static void lower_mv_precision(int_mv *mv, int usehp) { -#if CONFIG_NEWMVENTROPY if (!usehp || !vp8_use_nmv_hp(&mv->as_mv)) { -#else - if (!usehp) { -#endif if (mv->as_mv.row & 1) mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1); if (mv->as_mv.col & 1) @@ -199,6 +195,23 @@ vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc, } #if CONFIG_NEWBESTREFMV +unsigned int vp8_sad3x16_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16); +} +unsigned int vp8_sad16x3_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); +} + /* check a list of motion vectors by sad score using a number rows of pixels * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector @@ -260,10 +273,10 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, sad = 0; if (xd->up_available) - sad += vp8_sad16x3_c(above_src, xd->dst.y_stride, + sad += vp8_sad16x3(above_src, xd->dst.y_stride, above_ref + offset, ref_y_stride, INT_MAX); if (xd->left_available) - sad += vp8_sad3x16_c(left_src, xd->dst.y_stride, + sad += vp8_sad3x16(left_src, xd->dst.y_stride, left_ref + offset, ref_y_stride, INT_MAX); // Add the entry to our list and then resort the list on score. 
sad_scores[i] = sad; diff --git a/vp8/common/idct.h b/vp8/common/idct.h index d096e8182..ae33df668 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -109,12 +109,9 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c); extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c); #endif -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 #include "vp8/common/blockd.h" void vp8_ihtllm_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim); -#endif - typedef prototype_idct((*vp8_idct_fn_t)); typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t)); diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index d705fec32..c7369b2e2 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -26,9 +26,7 @@ #include "vp8/common/idct.h" #include "vp8/common/systemdependent.h" -#if CONFIG_HYBRIDTRANSFORM #include "vp8/common/blockd.h" -#endif #include <math.h> @@ -38,7 +36,6 @@ static const int rounding = 0; // TODO: these transforms can be further converted into integer forms // for complexity optimization -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 float idct_4[16] = { 0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099, 0.500000000000000, 0.270598050073099, -0.500000000000000, -0.653281482438188, @@ -90,9 +87,7 @@ float iadst_8[64] = { 0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348, 0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532 }; -#endif -#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 float idct_16[256] = { 0.250000, 0.351851, 0.346760, 0.338330, 0.326641, 0.311806, 0.293969, 0.273300, 0.250000, 0.224292, 0.196424, 0.166664, 0.135299, 0.102631, 0.068975, 0.034654, @@ -162,9 +157,7 @@ float iadst_16[256] = { 0.347761, -0.344612, 0.338341, -0.329007, 0.316693, -0.301511, 0.283599, -0.263118, 0.240255, -0.215215, 
0.188227, -0.159534, 0.129396, -0.098087, 0.065889, -0.033094 }; -#endif -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 void vp8_ihtllm_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim) { @@ -289,7 +282,6 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, } vp8_clear_system_state(); // Make it simd safe : __asm emms; } -#endif void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) { int i; diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c index 3f97d2101..323d48de8 100644 --- a/vp8/common/loopfilter_filters.c +++ b/vp8/common/loopfilter_filters.c @@ -7,8 +7,6 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ - - #include <stdlib.h> #include "vpx_config.h" #include "loopfilter.h" @@ -94,6 +92,7 @@ static __inline void vp8_filter(signed char mask, uc hev, uc *op1, *op1 = u ^ 0x80; } + void vp8_loop_filter_horizontal_edge_c ( unsigned char *s, @@ -218,6 +217,7 @@ static __inline void vp8_mbfilter(signed char mask, uc hev, uc flat, Filter2 = vp8_signed_char_clamp(vp8_filter + 3); Filter1 >>= 3; Filter2 >>= 3; + u = vp8_signed_char_clamp(qs0 - Filter1); *oq0 = u ^ 0x80; u = vp8_signed_char_clamp(ps0 + Filter2); @@ -271,8 +271,6 @@ void vp8_mbloop_filter_horizontal_edge_c } while (++i < count * 8); } - - void vp8_mbloop_filter_vertical_edge_c ( unsigned char *s, diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 0396a7087..38df3500a 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -51,27 +51,14 @@ typedef struct frame_contexts { vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1]; vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM vp8_prob hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#endif 
vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#endif vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#endif -#if CONFIG_NEWMVENTROPY nmv_context nmvc; nmv_context pre_nmvc; -#else - MV_CONTEXT mvc[2]; - MV_CONTEXT_HP mvc_hp[2]; - MV_CONTEXT pre_mvc[2]; - MV_CONTEXT_HP pre_mvc_hp[2]; -#endif vp8_prob pre_bmode_prob [VP8_BINTRAMODES - 1]; vp8_prob pre_ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob pre_uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1]; @@ -87,56 +74,37 @@ typedef struct frame_contexts { vp8_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM vp8_prob pre_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#endif vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_prob pre_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#endif vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; -#endif unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM8X8 unsigned int 
hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM16X16 unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif -#if CONFIG_NEWMVENTROPY nmv_context_counts NMVcount; -#else - unsigned int MVcount [2] [MVvals]; - unsigned int MVcount_hp [2] [MVvals_hp]; -#endif -#if CONFIG_SWITCHABLE_INTERP - vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS+1] - [VP8_SWITCHABLE_FILTERS-1]; -#endif + vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS + 1] + [VP8_SWITCHABLE_FILTERS - 1]; int mode_context[6][4]; int mode_context_a[6][4]; @@ -161,10 +129,8 @@ typedef enum { ONLY_4X4 = 0, ALLOW_8X8 = 1, ALLOW_16X16 = 2, -#if CONFIG_TX_SELECT TX_MODE_SELECT = 3, -#endif - NB_TXFM_MODES = 3 + CONFIG_TX_SELECT, + NB_TXFM_MODES = 4, } TXFM_MODE; typedef struct VP8_COMMON_RTCD { @@ -302,10 +268,8 @@ typedef struct VP8Common { vp8_prob prob_comppred[COMP_PRED_CONTEXTS]; -#if CONFIG_TX_SELECT // FIXME contextualize vp8_prob prob_tx[TX_SIZE_MAX - 1]; -#endif vp8_prob mbskip_pred_probs[MBSKIP_CONTEXTS]; diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index 17bbe3281..388612e8a 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -783,7 +783,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t if (mi->mbmi.mode == SPLITMV) { switch (mi->mbmi.partitioning) { - case 0 : { /* mv_top_bottom */ + case PARTITIONING_16X8 : { /* mv_top_bottom */ union b_mode_info *bmi = &mi->bmi[0]; MV *mv = &bmi->mv.as_mv; @@ -803,7 +803,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t break; } - case 1 : { /* mv_left_right */ + case PARTITIONING_8X16 : { /* mv_left_right */ union b_mode_info *bmi = &mi->bmi[0]; MV *mv = &bmi->mv.as_mv; @@ -823,7 +823,7 @@ int 
vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t break; } - case 2 : { /* mv_quarters */ + case PARTITIONING_8X8 : { /* mv_quarters */ union b_mode_info *bmi = &mi->bmi[0]; MV *mv = &bmi->mv.as_mv; @@ -858,6 +858,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t vp8_blit_line(x0 + 12, x1, y0 + 12, y1, y_buffer, y_stride); break; } + case PARTITIONING_4X4: default : { union b_mode_info *bmi = mi->bmi; int bx0, by0; diff --git a/vp8/common/pred_common.c b/vp8/common/pred_common.c index a32389433..a97eed8e4 100644 --- a/vp8/common/pred_common.c +++ b/vp8/common/pred_common.c @@ -63,7 +63,6 @@ unsigned char get_pred_context(const VP8_COMMON *const cm, (m - cm->mode_info_stride)->mbmi.mb_skip_coeff; break; -#if CONFIG_SWITCHABLE_INTERP case PRED_SWITCHABLE_INTERP: { int left_in_image = (m - 1)->mbmi.mb_in_image; @@ -93,7 +92,6 @@ unsigned char get_pred_context(const VP8_COMMON *const cm, pred_context = VP8_SWITCHABLE_FILTERS; } break; -#endif default: // TODO *** add error trap code. @@ -175,11 +173,10 @@ const vp8_prob *get_pred_probs(const VP8_COMMON *const cm, pred_probability = &cm->mbskip_pred_probs[pred_context]; break; -#if CONFIG_SWITCHABLE_INTERP case PRED_SWITCHABLE_INTERP: pred_probability = &cm->fc.switchable_interp_prob[pred_context][0]; break; -#endif + default: // TODO *** add error trap code. 
pred_probability = NULL; diff --git a/vp8/common/pred_common.h b/vp8/common/pred_common.h index 402e0235f..2a9875dfe 100644 --- a/vp8/common/pred_common.h +++ b/vp8/common/pred_common.h @@ -22,12 +22,9 @@ typedef enum { PRED_REF = 1, PRED_COMP = 2, PRED_MBSKIP = 3, -#if CONFIG_SWITCHABLE_INTERP - PRED_SWITCHABLE_INTERP = 4, -#endif + PRED_SWITCHABLE_INTERP = 4 } PRED_ID; - extern unsigned char get_pred_context(const VP8_COMMON *const cm, const MACROBLOCKD *const xd, PRED_ID pred_id); diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index a41d233ab..6c60845fb 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -36,13 +36,7 @@ void vp8_setup_interp_filters(MACROBLOCKD *xd, &cm->rtcd.subpix, sixtap_avg8x8); xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE( &cm->rtcd.subpix, sixtap_avg16x16); - } - else if (mcomp_filter_type == EIGHTTAP -#if CONFIG_SWITCHABLE_INTERP - || - mcomp_filter_type == SWITCHABLE -#endif - ) { + } else if (mcomp_filter_type == EIGHTTAP || mcomp_filter_type == SWITCHABLE) { xd->subpixel_predict = SUBPIX_INVOKE( &cm->rtcd.subpix, eighttap4x4); xd->subpixel_predict8x4 = SUBPIX_INVOKE( @@ -965,7 +959,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) { MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; BLOCKD *blockd = xd->block; - if (xd->mode_info_context->mbmi.partitioning < 3) { + if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { blockd[ 0].bmi = xd->mode_info_context->bmi[ 0]; blockd[ 2].bmi = xd->mode_info_context->bmi[ 2]; blockd[ 8].bmi = xd->mode_info_context->bmi[ 8]; diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index ef272df90..ea64c9682 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -125,22 +125,22 @@ specialize vp8_comp_intra_uv4x4_predict; # Loopfilter # prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbv; 
+specialize vp8_loop_filter_mbv sse2 prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bv; +specialize vp8_loop_filter_bv sse2 prototype void vp8_loop_filter_bv8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bv8x8; +specialize vp8_loop_filter_bv8x8 sse2 prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbh; +specialize vp8_loop_filter_mbh sse2 prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bh; +specialize vp8_loop_filter_bh sse2 prototype void vp8_loop_filter_bh8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bh8x8; +specialize vp8_loop_filter_bh8x8 sse2 prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit" specialize vp8_loop_filter_simple_mbv mmx sse2 media neon @@ -174,3 +174,210 @@ vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2 vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6 vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon +# +# sad 16x3, 3x16 +# +prototype unsigned int vp8_sad16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" +specialize vp8_sad16x3 + +prototype unsigned int vp8_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" +specialize vp8_sad3x16 + +# +# Encoder functions below this point. 
+# +if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then + + +# variance +[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 + +prototype unsigned int vp8_variance32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance32x32 + +prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance16x16 mmx sse2 +vp8_variance16x16_sse2=vp8_variance16x16_wmt +vp8_variance16x16_mmx=vp8_variance16x16_mmx + +prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance16x8 mmx sse2 +vp8_variance16x8_sse2=vp8_variance16x8_wmt +vp8_variance16x8_mmx=vp8_variance16x8_mmx + +prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance8x16 mmx sse2 +vp8_variance8x16_sse2=vp8_variance8x16_wmt +vp8_variance8x16_mmx=vp8_variance8x16_mmx + +prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance8x8 mmx sse2 +vp8_variance8x8_sse2=vp8_variance8x8_wmt +vp8_variance8x8_mmx=vp8_variance8x8_mmx + +prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance4x4 mmx sse2 +vp8_variance4x4_sse2=vp8_variance4x4_wmt +vp8_variance4x4_mmx=vp8_variance4x4_mmx + +prototype unsigned int vp8_sub_pixel_variance32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance32x32 + +prototype unsigned int 
vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance16x16 sse2 mmx ssse3 +vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt + +prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance8x16 sse2 mmx +vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt + +prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance16x8 sse2 mmx ssse3 +vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_ssse3; +vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt + +prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance8x8 sse2 mmx +vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt + +prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance4x4 sse2 mmx +vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt + +prototype unsigned int vp8_sad32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad32x32 + +prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad16x16 mmx sse2 sse3 +vp8_sad16x16_sse2=vp8_sad16x16_wmt + 
+prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad16x8 mmx sse2 +vp8_sad16x8_sse2=vp8_sad16x8_wmt + +prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad8x16 mmx sse2 +vp8_sad8x16_sse2=vp8_sad8x16_wmt + +prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad8x8 mmx sse2 +vp8_sad8x8_sse2=vp8_sad8x8_wmt + +prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad4x4 mmx sse2 +vp8_sad4x4_sse2=vp8_sad4x4_wmt + +prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar16x16_h mmx sse2 +vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt + +prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar16x16_v mmx sse2 +vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt + +prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar16x16_hv mmx sse2 +vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt + +prototype unsigned int vp8_variance_halfpixvar32x32_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar32x32_h + 
+prototype unsigned int vp8_variance_halfpixvar32x32_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar32x32_v + +prototype unsigned int vp8_variance_halfpixvar32x32_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar32x32_hv + +prototype void vp8_sad32x32x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad32x32x3 + +prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x16x3 sse3 ssse3 + +prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x8x3 sse3 ssse3 + +prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x16x3 sse3 + +prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x8x3 sse3 + +prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad4x4x3 sse3 + +prototype void vp8_sad32x32x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad32x32x8 + +prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad16x16x8 sse4 + +prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned 
char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad16x8x8 sse4 + +prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad8x16x8 sse4 + +prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad8x8x8 sse4 + +prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad4x4x8 sse4 + +prototype void vp8_sad32x32x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad32x32x4d + +prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x16x4d sse3 + +prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x8x4d sse3 + +prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x16x4d sse3 + +prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x8x4d sse3 + +prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad4x4x4d sse3 + +# +# Block copy +# +case $arch in + x86*) + prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n" + specialize vp8_copy32xn sse2 sse3 + ;; +esac + +prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char 
*src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse" +specialize vp8_sub_pixel_mse16x16 sse2 mmx +vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt + +prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse" +specialize vp8_mse16x16 mmx sse2 +vp8_mse16x16_sse2=vp8_mse16x16_wmt + +prototype unsigned int vp8_sub_pixel_mse32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_mse32x32 + +prototype unsigned int vp8_get_mb_ss "const short *" +specialize vp8_get_mb_ss mmx sse2 + +# +# Structured Similarity (SSIM) +# +if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then + [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 + + prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" + specialize vp8_ssim_parms_8x8 $sse2_on_x86_64 + + prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" + specialize vp8_ssim_parms_16x16 $sse2_on_x86_64 +fi + +fi +# end encoder functions diff --git a/vp8/common/sadmxn.h b/vp8/common/sadmxn.h new file mode 100644 index 000000000..47b8dfc58 --- /dev/null +++ b/vp8/common/sadmxn.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef __INC_SAD_H +#define __INC_SAD_H + +static __inline +unsigned int sad_mx_n_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int m, + int n) { + int r, c; + unsigned int sad = 0; + + for (r = 0; r < n; r++) { + for (c = 0; c < m; c++) { + sad += abs(src_ptr[c] - ref_ptr[c]); + } + + src_ptr += src_stride; + ref_ptr += ref_stride; + } + + return sad; +} + +#endif diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm index 697a5dee6..63b72385b 100644 --- a/vp8/common/x86/loopfilter_mmx.asm +++ b/vp8/common/x86/loopfilter_mmx.asm @@ -594,790 +594,6 @@ sym(vp8_loop_filter_vertical_edge_mmx): ret -;void vp8_mbloop_filter_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_horizontal_edge_mmx) -sym(vp8_mbloop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - movsxd rcx, dword ptr arg(5) ;count -.next8_mbh: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - ; calculate breakout conditions - movq mm2, [rdi+2*rax] ; q3 - - movq mm1, [rsi+2*rax] ; q2 - movq mm6, mm1 ; q2 - psubusb mm1, mm2 ; q2-=q3 - psubusb mm2, mm6 ; q3-=q2 - por mm1, mm2 ; abs(q3-q2) - psubusb mm1, mm7 - - - ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit - movq mm4, [rsi+rax] ; q1 - movq mm3, mm4 ; q1 - psubusb mm4, mm6 ; q1-=q2 - psubusb mm6, mm3 ; q2-=q1 - por mm4, mm6 ; abs(q2-q1) - psubusb mm4, mm7 - por mm1, mm4 - - - ; mm1 = mask, mm3=q1, mm7 = limit - - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - psubusb mm4, mm3 ; q0-=q1 - psubusb mm3, mm0 ; q1-=q0 - por mm4, mm3 ; abs(q0-q1) - movq t0, mm4 ; save to t0 - psubusb mm4, mm7 - por mm1, mm4 - - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - neg rax ; negate pitch to deal with above border - - movq mm2, [rsi+4*rax] ; p3 - movq mm4, [rdi+4*rax] ; p2 - movq mm5, mm4 ; p2 - psubusb mm4, mm2 ; p2-=p3 - psubusb mm2, mm5 ; p3-=p2 - por mm4, mm2 ; abs(p3 - p2) - psubusb mm4, mm7 - por mm1, mm4 - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - movq mm4, [rsi+2*rax] ; p1 - movq mm3, mm4 ; p1 - psubusb mm4, mm5 ; p1-=p2 - psubusb mm5, mm3 ; p2-=p1 - por mm4, mm5 ; abs(p2 - p1) - psubusb mm4, mm7 - por mm1, mm4 - - movq mm2, mm3 ; p1 - - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - movq mm4, [rsi+rax] ; p0 - movq mm5, mm4 ; p0 - psubusb mm4, mm3 ; p0-=p1 - psubusb mm3, mm5 ; p1-=p0 - por mm4, mm3 ; abs(p1 - p0) - movq t1, mm4 ; save to t1 - psubusb mm4, mm7 - por mm1, mm4 - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm5 = p0 - movq mm3, [rdi] ; q1 - movq mm4, mm3 ; q1 - psubusb mm3, mm2 ; q1-=p1 - psubusb mm2, mm4 ; p1-=q1 - por mm2, mm3 ; abs(p1-q1) - pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm2, 1 ; abs(p1-q1)/2 - - movq mm6, mm5 ; p0 - movq mm3, mm0 ; q0 - 
psubusb mm5, mm3 ; p0-=q0 - psubusb mm3, mm6 ; q0-=p0 - por mm5, mm3 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] ; blimit - - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm5 - pxor mm5, mm5 - pcmpeqb mm1, mm5 ; mask mm1 - - ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb mm4, mm5 - - pcmpeqb mm5, mm5 - pxor mm4, mm5 - - - - ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, mm4=hev - ; start work on filters - movq mm2, [rsi+2*rax] ; p1 - movq mm7, [rdi] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) - paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movq mm2, mm1 ; vp8_filter - pand mm2, mm4; ; Filter2 = vp8_filter & hev - - movq mm5, mm2 ; - paddsb mm5, [GLOBAL(t3)]; - - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm5 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm5 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - movq mm5, mm0 ; Filter2 - - paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - pxor mm0, mm0 
; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm2 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm2 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb mm3, mm0 ; qs0 =qs0 - filter1 - paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn mm4, mm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor mm0, mm0 - - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s27)] - pmulhw mm2, [GLOBAL(s27)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - psubsb mm3, mm1 - paddsb mm6, mm1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - movq [rsi+rax], mm6 - movq [rsi], mm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s18)] - pmulhw mm2, [GLOBAL(s18)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm3, [rdi] - movq mm6, [rsi+rax*2] ; p1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdi], mm3 - movq [rsi+rax*2], mm6 - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; 
*op2 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s9)] - pmulhw mm2, [GLOBAL(s9)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - - movq mm6, [rdi+rax*4] - neg rax - movq mm3, [rdi+rax ] - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdi+rax ], mm3 - neg rax - movq [rdi+rax*4], mm6 - -;EARLY_BREAK_OUT: - neg rax - add rsi,8 - dec rcx - jnz .next8_mbh - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_vertical_edge_mmx) -sym(vp8_mbloop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 96 ; reserve 96 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[64]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - lea rsi, [rsi + rax*4 - 4] - - movsxd rcx, dword ptr arg(5) ;count -.next8_mbv: - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - - ;transpose - movq mm0, [rdi+2*rax] ; 77 76 75 74 73 72 71 70 - movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - - movq mm7, mm6 ; 77 76 75 74 73 72 71 70 - punpckhbw mm7, mm0 ; 77 67 76 66 75 65 74 64 - - punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 - movq mm0, [rsi+rax] ; 57 56 55 54 53 52 51 50 - - movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 - movq mm5, mm4 ; 47 46 45 44 43 42 41 40 - - punpckhbw mm5, mm0 ; 57 47 56 46 55 45 54 44 - punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 - - movq mm3, mm5 ; 57 47 56 46 55 45 54 44 - punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 - - punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 - movq mm2, mm4 ; 53 43 52 42 51 41 50 40 - - punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 - punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 - - neg rax - - movq mm7, [rsi+rax] ; 37 36 35 34 33 32 31 30 - movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq mm1, mm6 ; 27 26 25 24 23 22 21 20 - punpckhbw mm6, mm7 ; 37 27 36 36 35 25 34 24 - - punpcklbw mm1, mm7 ; 33 23 32 22 31 21 30 20 - - movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 - punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 - - movq mm0, mm7 ; 17 07 16 06 15 05 14 04 - punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 - - punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 - movq mm6, mm7 ; 37 27 17 07 36 26 16 06 - - punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 - punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 - - lea rdx, srct - movq mm5, mm6 ; 76 66 56 46 36 26 16 06 - - movq [rdx+56], mm7 - psubusb mm5, mm7 ; q2-q3 - - - movq [rdx+48], mm6 - psubusb mm7, mm6 ; q3-q2 - - por mm7, mm5; ; mm7=abs (q3-q2) - movq mm5, mm0 ; 35 25 15 05 34 24 14 04 - - punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 - punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 - - movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 - psubusb mm3, mm6 ; q1-q2 - - 
psubusb mm6, mm5 ; q2-q1 - por mm6, mm3 ; mm6=abs(q2-q1) - - movq [rdx+40], mm5 ; save q1 - movq [rdx+32], mm0 ; save q0 - - movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00 - punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 - - movq mm0, mm3 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 31 21 11 01 30 20 10 00 - - punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 - punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 - - movq [rdx], mm0 ; save p3 - movq [rdx+8], mm1 ; save p2 - - movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 - psubusb mm2, mm0 ; p2-p3 - - psubusb mm0, mm1 ; p3-p2 - por mm0, mm2 ; mm0=abs(p3-p2) - - movq mm2, mm3 ; 33 23 13 03 32 22 12 02 - punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 - - punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 - movq [rdx+24], mm3 ; save p0 - - movq [rdx+16], mm2 ; save p1 - movq mm5, mm2 ; mm5 = p1 - - psubusb mm2, mm1 ; p1-p2 - psubusb mm1, mm5 ; p2-p1 - - por mm1, mm2 ; mm1=abs(p2-p1) - mov rdx, arg(3) ;limit - - movq mm4, [rdx] ; mm4 = limit - psubusb mm7, mm4 ; abs(q3-q2) > limit - - psubusb mm0, mm4 ; abs(p3-p2) > limit - psubusb mm1, mm4 ; abs(p2-p1) > limit - - psubusb mm6, mm4 ; abs(q2-q1) > limit - por mm7, mm6 ; or - - por mm0, mm1 ; - por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movq mm1, mm5 ; p1 - - movq mm7, mm3 ; mm3=mm7=p0 - psubusb mm7, mm5 ; p0 - p1 - - psubusb mm5, mm3 ; p1 - p0 - por mm5, mm7 ; abs(p1-p0) - - movq t0, mm5 ; save abs(p1-p0) - lea rdx, srct - - psubusb mm5, mm4 ; mm5 = abs(p1-p0) > limit - por mm0, mm5 ; mm0=mask - - movq mm5, [rdx+32] ; mm5=q0 - movq mm7, [rdx+40] ; mm7=q1 - - movq mm6, mm5 ; mm6=q0 - movq mm2, mm7 ; q1 - psubusb mm5, mm7 ; q0-q1 - - psubusb mm7, mm6 ; q1-q0 - por mm7, mm5 ; abs(q1-q0) - - movq t1, mm7 ; save abs(q1-q0) - psubusb mm7, mm4 ; mm7=abs(q1-q0)> limit - - por mm0, mm7 ; mask - - movq mm5, mm2 ; q1 - psubusb 
mm5, mm1 ; q1-=p1 - psubusb mm1, mm2 ; p1-=q1 - por mm5, mm1 ; abs(p1-q1) - pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; - - movq mm4, [rdx] ;blimit - movq mm1, mm3 ; mm1=mm3=p0 - - movq mm7, mm6 ; mm7=mm6=q0 - psubusb mm1, mm7 ; p0-q0 - - psubusb mm7, mm3 ; q0-p0 - por mm1, mm7 ; abs(q0-p0) - paddusb mm1, mm1 ; abs(q0-p0)*2 - paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm0; ; mask - - pxor mm0, mm0 - pcmpeqb mm1, mm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] - ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 ; abs(q1 - q0) > thresh - - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 ; abs(p1 - p0)> thresh - - por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb mm4, mm0 - - pcmpeqb mm0, mm0 - pxor mm4, mm0 - - - - - ; start work on filters - lea rdx, srct - - ; start work on filters - movq mm2, [rdx+16] ; p1 - movq mm7, [rdx+40] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - movq mm6, [rdx+24] ; p0 - movq mm0, [rdx+32] ; q0 - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) - paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movq mm2, mm1 ; vp8_filter - pand mm2, mm4; ; Filter2 = vp8_filter & hev - - movq mm5, mm2 ; - paddsb mm5, [GLOBAL(t3)]; - - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm5 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm5 ; a0b0c0d0 - psraw mm7, 11 
; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - movq mm5, mm0 ; Filter2 - - paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm2 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm2 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb mm3, mm0 ; qs0 =qs0 - filter1 - paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn mm4, mm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor mm0, mm0 - - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s27)] - pmulhw mm2, [GLOBAL(s27)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - psubsb mm3, mm1 - paddsb mm6, mm1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - movq [rdx+24], mm6 - movq [rdx+32], mm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s18)] - pmulhw mm2, [GLOBAL(s18)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm3, [rdx + 40] - movq mm6, [rdx + 16] ; p1 - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdx + 40], mm3 - movq [rdx + 16], mm6 - - ; roughly 1/7th 
difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s9)] - pmulhw mm2, [GLOBAL(s9)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm6, [rdx+ 8] - movq mm3, [rdx+48] - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01 - pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06 - - ; tranpose and write back - movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00 - movq mm1, mm0 ; mm0 = 70 60 50 40 30 20 10 00 - - punpcklbw mm0, mm6 ; mm0 = 31 30 21 20 11 10 01 00 - punpckhbw mm1, mm6 ; mm3 = 71 70 61 60 51 50 41 40 - - movq mm2, [rdx+16] ; mm2 = 72 62 52 42 32 22 12 02 - movq mm6, mm2 ; mm3 = 72 62 52 42 32 22 12 02 - - punpcklbw mm2, [rdx+24] ; mm2 = 33 32 23 22 13 12 03 02 - punpckhbw mm6, [rdx+24] ; mm3 = 73 72 63 62 53 52 43 42 - - movq mm5, mm0 ; mm5 = 31 30 21 20 11 10 01 00 - punpcklwd mm0, mm2 ; mm0 = 13 12 11 10 03 02 01 00 - - punpckhwd mm5, mm2 ; mm5 = 33 32 31 30 23 22 21 20 - movq mm4, mm1 ; mm4 = 71 70 61 60 51 50 41 40 - - punpcklwd mm1, mm6 ; mm1 = 53 52 51 50 43 42 41 40 - punpckhwd mm4, mm6 ; mm4 = 73 72 71 70 63 62 61 60 - - movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 - punpcklbw mm2, [rdx+40] ; mm2 = 35 34 25 24 15 14 05 04 - - movq mm6, mm3 ; mm6 = 76 66 56 46 36 26 15 06 - punpcklbw mm6, [rdx+56] ; mm6 = 37 36 27 26 17 16 07 06 - - movq mm7, mm2 ; mm7 = 35 34 25 24 15 14 05 04 - punpcklwd mm2, mm6 ; mm2 = 17 16 15 14 07 06 05 04 - - punpckhwd mm7, mm6 ; mm7 = 37 36 35 34 27 26 25 24 - movq mm6, mm0 ; mm6 = 13 12 11 10 03 02 01 00 - - punpckldq mm0, mm2 ; mm0 = 07 06 05 04 03 02 01 00 - punpckhdq mm6, mm2 ; mm6 = 17 16 15 14 13 12 11 10 - - movq 
[rsi+rax*4], mm0 ; write out - movq [rdi+rax*4], mm6 ; write out - - movq mm0, mm5 ; mm0 = 33 32 31 30 23 22 21 20 - punpckldq mm0, mm7 ; mm0 = 27 26 25 24 23 22 20 20 - - punpckhdq mm5, mm7 ; mm5 = 37 36 35 34 33 32 31 30 - movq [rsi+rax*2], mm0 ; write out - - movq [rdi+rax*2], mm5 ; write out - movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 - - punpckhbw mm2, [rdx+40] ; mm2 = 75 74 65 64 54 54 45 44 - punpckhbw mm3, [rdx+56] ; mm3 = 77 76 67 66 57 56 47 46 - - movq mm5, mm2 ; mm5 = 75 74 65 64 54 54 45 44 - punpcklwd mm2, mm3 ; mm2 = 57 56 55 54 47 46 45 44 - - punpckhwd mm5, mm3 ; mm5 = 77 76 75 74 67 66 65 64 - movq mm0, mm1 ; mm0= 53 52 51 50 43 42 41 40 - - movq mm3, mm4 ; mm4 = 73 72 71 70 63 62 61 60 - punpckldq mm0, mm2 ; mm0 = 47 46 45 44 43 42 41 40 - - punpckhdq mm1, mm2 ; mm1 = 57 56 55 54 53 52 51 50 - movq [rsi], mm0 ; write out - - movq [rdi], mm1 ; write out - neg rax - - punpckldq mm3, mm5 ; mm3 = 67 66 65 64 63 62 61 60 - punpckhdq mm4, mm5 ; mm4 = 77 76 75 74 73 72 71 60 - - movq [rsi+rax*2], mm3 - movq [rdi+rax*2], mm4 - - lea rsi, [rsi+rax*8] - dec rcx - - jnz .next8_mbv - - add rsp, 96 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - ;void vp8_loop_filter_simple_horizontal_edge_mmx ;( ; unsigned char *src_ptr, diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 295609c58..6f6531c86 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -380,302 +380,6 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): ret -%macro MB_FILTER_AND_WRITEBACK 1 -%if %1 == 0 - movdqa xmm2, p1 ; p1 - movdqa xmm7, q1 ; q1 -%elif %1 == 1 - movdqa xmm2, [rsi+2*rax] ; p1 - movdqa xmm7, [rdi] ; q1 - - mov rcx, rax - neg rcx -%elif %1 == 2 - lea rdx, srct - - movdqa xmm2, [rdx+32] ; p1 - movdqa xmm7, [rdx+80] ; q1 - movdqa xmm6, [rdx+48] ; p0 - movdqa xmm0, [rdx+64] ; q0 -%endif - - pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - 
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - movdqa xmm3, xmm0 ; q0 - - psubsb xmm0, xmm6 ; q0 - p0 - - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) - - paddsb xmm2, xmm0 ; 2 * (q0 - p0) - - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) - - pand xmm1, xmm2 ; mask filter values we don't care about - - movdqa xmm2, xmm1 ; vp8_filter - - pand xmm2, xmm4 ; Filter2 = vp8_filter & hev - pxor xmm0, xmm0 - - pandn xmm4, xmm1 ; vp8_filter&=~hev - pxor xmm1, xmm1 - - punpcklbw xmm0, xmm4 ; Filter 2 (hi) - movdqa xmm5, xmm2 - - punpckhbw xmm1, xmm4 ; Filter 2 (lo) - paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) - - pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9 - - pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9 - - punpckhbw xmm7, xmm5 ; axbxcxdx - paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - - punpcklbw xmm5, xmm5 ; exfxgxhx - psraw xmm7, 11 ; sign extended shift right by 3 - - psraw xmm5, 11 ; sign extended shift right by 3 - punpckhbw xmm4, xmm2 ; axbxcxdx - - punpcklbw xmm2, xmm2 ; exfxgxhx - psraw xmm4, 11 ; sign extended shift right by 3 - - packsswb xmm5, xmm7 ; Filter2 >>=3; - psraw xmm2, 11 ; sign extended shift right by 3 - - packsswb xmm2, xmm4 ; Filter1 >>=3; - movdqa xmm7, xmm1 - - paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 - movdqa xmm4, xmm1 - - psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1 - movdqa xmm5, xmm0 - - movdqa xmm2, xmm5 - paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63 - - paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63 - paddw xmm5, xmm5 ; Filter 2 (hi) * 18 - - paddw xmm7, xmm7 ; Filter 2 (lo) * 18 - paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63 - - paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63 - paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63 - - paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63 - psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7 - - psraw 
xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7 - psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7 - - packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7 - - psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7 - packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - - psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7 - - packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - - psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3) - paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 - u3) - -%if %1 == 0 - movdqa xmm5, q2 ; q2 - movdqa xmm1, q1 ; q1 - movdqa xmm4, p1 ; p1 - movdqa xmm7, p2 ; p2 - -%elif %1 == 1 - movdqa xmm5, XMMWORD PTR [rdi+rcx] ; q2 - movdqa xmm1, XMMWORD PTR [rdi] ; q1 - movdqa xmm4, XMMWORD PTR [rsi+rax*2] ; p1 - movdqa xmm7, XMMWORD PTR [rdi+rax*4] ; p2 -%elif %1 == 2 - movdqa xmm5, XMMWORD PTR [rdx+96] ; q2 - movdqa xmm1, XMMWORD PTR [rdx+80] ; q1 - movdqa xmm4, XMMWORD PTR [rdx+32] ; p1 - movdqa xmm7, XMMWORD PTR [rdx+16] ; p2 -%endif - - pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80 - pxor xmm6, [GLOBAL(t80)] ; *oq0 = sp^0x80 - - pxor xmm1, [GLOBAL(t80)] - pxor xmm4, [GLOBAL(t80)] - - psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) - paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2) - - pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80; - pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80; - - pxor xmm7, [GLOBAL(t80)] - pxor xmm5, [GLOBAL(t80)] - - paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u) - psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) - - pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80; - pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80; - -%if %1 == 0 - lea rsi, [rsi+rcx*2] - lea rdi, [rdi+rcx*2] - - movq MMWORD PTR [rsi], xmm6 ; p0 - movhps MMWORD PTR [rdi], xmm6 - movq MMWORD PTR [rsi + rcx], xmm3 ; q0 - movhps MMWORD PTR [rdi + rcx], xmm3 - - movq MMWORD PTR [rsi+rcx*2], xmm1 ; q1 - movhps MMWORD PTR [rdi+rcx*2], xmm1 - - movq MMWORD 
PTR [rsi + rax], xmm4 ; p1 - movhps MMWORD PTR [rdi + rax], xmm4 - - movq MMWORD PTR [rsi+rax*2], xmm7 ; p2 - movhps MMWORD PTR [rdi+rax*2], xmm7 - - lea rsi, [rsi + rcx] - lea rdi, [rdi + rcx] - movq MMWORD PTR [rsi+rcx*2], xmm5 ; q2 - movhps MMWORD PTR [rdi+rcx*2], xmm5 -%elif %1 == 1 - movdqa XMMWORD PTR [rdi+rcx], xmm5 ; q2 - movdqa XMMWORD PTR [rdi], xmm1 ; q1 - movdqa XMMWORD PTR [rsi], xmm3 ; q0 - movdqa XMMWORD PTR [rsi+rax ],xmm6 ; p0 - movdqa XMMWORD PTR [rsi+rax*2],xmm4 ; p1 - movdqa XMMWORD PTR [rdi+rax*4],xmm7 ; p2 -%elif %1 == 2 - movdqa XMMWORD PTR [rdx+80], xmm1 ; q1 - movdqa XMMWORD PTR [rdx+64], xmm3 ; q0 - movdqa XMMWORD PTR [rdx+48], xmm6 ; p0 - movdqa XMMWORD PTR [rdx+32], xmm4 ; p1 -%endif - -%endmacro - - -;void vp8_mbloop_filter_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_horizontal_edge_sse2) -sym(vp8_mbloop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step - - mov rdx, arg(3) ;limit - movdqa xmm7, XMMWORD PTR [rdx] - - lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 1 - ; filter and write back the results - MB_FILTER_AND_WRITEBACK 1 - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_horizontal_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global 
sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) -sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 96 ; reserve 96 bytes - %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16]; - %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16]; - %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16]; - %define p1 [rsp + 48] ;__declspec(align(16)) char p1[16]; - %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16]; - - mov rsi, arg(0) ; u - mov rdi, arg(5) ; v - movsxd rax, dword ptr arg(1) ; src_pixel_step - mov rcx, rax - neg rax ; negate pitch to deal with above border - - mov rdx, arg(3) ;limit - movdqa xmm7, XMMWORD PTR [rdx] - - lea rsi, [rsi + rcx] - lea rdi, [rdi + rcx] - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 0 - ; filter and write back the results - MB_FILTER_AND_WRITEBACK 0 - - add rsp, 96 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - %macro TRANSPOSE_16X8 2 movq xmm4, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 movq xmm1, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 @@ -1141,233 +845,6 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): pop rbp ret -%macro MBV_TRANSPOSE 0 - movdqa xmm0, [rdx] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - - punpcklbw xmm0, xmm7 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm1, xmm7 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - movdqa xmm2, [rdx+32] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - movdqa xmm6, xmm2 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - - punpcklbw xmm2, [rdx+48] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm6, [rdx+48] ; 
f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpcklwd xmm0, xmm2 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckhwd xmm3, xmm2 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpcklbw xmm2, [rdx+80] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - - movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - punpcklbw xmm6, [rdx+112] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06 - - movdqa xmm7, xmm2 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - punpcklwd xmm2, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 - - punpckhwd xmm7, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 - movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckldq xmm0, xmm2 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 - punpckhdq xmm6, xmm2 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 -%endmacro - -%macro MBV_WRITEBACK_1 0 - movq QWORD PTR [rsi], xmm0 - movhps MMWORD PTR [rdi], xmm0 - - movq QWORD PTR [rsi+2*rax], xmm6 - movhps MMWORD PTR [rdi+2*rax], xmm6 - - movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - punpckldq xmm0, xmm7 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 - - punpckhdq xmm3, xmm7 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 - - movq QWORD PTR [rsi+4*rax], xmm0 - movhps MMWORD PTR [rdi+4*rax], xmm0 - - movq QWORD PTR [rsi+2*rcx], xmm3 - movhps MMWORD PTR [rdi+2*rcx], xmm3 - - movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm2, [rdx+80] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - - punpckhbw xmm5, [rdx+112] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 
87 86 - movdqa xmm0, xmm2 - - punpcklwd xmm0, xmm5 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 - punpckhwd xmm2, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 - - movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80 - - punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 -%endmacro - -%macro MBV_WRITEBACK_2 0 - movq QWORD PTR [rsi], xmm1 - movhps MMWORD PTR [rdi], xmm1 - - movq QWORD PTR [rsi+2*rax], xmm5 - movhps MMWORD PTR [rdi+2*rax], xmm5 - - movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - punpckldq xmm1, xmm2 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0 - punpckhdq xmm4, xmm2 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 - - movq QWORD PTR [rsi+4*rax], xmm1 - movhps MMWORD PTR [rdi+4*rax], xmm1 - - movq QWORD PTR [rsi+2*rcx], xmm4 - movhps MMWORD PTR [rdi+2*rcx], xmm4 -%endmacro - - -;void vp8_mbloop_filter_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_vertical_edge_sse2) -sym(vp8_mbloop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 160 ; reserve 160 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[128]; - - mov rsi, arg(0) ; src_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax*2+rax] - - ; Transpose - TRANSPOSE_16X8 1, 0 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK 0 - - neg rax - ; start work on filters - MB_FILTER_AND_WRITEBACK 2 - - lea rsi, [rsi+rax*8] - lea 
rdi, [rdi+rax*8] - - ; transpose and write back - MBV_TRANSPOSE - - neg rax - - MBV_WRITEBACK_1 - - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] - MBV_WRITEBACK_2 - - add rsp, 160 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_vertical_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) -sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 160 ; reserve 160 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[128]; - - mov rsi, arg(0) ; u_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax+2*rax] - - lea rdx, srct - - ; Transpose - TRANSPOSE_16X8 0, 0 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK 0 - - ; start work on filters - MB_FILTER_AND_WRITEBACK 2 - - ; transpose and write back - MBV_TRANSPOSE - - mov rsi, arg(0) ;u_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] - MBV_WRITEBACK_1 - mov rsi, arg(5) ;v_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] - MBV_WRITEBACK_2 - - add rsp, 160 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - ;void vp8_loop_filter_simple_horizontal_edge_sse2 ;( ; unsigned char *src_ptr, diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c index e7239818e..716d10c79 100644 --- a/vp8/common/x86/loopfilter_x86.c +++ b/vp8/common/x86/loopfilter_x86.c @@ -9,63 +9,36 @@ */ +#include <emmintrin.h> // SSE2 #include 
"vpx_config.h" #include "vp8/common/loopfilter.h" -prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx); -prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx); prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx); prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx); prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2); prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2); -prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2); -prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2); extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; -extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2; -extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; #if HAVE_MMX /* Horizontal MB filtering */ void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { - vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { - vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, 
struct loop_filter_info *lfi) { - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - if (v_ptr) - vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } @@ -99,26 +72,413 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned #endif -/* Horizontal MB filtering */ #if HAVE_SSE2 +void vp8_mbloop_filter_horizontal_edge_c_sse2 +( + unsigned char *s, + int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh, + int count +) { + DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); + __m128i mask, hev, flat; + __m128i thresh, limit, blimit; + const __m128i zero = _mm_set1_epi16(0); + __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; + + thresh = _mm_shuffle_epi32(_mm_cvtsi32_si128(_thresh[0] * 0x01010101), 0); + limit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_limit[0] * 0x01010101), 0); + blimit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_blimit[0] * 0x01010101), 0); + + p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); + p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); + q2 = 
_mm_loadu_si128((__m128i *)(s + 2 * p)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + q4 = _mm_loadu_si128((__m128i *)(s + 4 * p)); + { + const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), + _mm_subs_epu8(p0, p1)); + const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), + _mm_subs_epu8(q0, q1)); + const __m128i one = _mm_set1_epi8(1); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), + _mm_subs_epu8(q0, p0)); + __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), + _mm_subs_epu8(q1, p1)); + __m128i work; + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(flat, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), + _mm_subs_epu8(p1, p2)), + _mm_or_si128(_mm_subs_epu8(p3, p2), + _mm_subs_epu8(p2, p3))); + mask = _mm_max_epu8(work, mask); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), + _mm_subs_epu8(q1, q2)), + _mm_or_si128(_mm_subs_epu8(q3, q2), + _mm_subs_epu8(q2, q3))); + mask = _mm_max_epu8(work, mask); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0), + _mm_subs_epu8(p0, p2)), + _mm_or_si128(_mm_subs_epu8(q2, q0), + _mm_subs_epu8(q0, q2))); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0), + _mm_subs_epu8(p0, p3)), + _mm_or_si128(_mm_subs_epu8(q3, q0), + _mm_subs_epu8(q0, q3))); + flat = _mm_max_epu8(work, 
flat); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0), + _mm_subs_epu8(p0, p4)), + _mm_or_si128(_mm_subs_epu8(q4, q0), + _mm_subs_epu8(q0, q4))); + flat = _mm_max_epu8(work, flat); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + } + { + const __m128i four = _mm_set1_epi16(4); + unsigned char *src = s; + int i = 0; + do { + __m128i workp_a, workp_b, workp_shft; + p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero); + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero); + + workp_a = _mm_add_epi16(_mm_add_epi16(p4, p3), _mm_add_epi16(p2, p1)); + workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); + workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p4); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op2[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op1[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q2); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op0[i*8], + 
_mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq0[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q4); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq1[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q4); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq2[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + src += 8; + } while (++i < count); + } + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t1f = _mm_set1_epi8(0x1f); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i t7f = _mm_set1_epi8(0x7f); + + const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), + t80); + const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), + t80); + const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), + t80); + const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), + t80); + __m128i vp8_filt; + __m128i work_a; + __m128i filter1, filter2; + + vp8_filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, ps0); + vp8_filt = _mm_adds_epi8(vp8_filt, work_a); + vp8_filt = _mm_adds_epi8(vp8_filt, work_a); + vp8_filt = _mm_adds_epi8(vp8_filt, work_a); + /* (vp8_filter + 3 * (qs0 - ps0)) & mask */ + vp8_filt = _mm_and_si128(vp8_filt, mask); + + filter1 = _mm_adds_epi8(vp8_filt, 
t4); + filter2 = _mm_adds_epi8(vp8_filt, t3); + + /* Filter1 >> 3 */ + work_a = _mm_cmpgt_epi8(zero, filter1); + filter1 = _mm_srli_epi16(filter1, 3); + work_a = _mm_and_si128(work_a, te0); + filter1 = _mm_and_si128(filter1, t1f); + filter1 = _mm_or_si128(filter1, work_a); + + /* Filter2 >> 3 */ + work_a = _mm_cmpgt_epi8(zero, filter2); + filter2 = _mm_srli_epi16(filter2, 3); + work_a = _mm_and_si128(work_a, te0); + filter2 = _mm_and_si128(filter2, t1f); + filter2 = _mm_or_si128(filter2, work_a); + + /* vp8_filt >> 1 */ + vp8_filt = _mm_adds_epi8(filter1, t1); + work_a = _mm_cmpgt_epi8(zero, vp8_filt); + vp8_filt = _mm_srli_epi16(vp8_filt, 1); + work_a = _mm_and_si128(work_a, t80); + vp8_filt = _mm_and_si128(vp8_filt, t7f); + vp8_filt = _mm_or_si128(vp8_filt, work_a); + + vp8_filt = _mm_andnot_si128(hev, vp8_filt); + + work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); + q0 = _mm_load_si128((__m128i *)flat_oq0); + work_a = _mm_andnot_si128(flat, work_a); + q0 = _mm_and_si128(flat, q0); + q0 = _mm_or_si128(work_a, q0); + + work_a = _mm_xor_si128(_mm_subs_epi8(qs1, vp8_filt), t80); + q1 = _mm_load_si128((__m128i *)flat_oq1); + work_a = _mm_andnot_si128(flat, work_a); + q1 = _mm_and_si128(flat, q1); + q1 = _mm_or_si128(work_a, q1); + + work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q2 = _mm_load_si128((__m128i *)flat_oq2); + work_a = _mm_andnot_si128(flat, work_a); + q2 = _mm_and_si128(flat, q2); + q2 = _mm_or_si128(work_a, q2); + + work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); + p0 = _mm_load_si128((__m128i *)flat_op0); + work_a = _mm_andnot_si128(flat, work_a); + p0 = _mm_and_si128(flat, p0); + p0 = _mm_or_si128(work_a, p0); + + work_a = _mm_xor_si128(_mm_adds_epi8(ps1, vp8_filt), t80); + p1 = _mm_load_si128((__m128i *)flat_op1); + work_a = _mm_andnot_si128(flat, work_a); + p1 = _mm_and_si128(flat, p1); + p1 = _mm_or_si128(work_a, p1); + + work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p2 = _mm_load_si128((__m128i *)flat_op2); + work_a = 
_mm_andnot_si128(flat, work_a); + p2 = _mm_and_si128(flat, p2); + p2 = _mm_or_si128(work_a, p2); + + if (count == 1) { + _mm_storel_epi64((__m128i *)(s - 3 * p), p2); + _mm_storel_epi64((__m128i *)(s - 2 * p), p1); + _mm_storel_epi64((__m128i *)(s - 1 * p), p0); + _mm_storel_epi64((__m128i *)(s + 0 * p), q0); + _mm_storel_epi64((__m128i *)(s + 1 * p), q1); + _mm_storel_epi64((__m128i *)(s + 2 * p), q2); + } else { + _mm_storeu_si128((__m128i *)(s - 3 * p), p2); + _mm_storeu_si128((__m128i *)(s - 2 * p), p1); + _mm_storeu_si128((__m128i *)(s - 1 * p), p0); + _mm_storeu_si128((__m128i *)(s + 0 * p), q0); + _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + _mm_storeu_si128((__m128i *)(s + 2 * p), q2); + } + } +} +static __inline void transpose(unsigned char *src[], int in_p, + unsigned char *dst[], int out_p, + int num_8x8_to_transpose) { + int idx8x8 = 0; + __m128i x0, x1, x2, x3, x4, x5, x6, x7; + + do { + unsigned char *in = src[idx8x8]; + unsigned char *out = dst[idx8x8]; + + x0 = _mm_loadl_epi64((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07 + x1 = _mm_loadl_epi64((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17 + x2 = _mm_loadl_epi64((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27 + x3 = _mm_loadl_epi64((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37 + x4 = _mm_loadl_epi64((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47 + x5 = _mm_loadl_epi64((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57 + x6 = _mm_loadl_epi64((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67 + x7 = _mm_loadl_epi64((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77 + // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 + x0 = _mm_unpacklo_epi8(x0, x1); + // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 + x1 = _mm_unpacklo_epi8(x2, x3); + // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 + x2 = _mm_unpacklo_epi8(x4, x5); + // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 + x3 = _mm_unpacklo_epi8(x6, x7); + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 
23 33 + x4 = _mm_unpacklo_epi16(x0, x1); + // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 + x5 = _mm_unpacklo_epi16(x2, x3); + // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + x6 = _mm_unpacklo_epi32(x4, x5); + // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + x7 = _mm_unpackhi_epi32(x4, x5); + + _mm_storel_pd((double *)(out + 0*out_p), + _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70 + _mm_storeh_pd((double *)(out + 1*out_p), + _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71 + _mm_storel_pd((double *)(out + 2*out_p), + _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72 + _mm_storeh_pd((double *)(out + 3*out_p), + _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73 + + // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + x4 = _mm_unpackhi_epi16(x0, x1); + // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 + x5 = _mm_unpackhi_epi16(x2, x3); + // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 + x6 = _mm_unpacklo_epi32(x4, x5); + // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 + x7 = _mm_unpackhi_epi32(x4, x5); + + _mm_storel_pd((double *)(out + 4*out_p), + _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74 + _mm_storeh_pd((double *)(out + 5*out_p), + _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75 + _mm_storel_pd((double *)(out + 6*out_p), + _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76 + _mm_storeh_pd((double *)(out + 7*out_p), + _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77 + } while (++idx8x8 < num_8x8_to_transpose); +} +void vp8_mbloop_filter_vertical_edge_c_sse2 +( + unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) { + DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 16]); + unsigned char *src[4]; + unsigned char *dst[4]; + + src[0] = s - 5; + src[1] = s - 5 + 8; + src[2] = s - 5 + p*8; + src[3] = s - 5 + p*8 + 8; + + dst[0] = t_dst; + dst[1] = t_dst + 16*8; + dst[2] = t_dst + 8; + dst[3] = t_dst + 16*8 + 8; + + // 16x16->16x16 or 16x8->8x16 + transpose(src, p, 
dst, 16, (1 << count)); + + vp8_mbloop_filter_horizontal_edge_c_sse2(t_dst + 5*16, 16, blimit, limit, + thresh, count); + + dst[0] = s - 5; + dst[1] = s - 5 + p*8; + + src[0] = t_dst; + src[1] = t_dst + 8; + + // 16x8->8x16 or 8x8->8x8 + transpose(src, 16, dst, p, (1 << (count - 1))); +} + +/* Horizontal MB filtering */ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { - vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + vp8_mbloop_filter_horizontal_edge_c_sse2(y_ptr, y_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, 2); + + /* TODO: write sse2 version with u,v interleaved */ if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); + vp8_mbloop_filter_horizontal_edge_c_sse2(u_ptr, uv_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_horizontal_edge_c_sse2(v_ptr, uv_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, 1); } +void vp8_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + struct loop_filter_info *lfi) { + vp8_mbloop_filter_horizontal_edge_c_sse2( + y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); +} /* Vertical MB Filtering */ -void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, struct loop_filter_info *lfi) { - vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); +void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + struct loop_filter_info *lfi) { + vp8_mbloop_filter_vertical_edge_c_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, + lfi->hev_thr, 2); + /* TODO: write sse2 version with u,v interleaved */ if (u_ptr) - 
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); + vp8_mbloop_filter_vertical_edge_c_sse2(u_ptr, uv_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_vertical_edge_c_sse2(v_ptr, uv_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, 1); } +void vp8_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + struct loop_filter_info *lfi) { + vp8_mbloop_filter_vertical_edge_c_sse2( + y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); +} /* Horizontal B Filtering */ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 72ff126f2..0adc3333e 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -170,16 +170,13 @@ static void kfread_modes(VP8D_COMP *pbi, m->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); #endif -#if CONFIG_TX_SELECT if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 && m->mbmi.mode <= I8X8_PRED) { // FIXME(rbultje) code ternary symbol once all experiments are merged m->mbmi.txfm_size = vp8_read(bc, cm->prob_tx[0]); if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) m->mbmi.txfm_size += vp8_read(bc, cm->prob_tx[1]); - } else -#endif - if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { + } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { m->mbmi.txfm_size = TX_16X16; } else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) { m->mbmi.txfm_size = TX_8X8; @@ -188,7 +185,6 @@ static void kfread_modes(VP8D_COMP *pbi, } } -#if CONFIG_NEWMVENTROPY static int read_nmv_component(vp8_reader *r, int rv, const nmv_component *mvcomp) { @@ -207,7 +203,7 @@ static int read_nmv_component(vp8_reader *r, o = d << 3; z = vp8_get_mv_mag(c, o); - v = (s ? -(z + 1) : (z + 1)); + v = (s ? 
-(z + 8) : (z + 8)); return v; } @@ -219,6 +215,7 @@ static int read_nmv_component_fp(vp8_reader *r, int s, z, c, o, d, e, f; s = v < 0; z = (s ? -v : v) - 1; /* magnitude - 1 */ + z &= ~7; c = vp8_get_mv_class(z, &o); d = o >> 3; @@ -332,124 +329,6 @@ static void read_nmvprobs(vp8_reader *bc, nmv_context *mvctx, } } -#else - -static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) { - const vp8_prob *const p = (const vp8_prob *) mvc; - int x = 0; - - if (vp8_read(r, p [mvpis_short])) { /* Large */ - int i = 0; - - do { - x += vp8_read(r, p [MVPbits + i]) << i; - } while (++i < mvnum_short_bits); - - i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ - - do { - x += vp8_read(r, p [MVPbits + i]) << i; - } while (--i > mvnum_short_bits); - - if (!(x & ~((2 << mvnum_short_bits) - 1)) || vp8_read(r, p [MVPbits + mvnum_short_bits])) - x += (mvnum_short); - } else /* small */ - x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort); - - if (x && vp8_read(r, p [MVPsign])) - x = -x; - - return x; -} - -static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) { - mv->row = (short)(read_mvcomponent(r, mvc) << 1); - mv->col = (short)(read_mvcomponent(r, ++mvc) << 1); -#ifdef DEBUG_DEC_MV - int i; - printf("%d (np): %d %d\n", dec_mvcount++, mv->row, mv->col); - // for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[-1])->prob[i]); printf("\n"); - // for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n"); -#endif -} - -static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) { - int i = 0; - - do { - const vp8_prob *up = vp8_mv_update_probs[i].prob; - vp8_prob *p = (vp8_prob *)(mvc + i); - vp8_prob *const pstop = p + MVPcount; - - do { - if (vp8_read(bc, *up++)) { - const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7); - - *p = x ? 
x << 1 : 1; - } - } while (++p < pstop); - } while (++i < 2); -} - -static int read_mvcomponent_hp(vp8_reader *r, const MV_CONTEXT_HP *mvc) { - const vp8_prob *const p = (const vp8_prob *) mvc; - int x = 0; - - if (vp8_read(r, p [mvpis_short_hp])) { /* Large */ - int i = 0; - - do { - x += vp8_read(r, p [MVPbits_hp + i]) << i; - } while (++i < mvnum_short_bits_hp); - - i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */ - - do { - x += vp8_read(r, p [MVPbits_hp + i]) << i; - } while (--i > mvnum_short_bits_hp); - - if (!(x & ~((2 << mvnum_short_bits_hp) - 1)) || vp8_read(r, p [MVPbits_hp + mvnum_short_bits_hp])) - x += (mvnum_short_hp); - } else /* small */ - x = vp8_treed_read(r, vp8_small_mvtree_hp, p + MVPshort_hp); - - if (x && vp8_read(r, p [MVPsign_hp])) - x = -x; - - return x; -} - -static void read_mv_hp(vp8_reader *r, MV *mv, const MV_CONTEXT_HP *mvc) { - mv->row = (short)(read_mvcomponent_hp(r, mvc)); - mv->col = (short)(read_mvcomponent_hp(r, ++mvc)); -#ifdef DEBUG_DEC_MV - int i; - printf("%d (hp): %d %d\n", dec_mvcount++, mv->row, mv->col); - // for (i=0; i<MVPcount_hp;++i) printf(" %d", (&mvc[-1])->prob[i]); printf("\n"); - // for (i=0; i<MVPcount_hp;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n"); -#endif -} - -static void read_mvcontexts_hp(vp8_reader *bc, MV_CONTEXT_HP *mvc) { - int i = 0; - - do { - const vp8_prob *up = vp8_mv_update_probs_hp[i].prob; - vp8_prob *p = (vp8_prob *)(mvc + i); - vp8_prob *const pstop = p + MVPcount_hp; - - do { - if (vp8_read(bc, *up++)) { - const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7); - - *p = x ? 
x << 1 : 1; - } - } while (++p < pstop); - } while (++i < 2); -} - -#endif /* CONFIG_NEWMVENTROPY */ - // Read the referncence frame static MV_REFERENCE_FRAME read_ref_frame(VP8D_COMP *pbi, vp8_reader *const bc, @@ -596,13 +475,11 @@ static const unsigned char mbsplit_fill_offset[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} }; -#if CONFIG_SWITCHABLE_INTERP static void read_switchable_interp_probs(VP8D_COMP* const pbi, BOOL_DECODER* const bc) { VP8_COMMON *const cm = &pbi->common; int i, j; for (j = 0; j <= VP8_SWITCHABLE_FILTERS; ++j) { - //for (j = 0; j <= 0; ++j) { for (i = 0; i < VP8_SWITCHABLE_FILTERS - 1; ++i) { cm->fc.switchable_interp_prob[j][i] = vp8_read_literal(bc, 8); } @@ -610,16 +487,10 @@ static void read_switchable_interp_probs(VP8D_COMP* const pbi, //printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0], //cm->fc.switchable_interp_prob[1]); } -#endif static void mb_mode_mv_init(VP8D_COMP *pbi, vp8_reader *bc) { VP8_COMMON *const cm = &pbi->common; -#if CONFIG_NEWMVENTROPY nmv_context *const nmvc = &pbi->common.fc.nmvc; -#else - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp; -#endif MACROBLOCKD *const xd = &pbi->mb; if (cm->frame_type == KEY_FRAME) { @@ -632,10 +503,8 @@ static void mb_mode_mv_init(VP8D_COMP *pbi, vp8_reader *bc) { if (cm->pred_filter_mode == 2) cm->prob_pred_filter_off = (vp8_prob)vp8_read_literal(bc, 8); #endif -#if CONFIG_SWITCHABLE_INTERP if (cm->mcomp_filter_type == SWITCHABLE) read_switchable_interp_probs(pbi, bc); -#endif // Decode the baseline probabilities for decoding reference frame cm->prob_intra_coded = (vp8_prob)vp8_read_literal(bc, 8); cm->prob_last_coded = (vp8_prob)vp8_read_literal(bc, 8); @@ -661,14 +530,7 @@ static void mb_mode_mv_init(VP8D_COMP *pbi, vp8_reader *bc) { cm->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8); } while (++i < VP8_YMODES - 1); } -#if CONFIG_NEWMVENTROPY read_nmvprobs(bc, nmvc, 
xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) - read_mvcontexts_hp(bc, mvc_hp); - else - read_mvcontexts(bc, mvc); -#endif } } @@ -751,12 +613,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int mb_row, int mb_col, BOOL_DECODER* const bc) { VP8_COMMON *const cm = &pbi->common; -#if CONFIG_NEWMVENTROPY nmv_context *const nmvc = &pbi->common.fc.nmvc; -#else - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp; -#endif const int mis = pbi->common.mode_info_stride; MACROBLOCKD *const xd = &pbi->mb; @@ -894,19 +751,16 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->pred_filter_enabled = cm->pred_filter_mode; } #endif -#if CONFIG_SWITCHABLE_INTERP if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) { if (cm->mcomp_filter_type == SWITCHABLE) { mbmi->interp_filter = vp8_switchable_interp[ vp8_treed_read(bc, vp8_switchable_interp_tree, get_pred_probs(cm, xd, PRED_SWITCHABLE_INTERP))]; - //printf("Reading: %d\n", mbmi->interp_filter); } else { mbmi->interp_filter = cm->mcomp_filter_type; } } -#endif if (cm->comp_pred_mode == COMP_PREDICTION_ONLY || (cm->comp_pred_mode == HYBRID_PREDICTION && @@ -1005,44 +859,20 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, switch (blockmode) { case NEW4X4: -#if CONFIG_NEWMVENTROPY read_nmv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc); read_nmv_fp(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc, xd->allow_high_precision_mv); vp8_increment_nmv(&blockmv.as_mv, &best_mv.as_mv, &cm->fc.NMVcount, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - read_mv_hp(bc, &blockmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp); - cm->fc.MVcount_hp[0][mv_max_hp + (blockmv.as_mv.row)]++; - cm->fc.MVcount_hp[1][mv_max_hp + (blockmv.as_mv.col)]++; - } else { - read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc); - cm->fc.MVcount[0][mv_max + (blockmv.as_mv.row >> 
1)]++; - cm->fc.MVcount[1][mv_max + (blockmv.as_mv.col >> 1)]++; - } -#endif /* CONFIG_NEWMVENTROPY */ blockmv.as_mv.row += best_mv.as_mv.row; blockmv.as_mv.col += best_mv.as_mv.col; if (mbmi->second_ref_frame) { -#if CONFIG_NEWMVENTROPY read_nmv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc); read_nmv_fp(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, xd->allow_high_precision_mv); vp8_increment_nmv(&secondmv.as_mv, &best_mv_second.as_mv, &cm->fc.NMVcount, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - read_mv_hp(bc, &secondmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp); - cm->fc.MVcount_hp[0][mv_max_hp + (secondmv.as_mv.row)]++; - cm->fc.MVcount_hp[1][mv_max_hp + (secondmv.as_mv.col)]++; - } else { - read_mv(bc, &secondmv.as_mv, (const MV_CONTEXT *) mvc); - cm->fc.MVcount[0][mv_max + (secondmv.as_mv.row >> 1)]++; - cm->fc.MVcount[1][mv_max + (secondmv.as_mv.col >> 1)]++; - } -#endif /* CONFIG_NEWMVENTROPY */ secondmv.as_mv.row += best_mv_second.as_mv.row; secondmv.as_mv.col += best_mv_second.as_mv.col; } @@ -1147,23 +977,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, break; case NEWMV: -#if CONFIG_NEWMVENTROPY read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc); read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc, xd->allow_high_precision_mv); vp8_increment_nmv(&mv->as_mv, &best_mv.as_mv, &cm->fc.NMVcount, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - read_mv_hp(bc, &mv->as_mv, (const MV_CONTEXT_HP *) mvc_hp); - cm->fc.MVcount_hp[0][mv_max_hp + (mv->as_mv.row)]++; - cm->fc.MVcount_hp[1][mv_max_hp + (mv->as_mv.col)]++; - } else { - read_mv(bc, &mv->as_mv, (const MV_CONTEXT *) mvc); - cm->fc.MVcount[0][mv_max + (mv->as_mv.row >> 1)]++; - cm->fc.MVcount[1][mv_max + (mv->as_mv.col >> 1)]++; - } -#endif /* CONFIG_NEWMVENTROPY */ mv->as_mv.row += best_mv.as_mv.row; mv->as_mv.col += best_mv.as_mv.col; @@ -1178,23 +996,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, 
MB_MODE_INFO *mbmi, mb_to_top_edge, mb_to_bottom_edge); if (mbmi->second_ref_frame) { -#if CONFIG_NEWMVENTROPY read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc); read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc, xd->allow_high_precision_mv); vp8_increment_nmv(&mbmi->mv[1].as_mv, &best_mv_second.as_mv, &cm->fc.NMVcount, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - read_mv_hp(bc, &mbmi->mv[1].as_mv, (const MV_CONTEXT_HP *) mvc_hp); - cm->fc.MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row)]++; - cm->fc.MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col)]++; - } else { - read_mv(bc, &mbmi->mv[1].as_mv, (const MV_CONTEXT *) mvc); - cm->fc.MVcount[0][mv_max + (mbmi->mv[1].as_mv.row >> 1)]++; - cm->fc.MVcount[1][mv_max + (mbmi->mv[1].as_mv.col >> 1)]++; - } -#endif /* CONFIG_NEWMVENTROPY */ mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row; mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col; mbmi->need_to_clamp_secondmv |= @@ -1282,23 +1088,23 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, #endif } -#if CONFIG_TX_SELECT if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) || - (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { + (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV && + mbmi->partitioning == PARTITIONING_4X4)))) { // FIXME(rbultje) code ternary symbol once all experiments are merged mbmi->txfm_size = vp8_read(bc, cm->prob_tx[0]); - if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED) + if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED && + mbmi->mode != SPLITMV) mbmi->txfm_size += vp8_read(bc, cm->prob_tx[1]); - } else -#endif - if (cm->txfm_mode >= ALLOW_16X16 && + } else if (cm->txfm_mode >= ALLOW_16X16 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) || (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { mbmi->txfm_size = TX_16X16; } else 
if (cm->txfm_mode >= ALLOW_8X8 && - ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != B_PRED) || - (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { + (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == B_PRED) && + !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV && + mbmi->partitioning == PARTITIONING_4X4))) { mbmi->txfm_size = TX_8X8; } else { mbmi->txfm_size = TX_4X4; diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 01739c0db..bc35b17dc 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -208,10 +208,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_PREDICTION_MODE mode; int i; int tx_size; -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || \ - CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type; -#endif #if CONFIG_SUPERBLOCKS VP8_COMMON *pc = &pbi->common; int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff; @@ -256,11 +253,9 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } //mode = xd->mode_info_context->mbmi.mode; -#if CONFIG_SWITCHABLE_INTERP if (pbi->common.frame_type != KEY_FRAME) vp8_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, &pbi->common); -#endif if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV && mode != I8X8_PRED @@ -330,7 +325,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_intra8x8_predict(b, i8x8mode, b->predictor); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { -#if CONFIG_HYBRIDTRANSFORM8X8 tx_type = get_tx_type(xd, &xd->block[idx]); if (tx_type != DCT_DCT) { vp8_ht_dequant_idct_add_8x8_c(tx_type, @@ -338,9 +332,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } else { vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); } -#else - vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); -#endif q += 64; } else { for (j = 0; j < 4; j++) { @@ -380,7 +371,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } #endif -#if 
CONFIG_HYBRIDTRANSFORM tx_type = get_tx_type(xd, b); if (tx_type != DCT_DCT) { vp8_ht_dequant_idct_add_c(tx_type, b->qcoeff, @@ -390,29 +380,22 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_dequant_idct_add_c(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride); } -#else - if (xd->eobs[i] > 1) { - DEQUANT_INVOKE(&pbi->dequant, idct_add) - (b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } else { - IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add) - (b->qcoeff[0] * b->dequant[0], b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - ((int *)b->qcoeff)[0] = 0; - } -#endif } } else if (mode == SPLITMV) { - DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); + if (tx_size == TX_8X8) { + vp8_dequant_idct_add_y_block_8x8_c(xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd); + } else { + DEQUANT_INVOKE(&pbi->dequant, + idct_add_y_block)(xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + } } else { BLOCKD *b = &xd->block[24]; if (tx_size == TX_16X16) { -#if CONFIG_HYBRIDTRANSFORM16X16 BLOCKD *bd = &xd->block[0]; tx_type = get_tx_type(xd, bd); if (tx_type != DCT_DCT) { @@ -424,11 +407,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->predictor, xd->dst.y_buffer, 16, xd->dst.y_stride); } -#else - vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - 16, xd->dst.y_stride); -#endif } else if (tx_size == TX_8X8) { #if CONFIG_SUPERBLOCKS void *orig = xd->mode_info_context; @@ -515,8 +493,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, if (!xd->mode_info_context->mbmi.encoded_as_sb) { #endif if ((tx_size == TX_8X8 && - xd->mode_info_context->mbmi.mode != I8X8_PRED) - || tx_size == TX_16X16) + 
xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV) + || tx_size == TX_16X16 + ) DEQUANT_INVOKE(&pbi->dequant, idct_add_uv_block_8x8) // (xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, @@ -900,7 +880,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) { } } } -#if CONFIG_HYBRIDTRANSFORM { if (vp8_read_bit(bc)) { /* read coef probability tree */ @@ -920,7 +899,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) { } } } -#endif if (pbi->common.txfm_mode != ONLY_4X4 && vp8_read_bit(bc)) { // read coef probability tree @@ -940,7 +918,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) { } } } -#if CONFIG_HYBRIDTRANSFORM8X8 if (pbi->common.txfm_mode != ONLY_4X4 && vp8_read_bit(bc)) { // read coef probability tree for (i = 0; i < BLOCK_TYPES_8X8; i++) @@ -959,7 +936,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) { } } } -#endif // 16x16 if (pbi->common.txfm_mode > ALLOW_8X8 && vp8_read_bit(bc)) { @@ -980,7 +956,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) { } } } -#if CONFIG_HYBRIDTRANSFORM16X16 if (pbi->common.txfm_mode > ALLOW_8X8 && vp8_read_bit(bc)) { // read coef probability tree for (i = 0; i < BLOCK_TYPES_16X16; ++i) @@ -999,7 +974,6 @@ static void read_coef_probs(VP8D_COMP *pbi, BOOL_DECODER* const bc) { } } } -#endif } int vp8_decode_frame(VP8D_COMP *pbi) { @@ -1214,17 +1188,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) { #endif /* Read the loop filter level and type */ -#if CONFIG_TX_SELECT pc->txfm_mode = vp8_read_literal(&header_bc, 2); if (pc->txfm_mode == TX_MODE_SELECT) { pc->prob_tx[0] = vp8_read_literal(&header_bc, 8); pc->prob_tx[1] = vp8_read_literal(&header_bc, 8); } -#else - pc->txfm_mode = (TXFM_MODE) vp8_read_bit(&header_bc); - if (pc->txfm_mode == ALLOW_8X8) - pc->txfm_mode = ALLOW_16X16; -#endif pc->filter_type = (LOOPFILTERTYPE) 
vp8_read_bit(&header_bc); pc->filter_level = vp8_read_literal(&header_bc, 6); @@ -1328,12 +1296,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) { /* Is high precision mv allowed */ xd->allow_high_precision_mv = (unsigned char)vp8_read_bit(&header_bc); // Read the type of subpel filter to use -#if CONFIG_SWITCHABLE_INTERP if (vp8_read_bit(&header_bc)) { pc->mcomp_filter_type = SWITCHABLE; - } else -#endif - { + } else { pc->mcomp_filter_type = vp8_read_literal(&header_bc, 2); } /* To enable choice of different interploation filters */ @@ -1362,58 +1327,36 @@ int vp8_decode_frame(VP8D_COMP *pbi) { vp8_copy(pbi->common.fc.pre_coef_probs, pbi->common.fc.coef_probs); -#if CONFIG_HYBRIDTRANSFORM vp8_copy(pbi->common.fc.pre_hybrid_coef_probs, pbi->common.fc.hybrid_coef_probs); -#endif vp8_copy(pbi->common.fc.pre_coef_probs_8x8, pbi->common.fc.coef_probs_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_copy(pbi->common.fc.pre_hybrid_coef_probs_8x8, pbi->common.fc.hybrid_coef_probs_8x8); -#endif vp8_copy(pbi->common.fc.pre_coef_probs_16x16, pbi->common.fc.coef_probs_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16, pbi->common.fc.hybrid_coef_probs_16x16); -#endif vp8_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob); vp8_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob); vp8_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob); vp8_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob); vp8_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob); vp8_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob); -#if CONFIG_NEWMVENTROPY pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc; -#else - vp8_copy(pbi->common.fc.pre_mvc, pbi->common.fc.mvc); - vp8_copy(pbi->common.fc.pre_mvc_hp, pbi->common.fc.mvc_hp); -#endif vp8_zero(pbi->common.fc.coef_counts); -#if CONFIG_HYBRIDTRANSFORM vp8_zero(pbi->common.fc.hybrid_coef_counts); -#endif 
vp8_zero(pbi->common.fc.coef_counts_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_zero(pbi->common.fc.hybrid_coef_counts_8x8); -#endif vp8_zero(pbi->common.fc.coef_counts_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_zero(pbi->common.fc.hybrid_coef_counts_16x16); -#endif vp8_zero(pbi->common.fc.ymode_counts); vp8_zero(pbi->common.fc.uv_mode_counts); vp8_zero(pbi->common.fc.bmode_counts); vp8_zero(pbi->common.fc.i8x8_mode_counts); vp8_zero(pbi->common.fc.sub_mv_ref_counts); vp8_zero(pbi->common.fc.mbsplit_counts); -#if CONFIG_NEWMVENTROPY vp8_zero(pbi->common.fc.NMVcount); -#else - vp8_zero(pbi->common.fc.MVcount); - vp8_zero(pbi->common.fc.MVcount_hp); -#endif vp8_zero(pbi->common.fc.mv_ref_ct); vp8_zero(pbi->common.fc.mv_ref_ct_a); @@ -1472,11 +1415,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) { vp8_adapt_coef_probs(pc); if (pc->frame_type != KEY_FRAME) { vp8_adapt_mode_probs(pc); -#if CONFIG_NEWMVENTROPY vp8_adapt_nmv_probs(pc, xd->allow_high_precision_mv); -#else - vp8_adapt_mv_probs(pc); -#endif vp8_update_mode_context(&pbi->common); } diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index e55da754b..db9c3b0b8 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -42,7 +42,6 @@ void vp8_dequantize_b_c(BLOCKD *d) { } -#if CONFIG_HYBRIDTRANSFORM void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) { @@ -77,9 +76,7 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, pred += pitch; } } -#endif -#if CONFIG_HYBRIDTRANSFORM8X8 void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) { @@ -123,7 +120,6 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4; } } -#endif void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int 
pitch, int stride) { @@ -468,7 +464,6 @@ void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, #endif } -#if CONFIG_HYBRIDTRANSFORM16X16 void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) { @@ -507,7 +502,6 @@ void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq, pred += pitch; } } -#endif void vp8_dequant_idct_add_16x16_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) { diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h index 5b7b21598..4ac710431 100644 --- a/vp8/decoder/dequantize.h +++ b/vp8/decoder/dequantize.h @@ -76,7 +76,6 @@ extern prototype_dequant_block(vp8_dequant_block); #endif extern prototype_dequant_idct_add(vp8_dequant_idct_add); -#if CONFIG_HYBRIDTRANSFORM // declare dequantization and inverse transform module of hybrid transform decoder #ifndef vp8_ht_dequant_idct_add #define vp8_ht_dequant_idct_add vp8_ht_dequant_idct_add_c @@ -85,7 +84,6 @@ extern void vp8_ht_dequant_idct_add(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride); -#endif #ifndef vp8_dequant_dc_idct_add #define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c @@ -191,17 +189,13 @@ typedef struct { #define DEQUANT_INVOKE(ctx,fn) vp8_dequant_##fn #endif -#if CONFIG_HYBRIDTRANSFORM8X8 void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride); -#endif -#if CONFIG_HYBRIDTRANSFORM16X16 void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride); -#endif #if CONFIG_SUPERBLOCKS void vp8_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq, diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 85f213470..fa56865ac 100644 --- 
a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -135,7 +135,6 @@ int get_token(int v) { else return DCT_VAL_CATEGORY6; } -#if CONFIG_HYBRIDTRANSFORM void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr, int block, PLANE_TYPE type, TX_TYPE tx_type, @@ -180,7 +179,6 @@ void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr, fc->coef_counts[type][band][pt][DCT_EOB_TOKEN]++; } } -#endif void static count_tokens(INT16 *qcoeff_ptr, int block, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, @@ -202,9 +200,7 @@ void static count_tokens(INT16 *qcoeff_ptr, int block, PLANE_TYPE type, } void static count_tokens_8x8(INT16 *qcoeff_ptr, int block, PLANE_TYPE type, -#if CONFIG_HYBRIDTRANSFORM8X8 TX_TYPE tx_type, -#endif ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int eob, int seg_eob, FRAME_CONTEXT *fc) { int c, pt, token, band; @@ -214,29 +210,23 @@ void static count_tokens_8x8(INT16 *qcoeff_ptr, int block, PLANE_TYPE type, int v = qcoeff_ptr[rc]; band = (type == 1 ? vp8_coef_bands[c] : vp8_coef_bands_8x8[c]); token = get_token(v); -#if CONFIG_HYBRIDTRANSFORM8X8 if (tx_type != DCT_DCT) fc->hybrid_coef_counts_8x8[type][band][pt][token]++; else -#endif fc->coef_counts_8x8[type][band][pt][token]++; pt = vp8_prev_token_class[token]; } if (eob < seg_eob) { band = (type == 1 ? 
vp8_coef_bands[c] : vp8_coef_bands_8x8[c]); -#if CONFIG_HYBRIDTRANSFORM8X8 if (tx_type != DCT_DCT) fc->hybrid_coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN]++; else -#endif fc->coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN]++; } } void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, PLANE_TYPE type, -#if CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type, -#endif ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int eob, int seg_eob, FRAME_CONTEXT *fc) { int c, pt, token; @@ -246,21 +236,17 @@ void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, PLANE_TYPE type, int v = qcoeff_ptr[rc]; int band = vp8_coef_bands_16x16[c]; token = get_token(v); -#if CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) fc->hybrid_coef_counts_16x16[type][band][pt][token]++; else -#endif fc->coef_counts_16x16[type][band][pt][token]++; pt = vp8_prev_token_class[token]; } if (eob < seg_eob) { int band = vp8_coef_bands_16x16[c]; -#if CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) fc->hybrid_coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]++; else -#endif fc->coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]++; } } @@ -306,9 +292,7 @@ static int decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd, BOOL_DECODER* const br, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, PLANE_TYPE type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type, -#endif int seg_eob, INT16 *qcoeff_ptr, int i, const int *const scan, int block_type, const int *coef_bands) { @@ -320,23 +304,17 @@ static int decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd, default: case TX_4X4: coef_probs = -#if CONFIG_HYBRIDTRANSFORM tx_type != DCT_DCT ? fc->hybrid_coef_probs[type][0][0] : -#endif fc->coef_probs[type][0][0]; break; case TX_8X8: coef_probs = -#if CONFIG_HYBRIDTRANSFORM8X8 tx_type != DCT_DCT ? fc->hybrid_coef_probs_8x8[type][0][0] : -#endif fc->coef_probs_8x8[type][0][0]; break; case TX_16X16: coef_probs = -#if CONFIG_HYBRIDTRANSFORM16X16 tx_type != DCT_DCT ? 
fc->hybrid_coef_probs_16x16[type][0][0] : -#endif fc->coef_probs_16x16[type][0][0]; break; } @@ -422,26 +400,17 @@ SKIP_START: } if (block_type == TX_4X4) { -#if CONFIG_HYBRIDTRANSFORM count_tokens_adaptive_scan(xd, qcoeff_ptr, i, type, tx_type, a, l, c, seg_eob, fc); -#else - count_tokens(qcoeff_ptr, i, type, - a, l, c, seg_eob, fc); -#endif } else if (block_type == TX_8X8) count_tokens_8x8(qcoeff_ptr, i, type, -#if CONFIG_HYBRIDTRANSFORM8X8 tx_type, -#endif a, l, c, seg_eob, fc); else count_tokens_16x16(qcoeff_ptr, i, type, -#if CONFIG_HYBRIDTRANSFORM16X16 tx_type, -#endif a, l, c, seg_eob, fc); return c; } @@ -457,12 +426,7 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd, const int segment_id = xd->mode_info_context->mbmi.segment_id; const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB); INT16 *qcoeff_ptr = &xd->qcoeff[0]; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 - TX_TYPE tx_type = DCT_DCT; -#endif -#if CONFIG_HYBRIDTRANSFORM16X16 - tx_type = get_tx_type(xd, &xd->block[0]); -#endif + TX_TYPE tx_type = get_tx_type(xd, &xd->block[0]); type = PLANE_TYPE_Y_WITH_DC; @@ -475,26 +439,20 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd, { const int* const scan = vp8_default_zig_zag1d_16x16; c = decode_coefs(pbi, xd, bc, A, L, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type, -#endif seg_eob, qcoeff_ptr, 0, scan, TX_16X16, coef_bands_x_16x16); eobs[0] = c; - *A = *L = (c != !type); - for (i = 1; i < 16; i++) { - *(A + vp8_block2above[i]) = *(A); - *(L + vp8_block2left[i]) = *(L); - } + A[0] = L[0] = (c != !type); + A[1] = A[2] = A[3] = A[0]; + L[1] = L[2] = L[3] = L[0]; eobtotal += c; } // 8x8 chroma blocks qcoeff_ptr += 256; type = PLANE_TYPE_UV; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type = DCT_DCT; -#endif if (seg_active) seg_eob = get_segdata(xd, segment_id, 
SEG_LVL_EOB); else @@ -505,9 +463,7 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd, const int* const scan = vp8_default_zig_zag1d_8x8; c = decode_coefs(pbi, xd, bc, a, l, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type, -#endif seg_eob, qcoeff_ptr, i, scan, TX_8X8, coef_bands_x_8x8); a[0] = l[0] = ((eobs[i] = c) != !type); @@ -533,11 +489,10 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd, const int segment_id = xd->mode_info_context->mbmi.segment_id; const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB); INT16 *qcoeff_ptr = &xd->qcoeff[0]; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type = DCT_DCT; -#endif - int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24; + int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV) ? 16 : 24; if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && xd->mode_info_context->mbmi.mode != I8X8_PRED) { @@ -551,9 +506,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd, else seg_eob = 4; c = decode_coefs(pbi, xd, bc, a, l, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type, -#endif seg_eob, qcoeff_ptr + 24 * 16, 24, scan, TX_8X8, coef_bands_x); a[0] = l[0] = ((eobs[24] = c) != !type); @@ -573,22 +526,16 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd, ENTROPY_CONTEXT *const a = A + vp8_block2above_8x8[i]; ENTROPY_CONTEXT *const l = L + vp8_block2left_8x8[i]; const int *const scan = vp8_default_zig_zag1d_8x8; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type = DCT_DCT; -#endif if (i == 16) type = PLANE_TYPE_UV; -#if CONFIG_HYBRIDTRANSFORM8X8 if (type == PLANE_TYPE_Y_WITH_DC) { tx_type = get_tx_type(xd, xd->block + i); } -#endif c = 
decode_coefs(pbi, xd, bc, a, l, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type, -#endif seg_eob, qcoeff_ptr, i, scan, TX_8X8, coef_bands_x_8x8); a[0] = l[0] = ((eobs[i] = c) != !type); @@ -601,9 +548,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd, if (bufthred == 16) { type = PLANE_TYPE_UV; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type = DCT_DCT; -#endif seg_eob = 16; // use 4x4 transform for U, V components in I8X8 prediction mode @@ -613,9 +558,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd, const int *scan = vp8_default_zig_zag1d; c = decode_coefs(pbi, xd, bc, a, l, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 tx_type, -#endif seg_eob, qcoeff_ptr, i, scan, TX_4X4, coef_bands_x); a[0] = l[0] = ((eobs[i] = c) != !type); @@ -652,9 +595,7 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd, type = PLANE_TYPE_Y2; c = decode_coefs(dx, xd, bc, a, l, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 DCT_DCT, -#endif seg_eob, qcoeff_ptr + 24 * 16, 24, scan, TX_4X4, coef_bands_x); a[0] = l[0] = ((eobs[24] = c) != !type); @@ -668,13 +609,10 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd, for (i = 0; i < 24; ++i) { ENTROPY_CONTEXT *const a = A + vp8_block2above[i]; ENTROPY_CONTEXT *const l = L + vp8_block2left[i]; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type = DCT_DCT; -#endif if (i == 16) type = PLANE_TYPE_UV; -#if CONFIG_HYBRIDTRANSFORM tx_type = get_tx_type(xd, &xd->block[i]); switch(tx_type) { case ADST_DCT : @@ -689,12 +627,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd, scan = vp8_default_zig_zag1d; break; } -#endif - c = decode_coefs(dx, xd, bc, a, l, type, -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 - 
tx_type, -#endif + c = decode_coefs(dx, xd, bc, a, l, type, tx_type, seg_eob, qcoeff_ptr, i, scan, TX_4X4, coef_bands_x); a[0] = l[0] = ((eobs[i] = c) != !type); diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 70cdb6aaf..36776ab21 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -45,32 +45,26 @@ unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#if CONFIG_HYBRIDTRANSFORM unsigned int hybrid_tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#endif unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; -#if CONFIG_HYBRIDTRANSFORM8X8 unsigned int hybrid_tree_update_hist_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; -#endif unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; -#if CONFIG_HYBRIDTRANSFORM16X16 unsigned int hybrid_tree_update_hist_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; -#endif extern unsigned int active_section; #endif @@ -265,57 +259,23 @@ void update_skip_probs(VP8_COMP *cpi) { } } -#if CONFIG_SWITCHABLE_INTERP void update_switchable_interp_probs(VP8_COMP *cpi, vp8_writer* const bc) { VP8_COMMON *const pc = &cpi->common; unsigned int branch_ct[32][2]; int i, j; for (j = 0; j <= VP8_SWITCHABLE_FILTERS; ++j) { - //for (j = 0; j <= 0; ++j) { -/* - if (!cpi->dummy_packing) -#if VP8_SWITCHABLE_FILTERS == 3 - printf("HELLO %d %d %d\n", cpi->switchable_interp_count[j][0], - cpi->switchable_interp_count[j][1], cpi->switchable_interp_count[j][2]); -#else - printf("HELLO %d %d\n", cpi->switchable_interp_count[j][0], - cpi->switchable_interp_count[j][1]); -#endif -*/ vp8_tree_probs_from_distribution( VP8_SWITCHABLE_FILTERS, vp8_switchable_interp_encodings, vp8_switchable_interp_tree, - pc->fc.switchable_interp_prob[j], branch_ct, 
cpi->switchable_interp_count[j], - 256, 1 - ); + pc->fc.switchable_interp_prob[j], branch_ct, + cpi->switchable_interp_count[j], 256, 1); for (i = 0; i < VP8_SWITCHABLE_FILTERS - 1; ++i) { if (pc->fc.switchable_interp_prob[j][i] < 1) pc->fc.switchable_interp_prob[j][i] = 1; vp8_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8); -/* - if (!cpi->dummy_packing) -#if VP8_SWITCHABLE_FILTERS == 3 - printf("Probs %d %d [%d]\n", - pc->fc.switchable_interp_prob[j][0], - pc->fc.switchable_interp_prob[j][1], pc->frame_type); -#else - printf("Probs %d [%d]\n", pc->fc.switchable_interp_prob[j][0], - pc->frame_type); -#endif -*/ } } - /* - if (!cpi->dummy_packing) -#if VP8_SWITCHABLE_FILTERS == 3 - printf("Probs %d %d [%d]\n", - pc->fc.switchable_interp_prob[0], pc->fc.switchable_interp_prob[1], pc->frame_type); -#else - printf("Probs %d [%d]\n", pc->fc.switchable_interp_prob[0], pc->frame_type); -#endif - */ } -#endif // This function updates the reference frame prediction stats static void update_refpred_stats(VP8_COMP *cpi) { @@ -649,7 +609,6 @@ static void write_sub_mv_ref vp8_sub_mv_ref_encoding_array - LEFT4X4 + m); } -#if CONFIG_NEWMVENTROPY static void write_nmv(vp8_writer *bc, const MV *mv, const int_mv *ref, const nmv_context *nmvc, int usehp) { MV e; @@ -660,31 +619,6 @@ static void write_nmv(vp8_writer *bc, const MV *mv, const int_mv *ref, vp8_encode_nmv_fp(bc, &e, &ref->as_mv, nmvc, usehp); } -#else - -static void write_mv -( - vp8_writer *bc, const MV *mv, const int_mv *ref, const MV_CONTEXT *mvc -) { - MV e; - e.row = mv->row - ref->as_mv.row; - e.col = mv->col - ref->as_mv.col; - - vp8_encode_motion_vector(bc, &e, mvc); -} - -static void write_mv_hp -( - vp8_writer *bc, const MV *mv, const int_mv *ref, const MV_CONTEXT_HP *mvc -) { - MV e; - e.row = mv->row - ref->as_mv.row; - e.col = mv->col - ref->as_mv.col; - - vp8_encode_motion_vector_hp(bc, &e, mvc); -} -#endif /* CONFIG_NEWMVENTROPY */ - // This function writes the current macro block's segnment id 
to the bitstream // It should only be called if a segment map update is indicated. static void write_mb_segid(vp8_writer *bc, @@ -821,12 +755,7 @@ static void update_ref_probs(VP8_COMP *const cpi) { static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { int i; VP8_COMMON *const pc = &cpi->common; -#if CONFIG_NEWMVENTROPY const nmv_context *nmvc = &pc->fc.nmvc; -#else - const MV_CONTEXT *mvc = pc->fc.mvc; - const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp; -#endif MACROBLOCK *x = &cpi->mb; MACROBLOCKD *xd = &cpi->mb.e_mbd; MODE_INFO *m; @@ -1042,21 +971,19 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { cpi->common.pred_filter_mode); } #endif -#if CONFIG_SWITCHABLE_INTERP if (mode >= NEARESTMV && mode <= SPLITMV) { if (cpi->common.mcomp_filter_type == SWITCHABLE) { vp8_write_token(bc, vp8_switchable_interp_tree, - get_pred_probs(&cpi->common, xd, PRED_SWITCHABLE_INTERP), + get_pred_probs(&cpi->common, xd, + PRED_SWITCHABLE_INTERP), vp8_switchable_interp_encodings + vp8_switchable_interp_map[mi->interp_filter]); - //if (!cpi->dummy_packing) printf("Reading: %d\n", mi->interp_filter); } else { assert (mi->interp_filter == cpi->common.mcomp_filter_type); } } -#endif if (mi->second_ref_frame && (mode == NEWMV || mode == SPLITMV)) { int_mv n1, n2; @@ -1099,17 +1026,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { } #endif -#if CONFIG_NEWMVENTROPY write_nmv(bc, &mi->mv[0].as_mv, &best_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - write_mv_hp(bc, &mi->mv[0].as_mv, &best_mv, mvc_hp); - } else { - write_mv(bc, &mi->mv[0].as_mv, &best_mv, mvc); - } -#endif if (mi->second_ref_frame) { #if 0 //CONFIG_NEW_MVREF @@ -1126,17 +1045,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { &best_second_mv); cpi->best_ref_index_counts[best_index]++; #endif -#if CONFIG_NEWMVENTROPY write_nmv(bc, &mi->mv[1].as_mv, 
&best_second_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - write_mv_hp(bc, &mi->mv[1].as_mv, &best_second_mv, mvc_hp); - } else { - write_mv(bc, &mi->mv[1].as_mv, &best_second_mv, mvc); - } -#endif } break; case SPLITMV: { @@ -1178,40 +1089,16 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { #ifdef ENTROPY_STATS active_section = 11; #endif -#if CONFIG_NEWMVENTROPY write_nmv(bc, &blockmv.as_mv, &best_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - write_mv_hp(bc, &blockmv.as_mv, &best_mv, - (const MV_CONTEXT_HP *) mvc_hp); - } else { - write_mv(bc, &blockmv.as_mv, &best_mv, - (const MV_CONTEXT *) mvc); - } -#endif if (mi->second_ref_frame) { -#if CONFIG_NEWMVENTROPY write_nmv(bc, &cpi->mb.partition_info->bmi[j].second_mv.as_mv, &best_second_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); -#else - if (xd->allow_high_precision_mv) { - write_mv_hp( - bc, - &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &best_second_mv, (const MV_CONTEXT_HP *)mvc_hp); - } else { - write_mv( - bc, - &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &best_second_mv, (const MV_CONTEXT *) mvc); - } -#endif } } } while (++j < cpi->mb.partition_info->count); @@ -1223,9 +1110,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { } } -#if CONFIG_TX_SELECT if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || - (rf != INTRA_FRAME && mode != SPLITMV)) && + (rf != INTRA_FRAME && !(mode == SPLITMV && + mi->partitioning == PARTITIONING_4X4))) && pc->txfm_mode == TX_MODE_SELECT && !((pc->mb_no_coeff_skip && mi->mb_skip_coeff) || (segfeature_active(xd, segment_id, SEG_LVL_EOB) && @@ -1233,10 +1120,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) { TX_SIZE sz = mi->txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp8_write(bc, sz != TX_4X4, 
pc->prob_tx[0]); - if (sz != TX_4X4 && mode != I8X8_PRED) + if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) vp8_write(bc, sz != TX_8X8, pc->prob_tx[1]); } -#endif #ifdef ENTROPY_STATS active_section = 1; @@ -1365,7 +1251,6 @@ static void write_mb_modes_kf(const VP8_COMMON *c, } else write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); -#if CONFIG_TX_SELECT if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && !((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) || (segfeature_active(xd, segment_id, SEG_LVL_EOB) && @@ -1376,7 +1261,6 @@ static void write_mb_modes_kf(const VP8_COMMON *c, if (sz != TX_4X4 && ym <= TM_PRED) vp8_write(bc, sz != TX_8X8, c->prob_tx[1]); } -#endif } static void write_kfmodes(VP8_COMP* const cpi, vp8_writer* const bc) { @@ -1498,7 +1382,6 @@ void build_coeff_contexts(VP8_COMP *cpi) { } } } -#if CONFIG_HYBRIDTRANSFORM for (i = 0; i < BLOCK_TYPES; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -1519,8 +1402,6 @@ void build_coeff_contexts(VP8_COMP *cpi) { } } } -#endif - if (cpi->common.txfm_mode != ONLY_4X4) { for (i = 0; i < BLOCK_TYPES_8X8; ++i) { @@ -1547,7 +1428,6 @@ void build_coeff_contexts(VP8_COMP *cpi) { } } } -#if CONFIG_HYBRIDTRANSFORM8X8 for (i = 0; i < BLOCK_TYPES_8X8; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -1572,7 +1452,6 @@ void build_coeff_contexts(VP8_COMP *cpi) { } } } -#endif } if (cpi->common.txfm_mode > ALLOW_8X8) { @@ -1595,7 +1474,6 @@ void build_coeff_contexts(VP8_COMP *cpi) { } } } -#if CONFIG_HYBRIDTRANSFORM16X16 for (i = 0; i < BLOCK_TYPES_16X16; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -1614,7 +1492,6 @@ void build_coeff_contexts(VP8_COMP *cpi) { } } } -#endif } #if 0 @@ -1887,7 +1764,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) { } } -#if CONFIG_HYBRIDTRANSFORM savings = 0; update[0] = update[1] = 0; for (i = 0; i < BLOCK_TYPES; 
++i) { @@ -1976,7 +1852,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) { } } } -#endif /* do not do this if not even allowed */ if (cpi->common.txfm_mode != ONLY_4X4) { @@ -2054,7 +1929,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) { } } } -#if CONFIG_HYBRIDTRANSFORM8X8 update[0] = update[1] = 0; savings = 0; for (i = 0; i < BLOCK_TYPES_8X8; ++i) { @@ -2128,7 +2002,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) { } } } -#endif } if (cpi->common.txfm_mode > ALLOW_8X8) { @@ -2206,7 +2079,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) { } } } -#if CONFIG_HYBRIDTRANSFORM16X16 update[0] = update[1] = 0; savings = 0; for (i = 0; i < BLOCK_TYPES_16X16; ++i) { @@ -2280,7 +2152,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) { } } } -#endif } } @@ -2561,12 +2432,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) /* sb mode probability */ const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1)); - pc->sb_coded = get_prob(cpi->sb_count, sb_max); + pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max); vp8_write_literal(&header_bc, pc->sb_coded, 8); } #endif -#if CONFIG_TX_SELECT { if (pc->txfm_mode == TX_MODE_SELECT) { pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0], @@ -2583,9 +2453,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) vp8_write_literal(&header_bc, pc->prob_tx[1], 8); } } -#else - vp8_write_bit(&header_bc, !!pc->txfm_mode); -#endif // Encode the loop filter level and type vp8_write_bit(&header_bc, pc->filter_type); @@ -2687,7 +2554,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) // Signal whether to allow high MV precision vp8_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 
1 : 0); -#if CONFIG_SWITCHABLE_INTERP if (pc->mcomp_filter_type == SWITCHABLE) { /* Check to see if only one of the filters is actually used */ int count[VP8_SWITCHABLE_FILTERS]; @@ -2712,7 +2578,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) // Signal the type of subpel filter to use vp8_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE)); if (pc->mcomp_filter_type != SWITCHABLE) -#endif /* CONFIG_SWITCHABLE_INTERP */ vp8_write_literal(&header_bc, (pc->mcomp_filter_type), 2); } @@ -2731,29 +2596,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) vp8_clear_system_state(); // __asm emms; vp8_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs); -#if CONFIG_HYBRIDTRANSFORM vp8_copy(cpi->common.fc.pre_hybrid_coef_probs, cpi->common.fc.hybrid_coef_probs); -#endif vp8_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8); -#endif vp8_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16); -#endif vp8_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob); vp8_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); vp8_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob); vp8_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); vp8_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob); vp8_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); -#if CONFIG_NEWMVENTROPY cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; -#else - vp8_copy(cpi->common.fc.pre_mvc, cpi->common.fc.mvc); - vp8_copy(cpi->common.fc.pre_mvc_hp, cpi->common.fc.mvc_hp); -#endif vp8_zero(cpi->sub_mv_ref_count); vp8_zero(cpi->mbsplit_count); 
vp8_zero(cpi->common.fc.mv_ref_ct) @@ -2796,10 +2650,8 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) vp8_write_literal(&header_bc, pc->prob_pred_filter_off, 8); #endif -#if CONFIG_SWITCHABLE_INTERP if (pc->mcomp_filter_type == SWITCHABLE) update_switchable_interp_probs(cpi, &header_bc); -#endif vp8_write_literal(&header_bc, pc->prob_intra_coded, 8); vp8_write_literal(&header_bc, pc->prob_last_coded, 8); @@ -2825,15 +2677,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) update_mbintra_mode_probs(cpi, &header_bc); -#if CONFIG_NEWMVENTROPY vp8_write_nmvprobs(cpi, xd->allow_high_precision_mv, &header_bc); -#else - if (xd->allow_high_precision_mv) { - vp8_write_mvprobs_hp(cpi, &header_bc); - } else { - vp8_write_mvprobs(cpi, &header_bc); - } -#endif } vp8_stop_encode(&header_bc); diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 80f9b75b8..48623be8c 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -79,9 +79,7 @@ typedef struct { int hybrid_pred_diff; int comp_pred_diff; int single_pred_diff; -#if CONFIG_TX_SELECT int64_t txfm_rd_diff[NB_TXFM_MODES]; -#endif } PICK_MODE_CONTEXT; typedef struct { @@ -114,7 +112,6 @@ typedef struct { int *mb_norm_activity_ptr; signed int act_zbin_adj; -#if CONFIG_NEWMVENTROPY int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; int *nmvcost[2]; @@ -126,28 +123,17 @@ typedef struct { int *nmvsadcost[2]; int nmvsadcosts_hp[2][MV_VALS]; int *nmvsadcost_hp[2]; -#else - int mvcosts[2][MVvals + 1]; - int *mvcost[2]; - int mvsadcosts[2][MVfpvals + 1]; - int *mvsadcost[2]; - int mvcosts_hp[2][MVvals_hp + 1]; - int *mvcost_hp[2]; - int mvsadcosts_hp[2][MVfpvals_hp + 1]; - int *mvsadcost_hp[2]; -#endif /* CONFIG_NEWMVENTROPY */ int mbmode_cost[2][MB_MODE_COUNT]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; int bmode_costs[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES]; int i8x8_mode_costs[MB_MODE_COUNT]; int inter_bmode_costs[B_MODE_COUNT]; -#if 
CONFIG_SWITCHABLE_INTERP - int switchable_interp_costs[VP8_SWITCHABLE_FILTERS+1] + int switchable_interp_costs[VP8_SWITCHABLE_FILTERS + 1] [VP8_SWITCHABLE_FILTERS]; -#endif - // These define limits to motion vector components to prevent them from extending outside the UMV borders + // These define limits to motion vector components to prevent them + // from extending outside the UMV borders int mv_col_min; int mv_col_max; int mv_row_min; @@ -164,10 +150,8 @@ typedef struct { unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -#endif int optimize; diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index cd13fec7c..0983b1c0a 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -14,8 +14,6 @@ #include "vp8/common/idct.h" #include "vp8/common/systemdependent.h" -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 - #include "vp8/common/blockd.h" // TODO: these transforms can be converted into integer forms to reduce @@ -71,9 +69,7 @@ float adst_8[64] = { 0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509, 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532 }; -#endif -#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 float dct_16[256] = { 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, @@ -143,7 +139,6 @@ float adst_16[256] = { 0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761, 0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094 }; -#endif static const int xC1S7 = 16069; static const int xC2S6 = 15137; @@ -400,7 +395,6 @@ 
void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8 } -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 void vp8_fht_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim) { @@ -518,7 +512,6 @@ void vp8_fht_c(short *input, short *output, int pitch, } vp8_clear_system_state(); // Make it simd safe : __asm emms; } -#endif void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index 180192bbb..4ad1fe85d 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -26,10 +26,8 @@ #endif -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 void vp8_fht_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim); -#endif #ifndef vp8_fdct_short16x16 #define vp8_fdct_short16x16 vp8_short_fdct16x16_c diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 619695c33..0910cfd35 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -120,8 +120,8 @@ static unsigned int tt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x) { * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). */ - act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer, - x->src.y_stride, VP8_VAR_OFFS, 0, &sse); + act = vp8_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0, + &sse); act = act << 4; /* If the region is flat, lower the activity some more. 
*/ @@ -411,7 +411,6 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; } -#if CONFIG_TX_SELECT { int segment_id = mbmi->segment_id; if (!segfeature_active(xd, segment_id, SEG_LVL_EOB) || @@ -421,7 +420,6 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { } } } -#endif if (cpi->common.frame_type == KEY_FRAME) { // Restore the coding modes to that held in the coding context @@ -1342,37 +1340,18 @@ static void encode_frame_internal(VP8_COMP *cpi) { cpi->pred_filter_on_count = 0; cpi->pred_filter_off_count = 0; #endif -#if CONFIG_SWITCHABLE_INTERP vp8_zero(cpi->switchable_interp_count); -#endif - -#if 0 - // Experimental code - cpi->frame_distortion = 0; - cpi->last_mb_distortion = 0; -#endif xd->mode_info_context = cm->mi; xd->prev_mode_info_context = cm->prev_mi; -#if CONFIG_NEWMVENTROPY vp8_zero(cpi->NMVcount); -#else - vp8_zero(cpi->MVcount); - vp8_zero(cpi->MVcount_hp); -#endif vp8_zero(cpi->coef_counts); -#if CONFIG_HYBRIDTRANSFORM vp8_zero(cpi->hybrid_coef_counts); -#endif vp8_zero(cpi->coef_counts_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_zero(cpi->hybrid_coef_counts_8x8); -#endif vp8_zero(cpi->coef_counts_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_zero(cpi->hybrid_coef_counts_16x16); -#endif vp8cx_frame_init_quantizer(cpi); @@ -1393,11 +1372,9 @@ static void encode_frame_internal(VP8_COMP *cpi) { vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff)); vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count)); vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count)); -#if CONFIG_TX_SELECT vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count)); vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p)); vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff)); -#endif { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -1457,7 +1434,6 @@ static int 
check_dual_ref_flags(VP8_COMP *cpi) { } } -#if CONFIG_TX_SELECT static void reset_skip_txfm_size(VP8_COMP *cpi, TX_SIZE txfm_max) { VP8_COMMON *cm = &cpi->common; int mb_row, mb_col, mis = cm->mode_info_stride; @@ -1481,7 +1457,6 @@ static void reset_skip_txfm_size(VP8_COMP *cpi, TX_SIZE txfm_max) { } } } -#endif void vp8_encode_frame(VP8_COMP *cpi) { if (cpi->sf.RD) { @@ -1527,7 +1502,6 @@ void vp8_encode_frame(VP8_COMP *cpi) { txfm_type = ONLY_4X4; } else #endif -#if CONFIG_TX_SELECT /* FIXME (rbultje) * this is a hack (no really), basically to work around the complete * nonsense coefficient cost prediction for keyframes. The probabilities @@ -1575,16 +1549,11 @@ void vp8_encode_frame(VP8_COMP *cpi) { cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? ALLOW_16X16 : TX_MODE_SELECT; #endif -#else - txfm_type = ALLOW_16X16; -#endif // CONFIG_TX_SELECT cpi->common.txfm_mode = txfm_type; -#if CONFIG_TX_SELECT if (txfm_type != TX_MODE_SELECT) { cpi->common.prob_tx[0] = 128; cpi->common.prob_tx[1] = 128; } -#endif cpi->common.comp_pred_mode = pred_type; encode_frame_internal(cpi); @@ -1594,7 +1563,6 @@ void vp8_encode_frame(VP8_COMP *cpi) { cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; } -#if CONFIG_TX_SELECT for (i = 0; i < NB_TXFM_MODES; ++i) { int64_t pd = cpi->rd_tx_select_diff[i]; int diff; @@ -1604,7 +1572,6 @@ void vp8_encode_frame(VP8_COMP *cpi) { cpi->rd_tx_select_threshes[frame_type][i] += diff; cpi->rd_tx_select_threshes[frame_type][i] /= 2; } -#endif if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { int single_count_zero = 0; @@ -1622,7 +1589,6 @@ void vp8_encode_frame(VP8_COMP *cpi) { } } -#if CONFIG_TX_SELECT if (cpi->common.txfm_mode == TX_MODE_SELECT) { const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4]; const int count8x8 = cpi->txfm_count[TX_8X8]; @@ -1639,7 +1605,6 @@ void vp8_encode_frame(VP8_COMP *cpi) { cpi->common.txfm_mode = ALLOW_16X16; } } -#endif } else { encode_frame_internal(cpi); } @@ -1957,15 
+1922,12 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, } if (output_enabled) { -#if CONFIG_TX_SELECT int segment_id = mbmi->segment_id; -#endif // Tokenize sum_intra_stats(cpi, x); vp8_tokenize_mb(cpi, &x->e_mbd, t, 0); -#if CONFIG_TX_SELECT if (cpi->common.txfm_mode == TX_MODE_SELECT && !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || (segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) && @@ -1975,9 +1937,7 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, } else if (mbmi->mode == I8X8_PRED) { cpi->txfm_count_8x8p[mbmi->txfm_size]++; } - } else -#endif - if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) { + } else if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) { mbmi->txfm_size = TX_16X16; } else if (cpi->common.txfm_mode >= ALLOW_8X8 && mbmi->mode != B_PRED) { @@ -2012,9 +1972,7 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x, assert(!xd->mode_info_context->mbmi.encoded_as_sb); #endif -#if CONFIG_SWITCHABLE_INTERP vp8_setup_interp_filters(xd, mbmi->interp_filter, cm); -#endif if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. 
adjust_act_zbin(cpi, x); @@ -2158,7 +2116,6 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x, } if (output_enabled) { -#if CONFIG_TX_SELECT int segment_id = mbmi->segment_id; if (cpi->common.txfm_mode == TX_MODE_SELECT && !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || @@ -2167,16 +2124,18 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x, if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV) { cpi->txfm_count[mbmi->txfm_size]++; - } else if (mbmi->mode == I8X8_PRED) { + } else if (mbmi->mode == I8X8_PRED || + (mbmi->mode == SPLITMV && + mbmi->partitioning != PARTITIONING_4X4)) { cpi->txfm_count_8x8p[mbmi->txfm_size]++; } - } else -#endif - if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && + } else if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) { mbmi->txfm_size = TX_16X16; - } else if (mbmi->mode != B_PRED && mbmi->mode != SPLITMV && - cpi->common.txfm_mode >= ALLOW_8X8) { + } else if (mbmi->mode != B_PRED && + !(mbmi->mode == SPLITMV && + mbmi->partitioning == PARTITIONING_4X4) && + cpi->common.txfm_mode >= ALLOW_8X8) { mbmi->txfm_size = TX_8X8; } else { mbmi->txfm_size = TX_4X4; diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 9076780d9..f44df22ea 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -48,7 +48,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { } } - intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff); + intra_pred_var = vp8_get_mb_ss(x->src_diff); return intra_pred_var; } @@ -57,9 +57,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, int ib) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; -#if CONFIG_HYBRIDTRANSFORM TX_TYPE tx_type; -#endif #if CONFIG_COMP_INTRA_PRED if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { @@ -74,15 +72,12 @@ void 
vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16); -#if CONFIG_HYBRIDTRANSFORM tx_type = get_tx_type(&x->e_mbd, b); if (tx_type != DCT_DCT) { vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4); vp8_ht_quantize_b_4x4(be, b, tx_type); vp8_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4); - } else -#endif - { + } else { x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32) ; x->quantize_b_4x4(be, b) ; vp8_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32) ; @@ -103,9 +98,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; BLOCK *b = &x->block[0]; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; -#if CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type; -#endif #if CONFIG_COMP_INTRA_PRED if (xd->mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) @@ -120,7 +113,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { xd->predictor, b->src_stride); if (tx_size == TX_16X16) { -#if CONFIG_HYBRIDTRANSFORM16X16 BLOCKD *bd = &xd->block[0]; tx_type = get_tx_type(xd, bd); if (tx_type != DCT_DCT) { @@ -129,9 +121,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { if (x->optimize) vp8_optimize_mby_16x16(x, rtcd); vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16); - } else -#endif - { + } else { vp8_transform_mby_16x16(x); vp8_quantize_mby_16x16(x); if (x->optimize) @@ -196,9 +186,7 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, BLOCK *be = &x->block[ib]; const int iblock[4] = {0, 1, 4, 5}; int i; -#if CONFIG_HYBRIDTRANSFORM8X8 TX_TYPE tx_type; -#endif #if CONFIG_COMP_INTRA_PRED if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) { @@ -217,7 +205,6 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, // generate residual blocks vp8_subtract_4b_c(be, b, 16); -#if CONFIG_HYBRIDTRANSFORM8X8 tx_type = get_tx_type(xd, xd->block + idx); if (tx_type != DCT_DCT) { 
vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, @@ -226,13 +213,10 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, tx_type, 8); } else { -#endif x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); vp8_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); -#if CONFIG_HYBRIDTRANSFORM8X8 } -#endif } else { for (i = 0; i < 4; i++) { b = &xd->block[ib + iblock[i]]; diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index dc54d05a2..d3bd0f1dd 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -304,7 +304,6 @@ void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, scan = vp8_default_zig_zag1d; bands = vp8_coef_bands; default_eob = 16; -#if CONFIG_HYBRIDTRANSFORM // TODO: this isn't called (for intra4x4 modes), but will be left in // since it could be used later { @@ -327,7 +326,6 @@ void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, scan = vp8_default_zig_zag1d; } } -#endif break; case TX_8X8: scan = vp8_default_zig_zag1d_8x8; @@ -638,6 +636,7 @@ void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; + int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; if (!x->e_mbd.above_context || !x->e_mbd.left_context) return; @@ -647,18 +646,21 @@ void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - type = PLANE_TYPE_Y_NO_DC; + type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b += 4) { optimize_b(x, b, type, ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b], rtcd, TX_8X8); - *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]); - *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]); + ta[vp8_block2above_8x8[b] + 1] = ta[vp8_block2above_8x8[b]]; + tl[vp8_block2left_8x8[b] + 1] = tl[vp8_block2left_8x8[b]]; } // 8x8 always have 2nd roder haar block - check_reset_8x8_2nd_coeffs(&x->e_mbd, - ta + vp8_block2above_8x8[24], tl + vp8_block2left_8x8[24]); + if (has_2nd_order) { + check_reset_8x8_2nd_coeffs(&x->e_mbd, + ta + vp8_block2above_8x8[24], + tl + vp8_block2left_8x8[24]); + } } void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { @@ -680,8 +682,8 @@ void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b], rtcd, TX_8X8); - *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]); - *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]); + ta[vp8_block2above_8x8[b] + 1] = ta[vp8_block2above_8x8[b]]; + tl[vp8_block2left_8x8[b] + 1] = tl[vp8_block2left_8x8[b]]; } } @@ -898,11 +900,25 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { optimize_mb_16x16(x, rtcd); vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), xd); } else if (tx_size == TX_8X8) { - vp8_transform_mb_8x8(x); - vp8_quantize_mb_8x8(x); - if (x->optimize) - optimize_mb_8x8(x, rtcd); - vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd); + if (xd->mode_info_context->mbmi.mode == SPLITMV) { + assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); + vp8_transform_mby_8x8(x); + vp8_transform_mbuv_4x4(x); + vp8_quantize_mby_8x8(x); + vp8_quantize_mbuv_4x4(x); + if (x->optimize) { + vp8_optimize_mby_8x8(x, rtcd); + vp8_optimize_mbuv_4x4(x, rtcd); + } + 
vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd); + vp8_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd); + } else { + vp8_transform_mb_8x8(x); + vp8_quantize_mb_8x8(x); + if (x->optimize) + optimize_mb_8x8(x, rtcd); + vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd); + } } else { transform_mb_4x4(x); vp8_quantize_mb_4x4(x); diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index d520d995a..75dad2f9b 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -22,8 +22,6 @@ extern unsigned int active_section; //extern int final_packing; -#if CONFIG_NEWMVENTROPY - #ifdef NMV_STATS nmv_context_counts tnmvcounts; #endif @@ -549,593 +547,3 @@ void vp8_build_nmv_cost_table(int *mvjoint, if (mvc_flag_h) build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp); } - -#else /* CONFIG_NEWMVENTROPY */ - -static void encode_mvcomponent( - vp8_writer *const bc, - const int v, - const struct mv_context *mvc -) { - const vp8_prob *p = mvc->prob; - const int x = v < 0 ? 
-v : v; - - if (x < mvnum_short) { // Small - vp8_write(bc, 0, p[mvpis_short]); - vp8_treed_write(bc, vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits); - if (!x) - return; // no sign bit - } else { // Large - int i = 0; - - vp8_write(bc, 1, p[mvpis_short]); - - do - vp8_write(bc, (x >> i) & 1, p[MVPbits + i]); - - while (++i < mvnum_short_bits); - - i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ - - do - vp8_write(bc, (x >> i) & 1, p[MVPbits + i]); - - while (--i > mvnum_short_bits); - - if (x & ~((2 << mvnum_short_bits) - 1)) - vp8_write(bc, (x >> mvnum_short_bits) & 1, p[MVPbits + mvnum_short_bits]); - } - - vp8_write(bc, v < 0, p[MVPsign]); -} - -void vp8_encode_motion_vector(vp8_writer* const bc, - const MV* const mv, - const MV_CONTEXT* const mvc) { - encode_mvcomponent(bc, mv->row >> 1, &mvc[0]); - encode_mvcomponent(bc, mv->col >> 1, &mvc[1]); -} - - -static unsigned int cost_mvcomponent(const int v, - const struct mv_context* const mvc) { - const vp8_prob *p = mvc->prob; - const int x = v; // v<0? 
-v:v; - unsigned int cost; - - if (x < mvnum_short) { - cost = vp8_cost_zero(p [mvpis_short]) - + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits); - - if (!x) - return cost; - } else { - int i = 0; - cost = vp8_cost_one(p [mvpis_short]); - - do - cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1); - - while (++i < mvnum_short_bits); - - i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ - - do - cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1); - - while (--i > mvnum_short_bits); - - if (x & ~((2 << mvnum_short_bits) - 1)) - cost += vp8_cost_bit(p [MVPbits + mvnum_short_bits], (x >> mvnum_short_bits) & 1); - } - - return cost; // + vp8_cost_bit( p [MVPsign], v < 0); -} - -void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, - const int mvc_flag[2]) { - int i = 1; // -mv_max; - unsigned int cost0 = 0; - unsigned int cost1 = 0; - - vp8_clear_system_state(); - - i = 1; - - if (mvc_flag[0]) { - mvcost [0] [0] = cost_mvcomponent(0, &mvc[0]); - - do { - // mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]); - cost0 = cost_mvcomponent(i, &mvc[0]); - - mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]); - mvcost [0] [-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign]); - } while (++i <= mv_max); - } - - i = 1; - - if (mvc_flag[1]) { - mvcost [1] [0] = cost_mvcomponent(0, &mvc[1]); - - do { - // mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); - cost1 = cost_mvcomponent(i, &mvc[1]); - - mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]); - mvcost [1] [-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign]); - } while (++i <= mv_max); - } -} - - -// Motion vector probability table update depends on benefit. -// Small correction allows for the fact that an update to an MV probability -// may have benefit in subsequent frames as well as the current one. 
- -#define MV_PROB_UPDATE_CORRECTION -1 - - -__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2]) { - const unsigned int tot = ct[0] + ct[1]; - - if (tot) { - const vp8_prob x = ((ct[0] * 255) / tot) & -2; - *p = x ? x : 1; - } -} - -static void update( - vp8_writer *const bc, - const unsigned int ct[2], - vp8_prob *const cur_p, - const vp8_prob new_p, - const vp8_prob update_p, - int *updated -) { - const int cur_b = vp8_cost_branch(ct, *cur_p); - const int new_b = vp8_cost_branch(ct, new_p); - const int cost = 7 + MV_PROB_UPDATE_CORRECTION + ((vp8_cost_one(update_p) - vp8_cost_zero(update_p) + 128) >> 8); - - if (cur_b - new_b > cost) { - *cur_p = new_p; - vp8_write(bc, 1, update_p); - vp8_write_literal(bc, new_p >> 1, 7); - *updated = 1; - - } else - vp8_write(bc, 0, update_p); -} - -static void write_component_probs( - vp8_writer *const bc, - struct mv_context *cur_mvc, - const struct mv_context *default_mvc_, - const struct mv_context *update_mvc, - const unsigned int events [MVvals], - unsigned int rc, - int *updated -) { - vp8_prob *Pcur = cur_mvc->prob; - const vp8_prob *default_mvc = default_mvc_->prob; - const vp8_prob *Pupdate = update_mvc->prob; - unsigned int is_short_ct[2], sign_ct[2]; - - unsigned int bit_ct [mvlong_width] [2]; - - unsigned int short_ct [mvnum_short]; - unsigned int short_bct [mvnum_short - 1] [2]; - - vp8_prob Pnew [MVPcount]; - - (void) rc; - vp8_copy_array(Pnew, default_mvc, MVPcount); - - vp8_zero(is_short_ct) - vp8_zero(sign_ct) - vp8_zero(bit_ct) - vp8_zero(short_ct) - vp8_zero(short_bct) - - - // j=0 - { - const int c = events [mv_max]; - - is_short_ct [0] += c; // Short vector - short_ct [0] += c; // Magnitude distribution - } - - // j: 1 ~ mv_max (1023) - { - int j = 1; - - do { - const int c1 = events [mv_max + j]; // positive - const int c2 = events [mv_max - j]; // negative - const int c = c1 + c2; - int a = j; - - sign_ct [0] += c1; - sign_ct [1] += c2; - - if (a < mvnum_short) { - is_short_ct [0] += c; 
// Short vector - short_ct [a] += c; // Magnitude distribution - } else { - int k = mvlong_width - 1; - is_short_ct [1] += c; // Long vector - - /* bit 3 not always encoded. */ - do - bit_ct [k] [(a >> k) & 1] += c; - - while (--k >= 0); - } - } while (++j <= mv_max); - } - - calc_prob(Pnew + mvpis_short, is_short_ct); - - calc_prob(Pnew + MVPsign, sign_ct); - - { - vp8_prob p [mvnum_short - 1]; /* actually only need branch ct */ - int j = 0; - - vp8_tree_probs_from_distribution( - mvnum_short, vp8_small_mvencodings, vp8_small_mvtree, - p, short_bct, short_ct, - 256, 1 - ); - - do - calc_prob(Pnew + MVPshort + j, short_bct[j]); - - while (++j < mvnum_short - 1); - } - - { - int j = 0; - - do - calc_prob(Pnew + MVPbits + j, bit_ct[j]); - - while (++j < mvlong_width); - } - - update(bc, is_short_ct, Pcur + mvpis_short, Pnew[mvpis_short], - *Pupdate++, updated); - - update(bc, sign_ct, Pcur + MVPsign, Pnew[MVPsign], - *Pupdate++, updated); - - { - const vp8_prob *const new_p = Pnew + MVPshort; - vp8_prob *const cur_p = Pcur + MVPshort; - - int j = 0; - - do - - update(bc, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated); - - while (++j < mvnum_short - 1); - } - - { - const vp8_prob *const new_p = Pnew + MVPbits; - vp8_prob *const cur_p = Pcur + MVPbits; - - int j = 0; - - do - - update(bc, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated); - - while (++j < mvlong_width); - } -} - -void vp8_write_mvprobs(VP8_COMP* const cpi, vp8_writer* const bc) { - MV_CONTEXT *mvc = cpi->common.fc.mvc; - int flags[2] = {0, 0}; -#ifdef ENTROPY_STATS - active_section = 4; -#endif - write_component_probs( - bc, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], - cpi->MVcount[0], 0, &flags[0]); - - write_component_probs( - bc, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], - cpi->MVcount[1], 1, &flags[1]); - - if (flags[0] || flags[1]) - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags); - -#ifdef 
ENTROPY_STATS - active_section = 5; -#endif -} - - -static void encode_mvcomponent_hp( - vp8_writer *const bc, - const int v, - const struct mv_context_hp *mvc -) { - const vp8_prob *p = mvc->prob; - const int x = v < 0 ? -v : v; - - if (x < mvnum_short_hp) { // Small - vp8_write(bc, 0, p[mvpis_short_hp]); - vp8_treed_write(bc, vp8_small_mvtree_hp, p + MVPshort_hp, x, - mvnum_short_bits_hp); - if (!x) - return; // no sign bit - } else { // Large - int i = 0; - - vp8_write(bc, 1, p[mvpis_short_hp]); - - do - vp8_write(bc, (x >> i) & 1, p[MVPbits_hp + i]); - - while (++i < mvnum_short_bits_hp); - - i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */ - - do - vp8_write(bc, (x >> i) & 1, p[MVPbits_hp + i]); - - while (--i > mvnum_short_bits_hp); - - if (x & ~((2 << mvnum_short_bits_hp) - 1)) - vp8_write(bc, (x >> mvnum_short_bits_hp) & 1, - p[MVPbits_hp + mvnum_short_bits_hp]); - } - - vp8_write(bc, v < 0, p[MVPsign_hp]); -} - -void vp8_encode_motion_vector_hp(vp8_writer *bc, const MV *mv, - const MV_CONTEXT_HP *mvc) { - - encode_mvcomponent_hp(bc, mv->row, &mvc[0]); - encode_mvcomponent_hp(bc, mv->col, &mvc[1]); -} - - -static unsigned int cost_mvcomponent_hp(const int v, - const struct mv_context_hp *mvc) { - const vp8_prob *p = mvc->prob; - const int x = v; // v<0? 
-v:v; - unsigned int cost; - - if (x < mvnum_short_hp) { - cost = vp8_cost_zero(p [mvpis_short_hp]) - + vp8_treed_cost(vp8_small_mvtree_hp, p + MVPshort_hp, x, - mvnum_short_bits_hp); - - if (!x) - return cost; - } else { - int i = 0; - cost = vp8_cost_one(p [mvpis_short_hp]); - - do - cost += vp8_cost_bit(p [MVPbits_hp + i], (x >> i) & 1); - - while (++i < mvnum_short_bits_hp); - - i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */ - - do - cost += vp8_cost_bit(p [MVPbits_hp + i], (x >> i) & 1); - - while (--i > mvnum_short_bits_hp); - - if (x & ~((2 << mvnum_short_bits_hp) - 1)) - cost += vp8_cost_bit(p [MVPbits_hp + mvnum_short_bits_hp], - (x >> mvnum_short_bits_hp) & 1); - } - - return cost; // + vp8_cost_bit( p [MVPsign], v < 0); -} - -void vp8_build_component_cost_table_hp(int *mvcost[2], - const MV_CONTEXT_HP *mvc, - const int mvc_flag[2]) { - int i = 1; // -mv_max; - unsigned int cost0 = 0; - unsigned int cost1 = 0; - - vp8_clear_system_state(); - - i = 1; - - if (mvc_flag[0]) { - mvcost [0] [0] = cost_mvcomponent_hp(0, &mvc[0]); - - do { - // mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]); - cost0 = cost_mvcomponent_hp(i, &mvc[0]); - - mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign_hp]); - mvcost [0] [-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign_hp]); - } while (++i <= mv_max_hp); - } - - i = 1; - - if (mvc_flag[1]) { - mvcost [1] [0] = cost_mvcomponent_hp(0, &mvc[1]); - - do { - // mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); - cost1 = cost_mvcomponent_hp(i, &mvc[1]); - - mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign_hp]); - mvcost [1] [-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign_hp]); - } while (++i <= mv_max_hp); - } -} - - -static void write_component_probs_hp( - vp8_writer *const bc, - struct mv_context_hp *cur_mvc, - const struct mv_context_hp *default_mvc_, - const struct mv_context_hp *update_mvc, - const unsigned int events [MVvals_hp], - unsigned int rc, - int *updated -) { - vp8_prob *Pcur = 
cur_mvc->prob; - const vp8_prob *default_mvc = default_mvc_->prob; - const vp8_prob *Pupdate = update_mvc->prob; - unsigned int is_short_ct[2], sign_ct[2]; - - unsigned int bit_ct [mvlong_width_hp] [2]; - - unsigned int short_ct [mvnum_short_hp]; - unsigned int short_bct [mvnum_short_hp - 1] [2]; - - vp8_prob Pnew [MVPcount_hp]; - - (void) rc; - vp8_copy_array(Pnew, default_mvc, MVPcount_hp); - - vp8_zero(is_short_ct) - vp8_zero(sign_ct) - vp8_zero(bit_ct) - vp8_zero(short_ct) - vp8_zero(short_bct) - - - // j=0 - { - const int c = events [mv_max_hp]; - - is_short_ct [0] += c; // Short vector - short_ct [0] += c; // Magnitude distribution - } - - // j: 1 ~ mv_max (1023) - { - int j = 1; - - do { - const int c1 = events [mv_max_hp + j]; // positive - const int c2 = events [mv_max_hp - j]; // negative - const int c = c1 + c2; - int a = j; - - sign_ct [0] += c1; - sign_ct [1] += c2; - - if (a < mvnum_short_hp) { - is_short_ct [0] += c; // Short vector - short_ct [a] += c; // Magnitude distribution - } else { - int k = mvlong_width_hp - 1; - is_short_ct [1] += c; // Long vector - - /* bit 3 not always encoded. 
*/ - do - bit_ct [k] [(a >> k) & 1] += c; - - while (--k >= 0); - } - } while (++j <= mv_max_hp); - } - - calc_prob(Pnew + mvpis_short_hp, is_short_ct); - - calc_prob(Pnew + MVPsign_hp, sign_ct); - - { - vp8_prob p [mvnum_short_hp - 1]; /* actually only need branch ct */ - int j = 0; - - vp8_tree_probs_from_distribution( - mvnum_short_hp, vp8_small_mvencodings_hp, vp8_small_mvtree_hp, - p, short_bct, short_ct, - 256, 1 - ); - - do - calc_prob(Pnew + MVPshort_hp + j, short_bct[j]); - - while (++j < mvnum_short_hp - 1); - } - - { - int j = 0; - - do - calc_prob(Pnew + MVPbits_hp + j, bit_ct[j]); - - while (++j < mvlong_width_hp); - } - - update(bc, is_short_ct, Pcur + mvpis_short_hp, Pnew[mvpis_short_hp], - *Pupdate++, updated); - - update(bc, sign_ct, Pcur + MVPsign_hp, Pnew[MVPsign_hp], *Pupdate++, - updated); - - { - const vp8_prob *const new_p = Pnew + MVPshort_hp; - vp8_prob *const cur_p = Pcur + MVPshort_hp; - - int j = 0; - - do - - update(bc, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated); - - while (++j < mvnum_short_hp - 1); - } - - { - const vp8_prob *const new_p = Pnew + MVPbits_hp; - vp8_prob *const cur_p = Pcur + MVPbits_hp; - - int j = 0; - - do - - update(bc, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated); - - while (++j < mvlong_width_hp); - } -} - -void vp8_write_mvprobs_hp(VP8_COMP* const cpi, vp8_writer* const bc) { - MV_CONTEXT_HP *mvc = cpi->common.fc.mvc_hp; - int flags[2] = {0, 0}; -#ifdef ENTROPY_STATS - active_section = 4; -#endif - write_component_probs_hp( - bc, &mvc[0], &vp8_default_mv_context_hp[0], &vp8_mv_update_probs_hp[0], - cpi->MVcount_hp[0], 0, &flags[0] - ); - write_component_probs_hp( - bc, &mvc[1], &vp8_default_mv_context_hp[1], &vp8_mv_update_probs_hp[1], - cpi->MVcount_hp[1], 1, &flags[1] - ); - - if (flags[0] || flags[1]) - vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, - (const MV_CONTEXT_HP *) - cpi->common.fc.mvc_hp, flags); -#ifdef ENTROPY_STATS - active_section = 5; -#endif -} - -#endif /* 
CONFIG_NEWMVENTROPY */ diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h index c06831cb2..254536580 100644 --- a/vp8/encoder/encodemv.h +++ b/vp8/encoder/encodemv.h @@ -14,7 +14,6 @@ #include "onyx_int.h" -#if CONFIG_NEWMVENTROPY void vp8_write_nmvprobs(VP8_COMP* const, int usehp, vp8_writer* const); void vp8_encode_nmv(vp8_writer* const w, const MV* const mv, const MV* const ref, const nmv_context* const mvctx); @@ -27,19 +26,5 @@ void vp8_build_nmv_cost_table(int *mvjoint, int usehp, int mvc_flag_v, int mvc_flag_h); -#else /* CONFIG_NEWMVENTROPY */ -void vp8_write_mvprobs(VP8_COMP* const, vp8_writer* const); -void vp8_encode_motion_vector(vp8_writer* const, const MV* const, - const MV_CONTEXT* const); -void vp8_build_component_cost_table(int *mvcost[2], - const MV_CONTEXT*, - const int mvc_flag[2]); -void vp8_write_mvprobs_hp(VP8_COMP* const, vp8_writer* const); -void vp8_encode_motion_vector_hp(vp8_writer* const, const MV* const, - const MV_CONTEXT_HP* const); -void vp8_build_component_cost_table_hp(int *mvcost[2], - const MV_CONTEXT_HP*, - const int mvc_flag[2]); -#endif /* CONFIG_NEWMVENTROPY */ #endif diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 95f6d97d3..09d5a762e 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -364,7 +364,8 @@ static void zz_motion_search(VP8_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre); - VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16)(src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); + vp8_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, + (unsigned int *)(best_motion_err)); } static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, @@ -387,7 +388,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int new_mv_mode_penalty = 256; // override the default variance function to use MSE - v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16); + 
v_fn_ptr.vf = vp8_mse16x16; // Set up pointers for this macro block recon buffer xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; @@ -492,12 +493,7 @@ void vp8_first_pass(VP8_COMP *cpi) { { int flag[2] = {1, 1}; vp8_init_mv_probs(cm); -#if CONFIG_NEWMVENTROPY vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); -#else - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); - vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cm->fc.mvc_hp, flag); -#endif } // for each macroblock row in image diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 356e32c3f..44e83fdc7 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -23,80 +23,6 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER void vp8_cmachine_specific_config(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT cpi->rtcd.common = &cpi->common.rtcd; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32 = vp8_sad32x32_c; -#endif - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32x3 = vp8_sad32x32x3_c; -#endif - cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_c; - cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_c; - cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_c; - cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c; - cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c; - -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32x8 = vp8_sad32x32x8_c; -#endif - cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c; - cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c; - cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c; - cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c; - cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c; - -#if 
CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32x4d = vp8_sad32x32x4d_c; -#endif - cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c; - cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c; - cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c; - cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_c; - cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_c; -#if ARCH_X86 || ARCH_X86_64 - cpi->rtcd.variance.copy32xn = vp8_copy32xn_c; -#endif - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.var32x32 = vp8_variance32x32_c; -#endif - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.subpixvar32x32 = vp8_sub_pixel_variance32x32_c; -#endif - cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.halfpixvar32x32_h = vp8_variance_halfpixvar32x32_h_c; -#endif - cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.halfpixvar32x32_v = vp8_variance_halfpixvar32x32_v_c; -#endif - cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.halfpixvar32x32_hv = vp8_variance_halfpixvar32x32_hv_c; -#endif - cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.subpixmse32x32 = vp8_sub_pixel_mse32x32_c; -#endif - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = 
vp8_get_mb_ss_c; cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c; cpi->rtcd.fdct.short16x16 = vp8_short_fdct16x16_c; @@ -118,16 +44,11 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) { cpi->rtcd.search.refining_search = vp8_refining_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; - cpi->rtcd.variance.satd16x16 = vp8_satd16x16_c; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; -#if CONFIG_INTERNAL_STATS - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c; - cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c; -#endif #endif vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c index 180ee5870..2eecfcdad 100644 --- a/vp8/encoder/mbgraph.c +++ b/vp8/encoder/mbgraph.c @@ -83,10 +83,8 @@ static unsigned int do_16x16_motion_iteration vp8_set_mbmode_and_mvs(x, NEWMV, dst_mv); vp8_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - best_err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, INT_MAX); + best_err = vp8_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, + xd->predictor, 16, INT_MAX); /* restore UMV window */ x->mv_col_min = tmp_col_min; @@ -130,11 +128,8 @@ static int do_16x16_motion_search // FIXME should really use something like near/nearest MV and/or MV prediction xd->pre.y_buffer = ref->y_buffer + mb_y_offset; xd->pre.y_stride = ref->y_stride; - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (ref->y_buffer + mb_y_offset, - ref->y_stride, xd->dst.y_buffer, - xd->dst.y_stride, INT_MAX); + err = vp8_sad16x16(ref->y_buffer + mb_y_offset, 
ref->y_stride, + xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the @@ -193,10 +188,8 @@ static int do_16x16_zerozero_search xd->pre.y_buffer = ref->y_buffer + mb_y_offset; xd->pre.y_stride = ref->y_stride; // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (ref->y_buffer + mb_y_offset, - ref->y_stride, xd->dst.y_buffer, - xd->dst.y_stride, INT_MAX); + err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, + xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); dst_mv->as_int = 0; @@ -221,11 +214,8 @@ static int find_best_16x16_intra xd->mode_info_context->mbmi.mode = mode; vp8_build_intra_predictors_mby(xd); - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (xd->predictor, 16, - buf->y_buffer + mb_y_offset, - buf->y_stride, best_err); + err = vp8_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset, + buf->y_stride, best_err); // find best if (err < best_err) { best_err = err; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index a6cf2f18b..210887491 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -47,15 +47,9 @@ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, MV v; v.row = (mv->as_mv.row - ref->as_mv.row); v.col = (mv->as_mv.col - ref->as_mv.col); -#if CONFIG_NEWMVENTROPY return ((mvjcost[vp8_get_mv_joint(v)] + mvcost[0][v.row] + mvcost[1][v.col]) * Weight) >> 7; -#else - return ((mvcost[0][v.row >> (ishp == 0)] + - mvcost[1][v.col >> (ishp == 0)]) - * Weight) >> 7; -#endif } static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, @@ -64,14 +58,9 @@ static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, MV v; v.row = (mv->as_mv.row - ref->as_mv.row); v.col = (mv->as_mv.col - ref->as_mv.col); -#if CONFIG_NEWMVENTROPY return ((mvjcost[vp8_get_mv_joint(v)] + mvcost[0][v.row] + mvcost[1][v.col]) * error_per_bit + 128) >> 8; -#else 
- return ((mvcost[0][v.row >> (ishp == 0)] + - mvcost[1][v.col >> (ishp == 0)]) * error_per_bit + 128) >> 8; -#endif } return 0; } @@ -83,14 +72,9 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, DEC_MVSADCOSTS, MV v; v.row = (mv->as_mv.row - ref->as_mv.row); v.col = (mv->as_mv.col - ref->as_mv.col); -#if CONFIG_NEWMVENTROPY return ((mvjsadcost[vp8_get_mv_joint(v)] + mvsadcost[0][v.row] + mvsadcost[1][v.col]) * error_per_bit + 128) >> 8; -#else - return ((mvsadcost[0][v.row] + mvsadcost[1][v.col]) - * error_per_bit + 128) >> 8; -#endif } return 0; } @@ -220,35 +204,42 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) { * could reduce the area. */ -#if CONFIG_NEWMVENTROPY /* estimated cost of a motion vector (r,c) */ -#define MVC(r,c) \ - (mvcost ? \ - ((mvjcost[((r)!=rr)*2 + ((c)!=rc)] + \ - mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * error_per_bit + 128 )>>8 : 0) -#else -#define MVC(r,c) \ - (mvcost ? \ - ((mvcost[0][((r)-rr)>>(xd->allow_high_precision_mv==0)] + \ - mvcost[1][((c)-rc)>>(xd->allow_high_precision_mv==0)]) * \ - error_per_bit + 128 )>>8 : 0) -#endif /* CONFIG_NEWMVENTROPY */ +#define MVC(r, c) \ + (mvcost ? 
\ + ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ + mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ + error_per_bit + 128) >> 8 : 0) -#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc +#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset + // for svf calc -#define IFMVCV(r,c,s,e) \ - if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; +#define IFMVCV(r, c, s, e) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ + s \ + else \ + e; /* pointer to predictor base of a motionvector */ -#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) +#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) /* returns subpixel variance error function */ -#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) - -/* checks if (r,c) has better score than previous best */ -#define CHECK_BETTER(v,r,c) \ - IFMVCV(r,c,{thismse = (DIST(r,c)); if((v = MVC(r,c)+thismse) < besterr) \ - { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;) +#define DIST(r, c) \ + vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) + +/* checks if (r, c) has better score than previous best */ +#define CHECK_BETTER(v, r, c) \ + IFMVCV(r, c, { \ + thismse = (DIST(r, c)); \ + if ((v = MVC(r, c) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + }, \ + v = INT_MAX;) #define MIN(x,y) (((x)<(y))?(x):(y)) #define MAX(x,y) (((x)>(y))?(x):(y)) @@ -307,17 +298,10 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, br = bestmv->as_mv.row << 3; bc = bestmv->as_mv.col << 3; hstep = 4; -#if CONFIG_NEWMVENTROPY minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); maxr = 
MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); -#else - minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1)); - maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1)); - minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1)); - maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1)); -#endif tr = br; tc = bc; @@ -403,13 +387,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, tc = bc; } -#if CONFIG_NEWMVENTROPY if (xd->allow_high_precision_mv) { usehp = vp8_use_nmv_hp(&ref_mv->as_mv); } else { usehp = 0; } -#endif if (usehp) { hstep >>= 1; @@ -771,13 +753,11 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, *sse1 = sse; } -#if CONFIG_NEWMVENTROPY if (x->e_mbd.allow_high_precision_mv) { usehp = vp8_use_nmv_hp(&ref_mv->as_mv); } else { usehp = 0; } -#endif if (!usehp) return bestmse; @@ -1304,16 +1284,8 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MACROBLOCKD *xd = &x->e_mbd; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1423,16 +1395,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MACROBLOCKD *xd = &x->e_mbd; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = 
center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1479,7 +1443,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, for (t = 0; t < 4; t++) block_offset[t] = ss[i + t].offset + best_address; - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, + sad_array); for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { @@ -1631,16 +1596,8 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int col_max = ref_col + distance; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1735,16 +1692,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned int sad_array[3]; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1872,16 +1821,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned int sad_array[3]; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = 
x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -2022,16 +1963,8 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, MACROBLOCKD *xd = &x->e_mbd; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -2106,16 +2039,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MACROBLOCKD *xd = &x->e_mbd; int_mv fcenter_mv; -#if CONFIG_NEWMVENTROPY int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; -#else - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; - if (xd->allow_high_precision_mv) { - mvsadcost[0] = x->mvsadcost_hp[0]; - mvsadcost[1] = x->mvsadcost_hp[1]; - } -#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index afca58084..f09106927 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -15,21 +15,12 @@ #include "block.h" #include "variance.h" -#if CONFIG_NEWMVENTROPY #define MVCOSTS mvjcost, mvcost #define MVSADCOSTS mvjsadcost, mvsadcost #define DEC_MVCOSTS int *mvjcost, int *mvcost[2] #define DEC_MVSADCOSTS int *mvjsadcost, int *mvsadcost[2] #define NULLMVCOST NULL, NULL #define XMVCOST x->nmvjointcost, (x->e_mbd.allow_high_precision_mv?x->nmvcost_hp:x->nmvcost) -#else -#define MVCOSTS mvcost -#define MVSADCOSTS mvsadcost -#define DEC_MVCOSTS int *mvcost[2] -#define DEC_MVSADCOSTS int *mvsadcost[2] -#define NULLMVCOST NULL -#define XMVCOST 
(x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost) -#endif /* CONFIG_NEWMVENTROPY */ #ifdef ENTROPY_STATS extern void init_mv_ref_counts(); diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c index b1abd1e2a..23b9973c3 100644 --- a/vp8/encoder/modecosts.c +++ b/vp8/encoder/modecosts.c @@ -46,14 +46,11 @@ void vp8_init_mode_costs(VP8_COMP *c) { vp8_cost_tokens(c->mb.i8x8_mode_costs, x->fc.i8x8_mode_prob, vp8_i8x8_mode_tree); -#if CONFIG_SWITCHABLE_INTERP { int i; for (i = 0; i <= VP8_SWITCHABLE_FILTERS; ++i) - //for (i = 0; i <= 0; ++i) vp8_cost_tokens((int *)c->mb.switchable_interp_costs[i], x->fc.switchable_interp_prob[i], vp8_switchable_interp_tree); } -#endif } diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 14e9e784a..f11ff5936 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -77,7 +77,7 @@ extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFF extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #endif -int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); +int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance); @@ -85,6 +85,7 @@ static void set_default_lf_deltas(VP8_COMP *cpi); extern const int vp8_gf_interval_table[101]; +#define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */ #define SEARCH_BEST_FILTER 0 /* to search exhaustively for best filter */ #define RESET_FOREACH_FILTER 0 /* whether to reset the encoder state @@ -101,25 +102,14 @@ extern const int vp8_gf_interval_table[101]; #if CONFIG_INTERNAL_STATS #include "math.h" -extern double vp8_calc_ssim -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - int lumamask, - double *weight, - const vp8_variance_rtcd_vtable_t *rtcd -); +extern double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, + 
YV12_BUFFER_CONFIG *dest, int lumamask, + double *weight); -extern double vp8_calc_ssimg -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v, - const vp8_variance_rtcd_vtable_t *rtcd -); +extern double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest, double *ssim_y, + double *ssim_u, double *ssim_v); #endif @@ -149,12 +139,10 @@ extern int skip_false_count; extern int intra_mode_stats[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES]; #endif -#if CONFIG_NEWMVENTROPY #ifdef NMV_STATS extern void init_nmvstats(); extern void print_nmvstats(); #endif -#endif #ifdef SPEEDSTATS unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -1630,7 +1618,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) { cpi->cq_target_quality = cpi->oxcf.cq_level; if (!cm->use_bilinear_mc_filter) - cm->mcomp_filter_type = EIGHTTAP; + cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; else cm->mcomp_filter_type = BILINEAR; @@ -1700,8 +1688,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) { #define M_LOG2_E 0.693147180559945309417 #define log2f(x) (log (x) / (float) M_LOG2_E) -#if CONFIG_NEWMVENTROPY - static void cal_nmvjointsadcost(int *mvjointsadcost) { mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; @@ -1739,40 +1725,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } while (++i <= MV_MAX); } -#else - -static void cal_mvsadcosts(int *mvsadcost[2]) { - int i = 1; - - mvsadcost [0] [0] = 300; - mvsadcost [1] [0] = 300; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost [0][i] = (int) z; - mvsadcost [1][i] = (int) z; - mvsadcost [0][-i] = (int) z; - mvsadcost [1][-i] = (int) z; - } while (++i <= mvfp_max); -} - -static void cal_mvsadcosts_hp(int *mvsadcost[2]) { - int i = 1; - - mvsadcost [0] [0] = 300; - mvsadcost [1] [0] = 300; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost [0][i] = (int) z; - mvsadcost [1][i] = (int) 
z; - mvsadcost [0][-i] = (int) z; - mvsadcost [1][-i] = (int) z; - } while (++i <= mvfp_max_hp); -} - -#endif /* CONFIG_NEWMVENTROPY */ - VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { int i; volatile union { @@ -1824,10 +1776,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { #endif for (i = 0; i < COMP_PRED_CONTEXTS; i++) cm->prob_comppred[i] = 128; -#if CONFIG_TX_SELECT for (i = 0; i < TX_SIZE_MAX - 1; i++) cm->prob_tx[i] = 128; -#endif // Prime the recent reference frame useage counters. // Hereafter they will be maintained as a sort of moving average @@ -1888,11 +1838,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { vp8_zero(inter_uv_modes); vp8_zero(inter_b_modes); #endif -#if CONFIG_NEWMVENTROPY #ifdef NMV_STATS init_nmvstats(); #endif -#endif /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; @@ -1958,7 +1906,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { cpi->gf_rate_correction_factor = 1.0; cpi->twopass.est_max_qcorrection_factor = 1.0; -#if CONFIG_NEWMVENTROPY cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; @@ -1971,19 +1918,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX]; cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX]; cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); -#else - cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max + 1]; - cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max + 1]; - cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max + 1]; - cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max + 1]; - cal_mvsadcosts(cpi->mb.mvsadcost); - - cpi->mb.mvcost_hp[0] = &cpi->mb.mvcosts_hp[0][mv_max_hp + 1]; - cpi->mb.mvcost_hp[1] = &cpi->mb.mvcosts_hp[1][mv_max_hp + 1]; - cpi->mb.mvsadcost_hp[0] = &cpi->mb.mvsadcosts_hp[0][mvfp_max_hp + 1]; - cpi->mb.mvsadcost_hp[1] = &cpi->mb.mvsadcosts_hp[1][mvfp_max_hp + 1]; - 
cal_mvsadcosts_hp(cpi->mb.mvsadcost_hp); -#endif /* CONFIG_NEWMVENTROPY */ for (i = 0; i < KEY_FRAME_CONTEXT; i++) { cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; @@ -2027,74 +1961,48 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { init_mv_ref_counts(); #endif +#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \ + cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \ + cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \ + cpi->fn_ptr[BT].sdx3f = SDX3F; \ + cpi->fn_ptr[BT].sdx8f = SDX8F; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; + + #if CONFIG_SUPERBLOCKS - cpi->fn_ptr[BLOCK_32X32].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32); - cpi->fn_ptr[BLOCK_32X32].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32); - cpi->fn_ptr[BLOCK_32X32].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar32x32); - cpi->fn_ptr[BLOCK_32X32].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_h); - cpi->fn_ptr[BLOCK_32X32].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_v); - cpi->fn_ptr[BLOCK_32X32].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_hv); - cpi->fn_ptr[BLOCK_32X32].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x3); - cpi->fn_ptr[BLOCK_32X32].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x8); - cpi->fn_ptr[BLOCK_32X32].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x4d); + BFP(BLOCK_32X32, vp8_sad32x32, vp8_variance32x32, vp8_sub_pixel_variance32x32, + vp8_variance_halfpixvar32x32_h, vp8_variance_halfpixvar32x32_v, + vp8_variance_halfpixvar32x32_hv, vp8_sad32x32x3, vp8_sad32x32x8, + vp8_sad32x32x4d) #endif - cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); - cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); - cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, 
subpixvar16x16); - cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h); - cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v); - cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv); - cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3); - cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8); - cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d); - - cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8); - cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8); - cpi->fn_ptr[BLOCK_16X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8); - cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3); - cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8); - cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d); - - cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16); - cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16); - cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16); - cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3); - cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8); - cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d); - - cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8); - cpi->fn_ptr[BLOCK_8X8].vf = 
VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8); - cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8); - cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3); - cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8); - cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d); - - cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4); - cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4); - cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4); - cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3); - cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8); - cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); + BFP(BLOCK_16X16, vp8_sad16x16, vp8_variance16x16, vp8_sub_pixel_variance16x16, + vp8_variance_halfpixvar16x16_h, vp8_variance_halfpixvar16x16_v, + vp8_variance_halfpixvar16x16_hv, vp8_sad16x16x3, vp8_sad16x16x8, + vp8_sad16x16x4d) + + BFP(BLOCK_16X8, vp8_sad16x8, vp8_variance16x8, vp8_sub_pixel_variance16x8, + NULL, NULL, NULL, vp8_sad16x8x3, vp8_sad16x8x8, vp8_sad16x8x4d) + + BFP(BLOCK_8X16, vp8_sad8x16, vp8_variance8x16, vp8_sub_pixel_variance8x16, + NULL, NULL, NULL, vp8_sad8x16x3, vp8_sad8x16x8, vp8_sad8x16x4d) + + BFP(BLOCK_8X8, vp8_sad8x8, vp8_variance8x8, vp8_sub_pixel_variance8x8, + NULL, NULL, NULL, vp8_sad8x8x3, vp8_sad8x8x8, vp8_sad8x8x4d) + + BFP(BLOCK_4X4, vp8_sad4x4, vp8_variance4x4, vp8_sub_pixel_variance4x4, + NULL, NULL, NULL, vp8_sad4x4x3, vp8_sad4x4x8, vp8_sad4x4x4d) #if ARCH_X86 || ARCH_X86_64 - 
cpi->fn_ptr[BLOCK_16X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_16X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_8X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_8X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_4X4].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); + cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_8X8].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_4X4].copymem = vp8_copy32xn; #endif cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); @@ -2136,12 +2044,10 @@ void vp8_remove_compressor(VP8_PTR *ptr) { print_mode_context(); } #endif -#if CONFIG_NEWMVENTROPY #ifdef NMV_STATS if (cpi->pass != 1) print_nmvstats(); #endif -#endif #if CONFIG_INTERNAL_STATS @@ -2370,8 +2276,7 @@ void vp8_remove_compressor(VP8_PTR *ptr) { static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, unsigned char *recon, int recon_stride, - unsigned int cols, unsigned int rows, - vp8_variance_rtcd_vtable_t *rtcd) { + unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; int diff; @@ -2380,9 +2285,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, for (col = 0; col + 16 <= cols; col += 16) { unsigned int sse; - VARIANCE_INVOKE(rtcd, mse16x16)(orig + col, orig_stride, - recon + col, recon_stride, - &sse); + vp8_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); total_sse += sse; } @@ -2434,8 +2337,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) { pkt.kind = VPX_CODEC_PSNR_PKT; sse = calc_plane_error(orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, - width, height, - IF_RTCD(&cpi->rtcd.variance)); + width, height); pkt.data.psnr.sse[0] = sse; pkt.data.psnr.sse[1] = sse; 
pkt.data.psnr.samples[0] = width * height; @@ -2446,8 +2348,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) { sse = calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, - width, height, - IF_RTCD(&cpi->rtcd.variance)); + width, height); pkt.data.psnr.sse[0] += sse; pkt.data.psnr.sse[2] = sse; pkt.data.psnr.samples[0] += width * height; @@ -2455,8 +2356,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) { sse = calc_plane_error(orig->v_buffer, orig->uv_stride, recon->v_buffer, recon->uv_stride, - width, height, - IF_RTCD(&cpi->rtcd.variance)); + width, height); pkt.data.psnr.sse[0] += sse; pkt.data.psnr.sse[3] = sse; pkt.data.psnr.samples[0] += width * height; @@ -3034,13 +2934,10 @@ static void encode_frame_to_data_rate /* list of filters to search over */ int mcomp_filters_to_search[] = { -#if CONFIG_SWITCHABLE_INTERP EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE -#else - EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, -#endif }; - int mcomp_filters = sizeof(mcomp_filters_to_search) / sizeof(*mcomp_filters_to_search); + int mcomp_filters = sizeof(mcomp_filters_to_search) / + sizeof(*mcomp_filters_to_search); int mcomp_filter_index = 0; INT64 mcomp_filter_cost[4]; @@ -3265,12 +3162,7 @@ static void encode_frame_to_data_rate cm->mcomp_filter_type = mcomp_filters_to_search[0]; mcomp_filter_index = 0; } else { -#if CONFIG_SWITCHABLE_INTERP - cm->mcomp_filter_type = SWITCHABLE; -#else - cm->mcomp_filter_type = - (Q < SHARP_FILTER_QTHRESH ? 
EIGHTTAP_SHARP : EIGHTTAP); -#endif + cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; } /* TODO: Decide this more intelligently */ xd->allow_high_precision_mv = (Q < HIGH_PRECISION_MV_QTHRESH); @@ -3428,8 +3320,7 @@ static void encode_frame_to_data_rate if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { int last_q = Q; int kf_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); int high_err_target = cpi->ambient_err; int low_err_target = (cpi->ambient_err >> 1); @@ -3584,7 +3475,6 @@ static void encode_frame_to_data_rate if (cpi->is_src_frame_alt_ref) Loop = FALSE; -#if CONFIG_SWITCHABLE_INTERP if (cm->frame_type != KEY_FRAME && !sf->search_best_filter && cm->mcomp_filter_type == SWITCHABLE) { @@ -3610,19 +3500,16 @@ static void encode_frame_to_data_rate if (count[i]) { cm->mcomp_filter_type = vp8_switchable_interp[i]; Loop = TRUE; /* Make sure to loop since the filter changed */ - //loop_count = -1; break; } } } } -#endif if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) { if (mcomp_filter_index < mcomp_filters) { INT64 err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); INT64 rate = cpi->projected_frame_size << 8; mcomp_filter_cost[mcomp_filter_index] = (RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err)); @@ -3684,8 +3571,7 @@ static void encode_frame_to_data_rate // the force key frame if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) { cpi->ambient_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); } // This frame's MVs are saved and will be used in next frame's MV @@ -3758,18 +3644,12 @@ static void encode_frame_to_data_rate update_reference_frames(cm); vp8_copy(cpi->common.fc.coef_counts, cpi->coef_counts); -#if CONFIG_HYBRIDTRANSFORM 
vp8_copy(cpi->common.fc.hybrid_coef_counts, cpi->hybrid_coef_counts); -#endif vp8_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_copy(cpi->common.fc.hybrid_coef_counts_8x8, cpi->hybrid_coef_counts_8x8); -#endif vp8_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_copy(cpi->common.fc.hybrid_coef_counts_16x16, cpi->hybrid_coef_counts_16x16); -#endif vp8_adapt_coef_probs(&cpi->common); if (cpi->common.frame_type != KEY_FRAME) { vp8_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); @@ -3780,14 +3660,8 @@ static void encode_frame_to_data_rate vp8_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); vp8_adapt_mode_probs(&cpi->common); -#if CONFIG_NEWMVENTROPY cpi->common.fc.NMVcount = cpi->NMVcount; vp8_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv); -#else - vp8_copy(cpi->common.fc.MVcount, cpi->MVcount); - vp8_copy(cpi->common.fc.MVcount_hp, cpi->MVcount_hp); - vp8_adapt_mv_probs(&cpi->common); -#endif /* CONFIG_NEWMVENTROPY */ vp8_update_mode_context(&cpi->common); } @@ -3903,8 +3777,7 @@ static void encode_frame_to_data_rate vp8_clear_system_state(); // __asm emms; recon_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); if (cpi->twopass.total_left_stats->coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" @@ -4390,16 +4263,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon int64_t sq_error; ye = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height, - IF_RTCD(&cpi->rtcd.variance)); + recon->y_buffer, recon->y_stride, orig->y_width, + orig->y_height); ue = calc_plane_error(orig->u_buffer, orig->uv_stride, - recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + recon->u_buffer, recon->uv_stride, 
orig->uv_width, + orig->uv_height); ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + recon->v_buffer, recon->uv_stride, orig->uv_width, + orig->uv_height); sq_error = ye + ue + ve; @@ -4419,16 +4292,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon vp8_clear_system_state(); ye = calc_plane_error(orig->y_buffer, orig->y_stride, - pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height, - IF_RTCD(&cpi->rtcd.variance)); + pp->y_buffer, pp->y_stride, orig->y_width, + orig->y_height); ue = calc_plane_error(orig->u_buffer, orig->uv_stride, - pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + pp->u_buffer, pp->uv_stride, orig->uv_width, + orig->uv_height); ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + pp->v_buffer, pp->uv_stride, orig->uv_width, + orig->uv_height); sq_error = ye + ue + ve; @@ -4441,8 +4314,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cpi->totalp += frame_psnr2; frame_ssim2 = vp8_calc_ssim(cpi->Source, - &cm->post_proc_buffer, 1, &weight, - IF_RTCD(&cpi->rtcd.variance)); + &cm->post_proc_buffer, 1, &weight); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; @@ -4461,7 +4333,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon if (cpi->b_calculate_ssimg) { double y, u, v, frame_all; frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show, - &y, &u, &v, IF_RTCD(&cpi->rtcd.variance)); + &y, &u, &v); cpi->total_ssimg_y += y; cpi->total_ssimg_u += u; cpi->total_ssimg_v += v; @@ -4604,19 +4476,19 @@ int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert -int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const 
vp8_variance_rtcd_vtable_t *rtcd) { +int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { int i, j; int Total = 0; unsigned char *src = source->y_buffer; unsigned char *dst = dest->y_buffer; - (void)rtcd; // Loop through the Y plane raw and reconstruction data summing (square differences) for (i = 0; i < source->y_height; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse); + Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, + &sse); } src += 16 * source->y_stride; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 01151280c..ab6802509 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -60,17 +60,10 @@ #define VP8_TEMPORAL_ALT_REF 1 typedef struct { -#if CONFIG_NEWMVENTROPY nmv_context nmvc; int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; int nmvcosts_hp[2][MV_VALS]; -#else - MV_CONTEXT mvc[2]; - int mvcosts[2][MVvals + 1]; - MV_CONTEXT_HP mvc_hp[2]; - int mvcosts_hp[2][MVvals_hp + 1]; -#endif #ifdef MODE_STATS // Stats @@ -97,24 +90,18 @@ typedef struct { vp8_prob coef_probs[BLOCK_TYPES] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM vp8_prob hybrid_coef_probs[BLOCK_TYPES] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#endif vp8_prob coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_prob hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#endif vp8_prob coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#endif vp8_prob ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1]; @@ -123,10 +110,8 @@ 
typedef struct { vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1]; vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1]; -#if CONFIG_SWITCHABLE_INTERP vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS + 1] [VP8_SWITCHABLE_FILTERS - 1]; -#endif int mv_ref_ct[6][4][2]; int mode_context[6][4]; @@ -365,7 +350,6 @@ typedef struct { typedef struct VP8_ENCODER_RTCD { VP8_COMMON_RTCD *common; - vp8_variance_rtcd_vtable_t variance; vp8_fdct_rtcd_vtable_t fdct; vp8_encodemb_rtcd_vtable_t encodemb; vp8_search_rtcd_vtable_t search; @@ -373,10 +357,10 @@ typedef struct VP8_ENCODER_RTCD { } VP8_ENCODER_RTCD; enum { - BLOCK_16X8, - BLOCK_8X16, - BLOCK_8X8, - BLOCK_4X4, + BLOCK_16X8 = PARTITIONING_16X8, + BLOCK_8X16 = PARTITIONING_8X16, + BLOCK_8X8 = PARTITIONING_8X8, + BLOCK_4X4 = PARTITIONING_4X4, BLOCK_16X16, BLOCK_MAX_SEGMENTS, BLOCK_32X32 = BLOCK_MAX_SEGMENTS, @@ -465,13 +449,11 @@ typedef struct VP8_COMP { int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES]; int comp_pred_count[COMP_PRED_CONTEXTS]; int single_pred_count[COMP_PRED_CONTEXTS]; -#if CONFIG_TX_SELECT // FIXME contextualize int txfm_count[TX_SIZE_MAX]; int txfm_count_8x8p[TX_SIZE_MAX - 1]; int64_t rd_tx_select_diff[NB_TXFM_MODES]; int rd_tx_select_threshes[4][NB_TXFM_MODES]; -#endif int RDMULT; int RDDIV; @@ -563,39 +545,28 @@ typedef struct VP8_COMP { // int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ int y_uv_mode_count[VP8_YMODES][VP8_UV_MODES]; -#if CONFIG_NEWMVENTROPY nmv_context_counts NMVcount; -#else - unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ - unsigned int MVcount_hp [2] [MVvals_hp]; /* (row,col) MV cts this frame */ -#endif unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#if 
CONFIG_HYBRIDTRANSFORM unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_hybrid_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#endif unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#if CONFIG_HYBRIDTRANSFORM8X8 unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_hybrid_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#endif unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#if CONFIG_HYBRIDTRANSFORM16X16 unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; -#endif int gfu_boost; int last_boost; @@ -780,10 +751,8 @@ typedef struct VP8_COMP { int pred_filter_on_count; int pred_filter_off_count; #endif -#if CONFIG_SWITCHABLE_INTERP - unsigned int 
switchable_interp_count[VP8_SWITCHABLE_FILTERS+1] + unsigned int switchable_interp_count[VP8_SWITCHABLE_FILTERS + 1] [VP8_SWITCHABLE_FILTERS]; -#endif #if CONFIG_NEW_MVREF unsigned int best_ref_index_counts[MAX_MV_REFS]; diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index 954997889..57bd41468 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -21,7 +21,8 @@ #include "vpx_ports/arm.h" #endif -extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); +extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest); #if HAVE_ARMV7 extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #endif @@ -71,7 +72,8 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst vpx_memcpy(dst_y, src_y, ystride * (linestocopy + 16)); } -static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd) { +static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest, int Fraction) { int i, j; int Total = 0; int srcoffset, dstoffset; @@ -79,7 +81,6 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF unsigned char *dst = dest->y_buffer; int linestocopy = (source->y_height >> (Fraction + 4)); - (void)rtcd; if (linestocopy < 1) linestocopy = 1; @@ -97,7 +98,8 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF for (i = 0; i < linestocopy; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse); + Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, + &sse); } src += 16 * source->y_stride; @@ -179,7 +181,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) 
{ // Get the err using the previous frame's filter value. vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); + best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3); // Re-instate the unfiltered frame vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); @@ -192,7 +194,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); // Get the err for filtered frame - filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3); // Re-instate the unfiltered frame vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); @@ -221,7 +223,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); // Get the err for filtered frame - filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3); // Re-instate the unfiltered frame vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); @@ -308,7 +310,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme vp8cx_set_alt_lf_level(cpi, filt_mid); vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_mid, segment); - best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + best_err = vp8_calc_ss_err(sd, cm->frame_to_show); filt_best = filt_mid; // Re-instate the unfiltered frame @@ -348,7 +350,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_low, segment); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, 
IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 @@ -383,7 +385,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme vp8cx_set_alt_lf_level(cpi, filt_high); vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_high, segment); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 @@ -517,7 +519,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8cx_set_alt_lf_level(cpi, filt_mid); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid); - best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + best_err = vp8_calc_ss_err(sd, cm->frame_to_show); filt_best = filt_mid; // Re-instate the unfiltered frame @@ -557,7 +559,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 @@ -592,7 +594,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8cx_set_alt_lf_level(cpi, filt_high); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index b6a1f27f8..16b4e6e1d 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -21,7 +21,6 @@ extern int enc_debug; #endif -#if CONFIG_HYBRIDTRANSFORM void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) { int i, rc, eob; int zbin; @@ -85,7 +84,6 
@@ void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) { d->eob = eob + 1; } -#endif void vp8_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) { int i, rc, eob; diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h index 1375ed0b0..e39433fc3 100644 --- a/vp8/encoder/quantize.h +++ b/vp8/encoder/quantize.h @@ -30,11 +30,9 @@ #include "arm/quantize_arm.h" #endif -#if CONFIG_HYBRIDTRANSFORM #define prototype_quantize_block_type(sym) \ void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type) extern prototype_quantize_block_type(vp8_ht_quantize_b_4x4); -#endif #ifndef vp8_quantize_quantb_4x4 #define vp8_quantize_quantb_4x4 vp8_regular_quantize_b_4x4 diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 570bedfe9..cc3c82e74 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -132,17 +132,10 @@ void vp8_save_coding_context(VP8_COMP *cpi) { // intended for use in a re-code loop in vp8_compress_frame where the // quantizer value is adjusted between loop iterations. 
-#if CONFIG_NEWMVENTROPY cc->nmvc = cm->fc.nmvc; vp8_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); vp8_copy(cc->nmvcosts, cpi->mb.nmvcosts); vp8_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); -#else - vp8_copy(cc->mvc, cm->fc.mvc); - vp8_copy(cc->mvcosts, cpi->mb.mvcosts); - vp8_copy(cc->mvc_hp, cm->fc.mvc_hp); - vp8_copy(cc->mvcosts_hp, cpi->mb.mvcosts_hp); -#endif vp8_copy(cc->mv_ref_ct, cm->fc.mv_ref_ct); vp8_copy(cc->mode_context, cm->fc.mode_context); @@ -178,20 +171,12 @@ void vp8_save_coding_context(VP8_COMP *cpi) { vp8_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas); vp8_copy(cc->coef_probs, cm->fc.coef_probs); -#if CONFIG_HYBRIDTRANSFORM vp8_copy(cc->hybrid_coef_probs, cm->fc.hybrid_coef_probs); -#endif vp8_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8); -#endif vp8_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16); -#endif -#if CONFIG_SWITCHABLE_INTERP vp8_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); -#endif } void vp8_restore_coding_context(VP8_COMP *cpi) { @@ -202,17 +187,10 @@ void vp8_restore_coding_context(VP8_COMP *cpi) { // Restore key state variables to the snapshot state stored in the // previous call to vp8_save_coding_context. 
-#if CONFIG_NEWMVENTROPY cm->fc.nmvc = cc->nmvc; vp8_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); vp8_copy(cpi->mb.nmvcosts, cc->nmvcosts); vp8_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); -#else - vp8_copy(cm->fc.mvc, cc->mvc); - vp8_copy(cpi->mb.mvcosts, cc->mvcosts); - vp8_copy(cm->fc.mvc_hp, cc->mvc_hp); - vp8_copy(cpi->mb.mvcosts_hp, cc->mvcosts_hp); -#endif vp8_copy(cm->fc.mv_ref_ct, cc->mv_ref_ct); vp8_copy(cm->fc.mode_context, cc->mode_context); @@ -249,20 +227,12 @@ void vp8_restore_coding_context(VP8_COMP *cpi) { vp8_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas); vp8_copy(cm->fc.coef_probs, cc->coef_probs); -#if CONFIG_HYBRIDTRANSFORM vp8_copy(cm->fc.hybrid_coef_probs, cc->hybrid_coef_probs); -#endif vp8_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8); -#if CONFIG_HYBRIDTRANSFORM8X8 vp8_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8); -#endif vp8_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); -#if CONFIG_HYBRIDTRANSFORM16X16 vp8_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16); -#endif -#if CONFIG_SWITCHABLE_INTERP vp8_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); -#endif } @@ -275,16 +245,6 @@ void vp8_setup_key_frame(VP8_COMP *cpi) { vp8_default_bmode_probs(cm->fc.bmode_prob); vp8_init_mv_probs(& cpi->common); -#if CONFIG_NEWMVENTROPY == 0 - /* this is not really required */ - { - int flag[2] = {1, 1}; - vp8_build_component_cost_table( - cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); - vp8_build_component_cost_table_hp( - cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag); - } -#endif // cpi->common.filter_level = 0; // Reset every key frame. 
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 4b9e90725..e919de36f 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -60,10 +60,8 @@ extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); #define INVALID_MV 0x80008000 -#if CONFIG_SWITCHABLE_INTERP /* Factor to weigh the rate for switchable interp filters */ #define SWITCHABLE_INTERP_RATE_FACTOR 1 -#endif static const int auto_speed_thresh[17] = { 1000, @@ -355,37 +353,31 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) { cpi->mb.token_costs[TX_4X4], (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs, BLOCK_TYPES); -#if CONFIG_HYBRIDTRANSFORM fill_token_costs( cpi->mb.hybrid_token_costs[TX_4X4], (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.hybrid_coef_probs, BLOCK_TYPES); -#endif fill_token_costs( cpi->mb.token_costs[TX_8X8], (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8); -#if CONFIG_HYBRIDTRANSFORM8X8 fill_token_costs( cpi->mb.hybrid_token_costs[TX_8X8], (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8); -#endif fill_token_costs( cpi->mb.token_costs[TX_16X16], (const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16); -#if CONFIG_HYBRIDTRANSFORM16X16 fill_token_costs( cpi->mb.hybrid_token_costs[TX_16X16], (const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16); -#endif /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; @@ -393,14 +385,12 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) { if (cpi->common.frame_type != KEY_FRAME) { -#if CONFIG_NEWMVENTROPY vp8_build_nmv_cost_table( cpi->mb.nmvjointcost, cpi->mb.e_mbd.allow_high_precision_mv ? 
cpi->mb.nmvcost_hp : cpi->mb.nmvcost, &cpi->common.fc.nmvc, cpi->mb.e_mbd.allow_high_precision_mv, 1, 1); -#endif } } @@ -409,19 +399,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) { milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16; -#if 0 - - if (0) { - FILE *f; - - f = fopen("speed.stt", "a"); - fprintf(f, " %8ld %10ld %10ld %10ld\n", - cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time); - fclose(f); - } - -#endif - /* // this is done during parameter valid check if( cpi->oxcf.cpu_used > 16) @@ -520,7 +497,7 @@ int vp8_mbuverror_c(MACROBLOCK *mb) { return error; } -int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) { +int vp8_uvsse(MACROBLOCK *x) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); @@ -551,16 +528,14 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) { vptr = x->e_mbd.pre.v_buffer + offset; if ((mv_row | mv_col) & 7) { - VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, - (mv_col & 7) << 1, (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); - VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, - (mv_col & 7) << 1, (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); + vp8_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1, + (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); + vp8_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1, + (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } else { - VARIANCE_INVOKE(rtcd, var8x8)(uptr, pre_stride, - upred_ptr, uv_stride, &sse2); - VARIANCE_INVOKE(rtcd, var8x8)(vptr, pre_stride, - vpred_ptr, uv_stride, &sse1); + vp8_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); + vp8_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } return sse2; @@ -607,9 +582,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, 
PLANE_TYPE type, short *qcoeff_ptr = b->qcoeff; MACROBLOCKD *xd = &mb->e_mbd; MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi; -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type = DCT_DCT; -#endif int segment_id = mbmi->segment_id; switch (tx_size) { @@ -617,7 +590,6 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, scan = vp8_default_zig_zag1d; band = vp8_coef_bands; default_eob = 16; -#if CONFIG_HYBRIDTRANSFORM if (type == PLANE_TYPE_Y_WITH_DC) { tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { @@ -636,14 +608,12 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, } } } -#endif break; case TX_8X8: scan = vp8_default_zig_zag1d_8x8; band = vp8_coef_bands_8x8; default_eob = 64; -#if CONFIG_HYBRIDTRANSFORM8X8 if (type == PLANE_TYPE_Y_WITH_DC) { BLOCKD *bb; int ib = (b - xd->block); @@ -653,17 +623,14 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, tx_type = get_tx_type_8x8(xd, bb); } } -#endif break; case TX_16X16: scan = vp8_default_zig_zag1d_16x16; band = vp8_coef_bands_16x16; default_eob = 256; -#if CONFIG_HYBRIDTRANSFORM16X16 if (type == PLANE_TYPE_Y_WITH_DC) { tx_type = get_tx_type_16x16(xd, b); } -#endif break; default: break; @@ -675,7 +642,6 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) { for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; @@ -687,9 +653,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, if (c < seg_eob) cost += mb->hybrid_token_costs[tx_size][type][band[c]] [pt][DCT_EOB_TOKEN]; - } else -#endif - { + } else { for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp8_dct_value_tokens_ptr[v].Token; @@ -870,9 +834,7 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, MACROBLOCKD *xd = 
&mb->e_mbd; BLOCKD *b = &mb->e_mbd.block[0]; BLOCK *be = &mb->block[0]; -#if CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type; -#endif ENCODEMB_INVOKE(&rtcd->encodemb, submby)( mb->src_diff, @@ -880,24 +842,18 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, mb->e_mbd.predictor, mb->block[0].src_stride); -#if CONFIG_HYBRIDTRANSFORM16X16 tx_type = get_tx_type_16x16(xd, b); if (tx_type != DCT_DCT) { vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 16); } else vp8_transform_mby_16x16(mb); -#else - vp8_transform_mby_16x16(mb); -#endif vp8_quantize_mby_16x16(mb); -#if CONFIG_HYBRIDTRANSFORM16X16 // TODO(jingning) is it possible to quickly determine whether to force // trailing coefficients to be zero, instead of running trellis // optimization in the rate-distortion optimization loop? if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) vp8_optimize_mby_16x16(mb, rtcd); -#endif d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0); @@ -913,8 +869,6 @@ static void macro_block_yrd(VP8_COMP *cpi, MACROBLOCK *x, int *rate, VP8_COMMON *cm = &cpi->common; MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; -#if CONFIG_TX_SELECT - MACROBLOCKD *xd = &x->e_mbd; int can_skip = cm->mb_no_coeff_skip; vp8_prob skip_prob = can_skip ? get_pred_prob(cm, xd, PRED_MBSKIP) : 128; @@ -1022,25 +976,6 @@ static void macro_block_yrd(VP8_COMP *cpi, MACROBLOCK *x, int *rate, else txfm_cache[TX_MODE_SELECT] = rd4x4s < rd8x8s ? 
rd4x4s : rd8x8s; -#else /* CONFIG_TX_SELECT */ - - switch (cpi->common.txfm_mode) { - case ALLOW_16X16: - macro_block_yrd_16x16(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable); - mbmi->txfm_size = TX_16X16; - break; - case ALLOW_8X8: - macro_block_yrd_8x8(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable); - mbmi->txfm_size = TX_8X8; - break; - default: - case ONLY_4X4: - macro_block_yrd_4x4(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable); - mbmi->txfm_size = TX_4X4; - break; - } - -#endif /* CONFIG_TX_SELECT */ } static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { @@ -1155,10 +1090,8 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; -#if CONFIG_HYBRIDTRANSFORM TX_TYPE tx_type = DCT_DCT; TX_TYPE best_tx_type = DCT_DCT; -#endif /* * The predictor buffer is a 2d buffer with a stride of 16. Create * a temp buffer that meets the stride requirements, but we are only @@ -1191,7 +1124,6 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16); b->bmi.as_mode.first = mode; -#if CONFIG_HYBRIDTRANSFORM tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4); @@ -1200,10 +1132,6 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); } -#else - x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b); -#endif tempa = ta; templ = tl; @@ -1221,9 +1149,7 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; -#if CONFIG_HYBRIDTRANSFORM best_tx_type = tx_type; -#endif #if CONFIG_COMP_INTRA_PRED *best_second_mode = mode2; @@ -1242,17 +1168,12 @@ static int64_t 
rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode); #endif -#if CONFIG_HYBRIDTRANSFORM // inverse transform if (best_tx_type != DCT_DCT) vp8_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4); else IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)( best_dqcoeff, b->diff, 32); -#else - IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)( - best_dqcoeff, b->diff, 32); -#endif vp8_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -1405,11 +1326,9 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi, int64_t this_rd; MACROBLOCKD *xd = &x->e_mbd; -#if CONFIG_TX_SELECT int i; for (i = 0; i < NB_TXFM_MODES; i++) txfm_cache[i] = INT64_MAX; -#endif // Y Search for 16x16 intra prediction mode for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -1452,7 +1371,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi, *skippable = skip; } -#if CONFIG_TX_SELECT for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd = this_rd + local_txfm_cache[i] - local_txfm_cache[cpi->common.txfm_mode]; @@ -1460,7 +1378,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi, txfm_cache[i] = adj_rd; } } -#endif #if CONFIG_COMP_INTRA_PRED } @@ -1535,22 +1452,18 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib, vp8_subtract_4b_c(be, b, 16); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { -#if CONFIG_HYBRIDTRANSFORM8X8 TX_TYPE tx_type = get_tx_type_8x8(xd, b); if (tx_type != DCT_DCT) vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, tx_type, 8); else x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); -#else - x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); -#endif x->quantize_b_8x8(x->block + idx, xd->block + idx); // compute quantization mse of 8x8 block distortion = vp8_block_error_c((x->block + idx)->coeff, (xd->block + idx)->dqcoeff, 64); - ta0 = *(a + vp8_block2above_8x8[idx]); - tl0 = *(l + 
vp8_block2left_8x8 [idx]); + ta0 = a[vp8_block2above_8x8[idx]]; + tl0 = l[vp8_block2left_8x8[idx]]; rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, &ta0, &tl0, TX_8X8); @@ -1576,10 +1489,10 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib, distortion += vp8_block_error_c((x->block + ib + 5)->coeff, (xd->block + ib + 5)->dqcoeff, 16); - ta0 = *(a + vp8_block2above[ib]); - ta1 = *(a + vp8_block2above[ib + 1]); - tl0 = *(l + vp8_block2above[ib]); - tl1 = *(l + vp8_block2above[ib + 4]); + ta0 = a[vp8_block2above[ib]]; + ta1 = a[vp8_block2above[ib + 1]]; + tl0 = l[vp8_block2left[ib]]; + tl1 = l[vp8_block2left[ib + 4]]; rate_t = cost_coeffs(x, xd->block + ib, PLANE_TYPE_Y_WITH_DC, &ta0, &tl0, TX_4X4); rate_t += cost_coeffs(x, xd->block + ib + 1, PLANE_TYPE_Y_WITH_DC, @@ -1621,15 +1534,15 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib, vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - *(a + vp8_block2above_8x8[idx]) = besta0; - *(a + vp8_block2above_8x8[idx] + 1) = besta1; - *(l + vp8_block2left_8x8 [idx]) = bestl0; - *(l + vp8_block2left_8x8 [idx] + 1) = bestl1; + a[vp8_block2above_8x8[idx]] = besta0; + a[vp8_block2above_8x8[idx] + 1] = besta1; + l[vp8_block2left_8x8[idx]] = bestl0; + l[vp8_block2left_8x8[idx] + 1] = bestl1; } else { - *(a + vp8_block2above[ib]) = besta0; - *(a + vp8_block2above[ib + 1]) = besta1; - *(l + vp8_block2above[ib]) = bestl0; - *(l + vp8_block2above[ib + 4]) = bestl1; + a[vp8_block2above[ib]] = besta0; + a[vp8_block2above[ib + 1]] = besta1; + l[vp8_block2left[ib]] = bestl0; + l[vp8_block2left[ib + 4]] = bestl1; } return best_rd; @@ -2223,12 +2136,22 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, int which_label, int *labelyrate, int *distortion, + int64_t *otherrd, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, const VP8_ENCODER_RTCD *rtcd) { int i, j; MACROBLOCKD *xd = &x->e_mbd; const int iblock[4] = { 0, 
1, 4, 5 }; + int othercost = 0, otherdist = 0; + ENTROPY_CONTEXT_PLANES tac, tlc; + ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac, + *tlcp = (ENTROPY_CONTEXT *) &tlc; + + if (otherrd) { + memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES)); + } *distortion = 0; *labelyrate = 0; @@ -2236,8 +2159,9 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, int ib = vp8_i8x8_block[i]; if (labels[ib] == which_label) { - BLOCKD *bd = &xd->block[ib]; - BLOCK *be = &x->block[ib]; + int idx = (ib & 8) + ((ib & 2) << 1); + BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx]; + BLOCK *be = &x->block[ib], *be2 = &x->block[idx]; int thisdistortion; vp8_build_inter_predictors4b(xd, bd, 16); @@ -2245,24 +2169,66 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, vp8_build_2nd_inter_predictors4b(xd, bd, 16); vp8_subtract_4b_c(be, bd, 16); - for (j = 0; j < 4; j += 2) { - bd = &xd->block[ib + iblock[j]]; - be = &x->block[ib + iblock[j]]; - x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); - thisdistortion = vp8_block_error_c(be->coeff, bd->dqcoeff, 32); + if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { + if (otherrd) { + x->vp8_short_fdct8x8(be->src_diff, be2->coeff, 32); + x->quantize_b_8x8(be2, bd2); + thisdistortion = vp8_block_error_c(be2->coeff, bd2->dqcoeff, 64); + otherdist += thisdistortion; + othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, + tacp + vp8_block2above_8x8[idx], + tlcp + vp8_block2left_8x8[idx], TX_8X8); + } + for (j = 0; j < 4; j += 2) { + bd = &xd->block[ib + iblock[j]]; + be = &x->block[ib + iblock[j]]; + x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); + thisdistortion = vp8_block_error_c(be->coeff, bd->dqcoeff, 32); + *distortion += thisdistortion; + *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + ta + vp8_block2above[ib + iblock[j]], + tl + vp8_block2left[ib + 
iblock[j]], + TX_4X4); + *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, + ta + vp8_block2above[ib + iblock[j] + 1], + tl + vp8_block2left[ib + iblock[j]], + TX_4X4); + } + } else /* 8x8 */ { + if (otherrd) { + for (j = 0; j < 4; j += 2) { + BLOCKD *bd3 = &xd->block[ib + iblock[j]]; + BLOCK *be3 = &x->block[ib + iblock[j]]; + x->vp8_short_fdct8x4(be3->src_diff, be3->coeff, 32); + x->quantize_b_4x4_pair(be3, be3 + 1, bd3, bd3 + 1); + thisdistortion = vp8_block_error_c(be3->coeff, bd3->dqcoeff, 32); + otherdist += thisdistortion; + othercost += cost_coeffs(x, bd3, PLANE_TYPE_Y_WITH_DC, + tacp + vp8_block2above[ib + iblock[j]], + tlcp + vp8_block2left[ib + iblock[j]], + TX_4X4); + othercost += cost_coeffs(x, bd3 + 1, PLANE_TYPE_Y_WITH_DC, + tacp + vp8_block2above[ib + iblock[j] + 1], + tlcp + vp8_block2left[ib + iblock[j]], + TX_4X4); + } + } + x->vp8_short_fdct8x8(be->src_diff, be2->coeff, 32); + x->quantize_b_8x8(be2, bd2); + thisdistortion = vp8_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, - ta + vp8_block2above[ib + iblock[j]], - tl + vp8_block2left[ib + iblock[j]], TX_4X4); - *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, - ta + vp8_block2above[ib + iblock[j] + 1], - tl + vp8_block2left[ib + iblock[j]], - TX_4X4); + *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, + ta + vp8_block2above_8x8[idx], + tl + vp8_block2left_8x8[idx], TX_8X8); } } } *distortion >>= 2; + if (otherrd) { + othercost >>= 2; + *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist); + } return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); } @@ -2274,7 +2240,8 @@ typedef struct { int_mv mvp; int64_t segment_rd; - int segment_num; + SPLITMV_PARTITIONING_TYPE segment_num; + TX_SIZE txfm_size; int r; int d; int segment_yrate; @@ -2300,9 +2267,14 @@ int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { return r; } -static void rd_check_segment(VP8_COMP *cpi, 
MACROBLOCK *x, - BEST_SEG_INFO *bsi, unsigned int segmentation, - int_mv seg_mvs[16 /* n_blocks */][MAX_REF_FRAMES - 1]) { +static void rd_check_segment_txsize(VP8_COMP *cpi, MACROBLOCK *x, + BEST_SEG_INFO *bsi, + SPLITMV_PARTITIONING_TYPE segmentation, + TX_SIZE tx_size, int64_t *otherrds, + int64_t *rds, int *completed, + /* 16 = n_blocks */ + int_mv seg_mvs[16 /* n_blocks */] + [MAX_REF_FRAMES - 1]) { int i, j; int const *labels; int br = 0, bd = 0; @@ -2310,12 +2282,12 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; int label_count; - int64_t this_segment_rd = 0; + int64_t this_segment_rd = 0, other_segment_rd; int label_mv_thresh; int rate = 0; int sbr = 0, sbd = 0; int segmentyrate = 0; - uint8_t best_eobs[16]; + uint8_t best_eobs[16] = { 0 }; vp8_variance_fn_ptr_t *v_fn_ptr; @@ -2343,20 +2315,23 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, label_mv_thresh = 1 * bsi->mvthresh / label_count; // Segmentation method overheads - rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); + rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, + vp8_mbsplit_encodings + segmentation); rate += vp8_cost_mv_ref(cpi, SPLITMV, bsi->mdcounts); this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); br += rate; + other_segment_rd = this_segment_rd; - for (i = 0; i < label_count; i++) { + mbmi->txfm_size = tx_size; + for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; - int64_t best_label_rd = INT64_MAX; + int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; // search for the best motion vector on this segment for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { - int64_t this_rd; + int64_t this_rd, other_rd; int distortion; int labelyrate; ENTROPY_CONTEXT_PLANES t_above_s, 
t_left_s; @@ -2378,21 +2353,23 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *c; BLOCKD *e; - // Is the best so far sufficiently good that we cant justify doing and new motion search. + /* Is the best so far sufficiently good that we cant justify doing + * and new motion search. */ if (best_label_rd < label_mv_thresh) break; if (cpi->compressor_speed) { - if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) { + if (segmentation == PARTITIONING_8X16 || + segmentation == PARTITIONING_16X8) { bsi->mvp.as_int = bsi->sv_mvp[i].as_int; - if (i == 1 && segmentation == BLOCK_16X8) + if (i == 1 && segmentation == PARTITIONING_16X8) bsi->mvp.as_int = bsi->sv_mvp[2].as_int; step_param = bsi->sv_istep[i]; } // use previous block's result as next block's MV predictor. - if (segmentation == BLOCK_4X4 && i > 0) { + if (segmentation == PARTITIONING_4X4 && i > 0) { bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int; if (i == 4 || i == 8 || i == 12) bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int; @@ -2424,7 +2401,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, // Should we do a full search (best quality only) if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { /* Check if mvp_full is within the range. 
*/ - vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, c, e, &mvp_full, sadpb, 16, v_fn_ptr, @@ -2434,7 +2412,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, bestsme = thissme; mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int; } else { - // The full search result is actually worse so re-instate the previous best vector + /* The full search result is actually worse so re-instate the + * previous best vector */ e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int; } } @@ -2444,15 +2423,16 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, int distortion; unsigned int sse; cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit, v_fn_ptr, XMVCOST, - &distortion, &sse); + bsi->ref_mv, x->errorperbit, v_fn_ptr, + XMVCOST, &distortion, &sse); // safe motion search result for use in compound prediction seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; } } /* NEW4X4 */ else if (mbmi->second_ref_frame && this_mode == NEW4X4) { - // motion search not completed? Then skip newmv for this block with comppred + /* motion search not completed? 
Then skip newmv for this block with + * comppred */ if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { continue; @@ -2474,14 +2454,15 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, mv_check_bounds(x, &second_mode_mv[this_mode])) continue; - if (segmentation == BLOCK_4X4) { + if (segmentation == PARTITIONING_4X4) { this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate, &distortion, ta_s, tl_s, IF_RTCD(&cpi->rtcd)); + other_rd = this_rd; } else { this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate, - &distortion, ta_s, tl_s, - IF_RTCD(&cpi->rtcd)); + &distortion, &other_rd, + ta_s, tl_s, IF_RTCD(&cpi->rtcd)); } this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); rate += labelyrate; @@ -2492,9 +2473,20 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, bestlabelyrate = labelyrate; mode_selected = this_mode; best_label_rd = this_rd; - for (j = 0; j < 16; j++) - if (labels[j] == i) - best_eobs[j] = x->e_mbd.block[j].eob; + if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) { + for (j = 0; j < 16; j++) + if (labels[j] == i) + best_eobs[j] = x->e_mbd.block[j].eob; + } else { + for (j = 0; j < 4; j++) { + int ib = vp8_i8x8_block[j], idx = j * 4; + + if (labels[ib] == i) + best_eobs[idx] = x->e_mbd.block[idx].eob; + } + } + if (other_rd < best_other_rd) + best_other_rd = other_rd; vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -2506,18 +2498,18 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], - &second_mode_mv[mode_selected], seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, XMVCOST); + &second_mode_mv[mode_selected], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, XMVCOST); br += sbr; bd += sbd; segmentyrate += bestlabelyrate; this_segment_rd += best_label_rd; - - 
if (this_segment_rd >= bsi->segment_rd) { - break; - } - - + other_segment_rd += best_other_rd; + if (rds) + rds[i] = this_segment_rd; + if (otherrds) + rds[i] = other_segment_rd; } /* for each label */ if (this_segment_rd < bsi->segment_rd) { @@ -2526,6 +2518,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, bsi->segment_yrate = segmentyrate; bsi->segment_rd = this_segment_rd; bsi->segment_num = segmentation; + bsi->txfm_size = mbmi->txfm_size; // store everything needed to come back to this!! for (i = 0; i < 16; i++) { @@ -2538,6 +2531,105 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, bsi->eobs[i] = best_eobs[i]; } } + + if (completed) { + *completed = i; + } +} + +static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + BEST_SEG_INFO *bsi, + unsigned int segmentation, + /* 16 = n_blocks */ + int_mv seg_mvs[16][MAX_REF_FRAMES - 1], + int64_t txfm_cache[NB_TXFM_MODES]) { + int i, n, c = vp8_mbsplit_count[segmentation]; + + if (segmentation == PARTITIONING_4X4) { + int64_t rd[16]; + + rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL, + rd, &n, seg_mvs); + if (n == c) { + for (i = 0; i < NB_TXFM_MODES; i++) { + if (rd[c - 1] < txfm_cache[i]) + txfm_cache[i] = rd[c - 1]; + } + } + } else { + int64_t diff, base_rd; + int cost4x4 = vp8_cost_bit(cpi->common.prob_tx[0], 0); + int cost8x8 = vp8_cost_bit(cpi->common.prob_tx[0], 1); + + if (cpi->common.txfm_mode == TX_MODE_SELECT) { + int64_t rd4x4[4], rd8x8[4]; + int n4x4, n8x8, nmin; + BEST_SEG_INFO bsi4x4, bsi8x8; + + /* factor in cost of cost4x4/8x8 in decision */ + vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi)); + vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi)); + rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation, + TX_4X4, NULL, rd4x4, &n4x4, seg_mvs); + rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation, + TX_8X8, NULL, rd8x8, &n8x8, seg_mvs); + if (bsi4x4.segment_num == segmentation) { + bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); + if (bsi4x4.segment_rd < 
bsi->segment_rd) + vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi)); + } + if (bsi8x8.segment_num == segmentation) { + bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0); + if (bsi8x8.segment_rd < bsi->segment_rd) + vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi)); + } + n = n4x4 > n8x8 ? n4x4 : n8x8; + if (n == c) { + nmin = n4x4 < n8x8 ? n4x4 : n8x8; + diff = rd8x8[nmin - 1] - rd4x4[nmin - 1]; + if (n == n4x4) { + base_rd = rd4x4[c - 1]; + } else { + base_rd = rd8x8[c - 1] - diff; + } + } + } else { + int64_t rd[4], otherrd[4]; + + if (cpi->common.txfm_mode == ONLY_4X4) { + rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd, + rd, &n, seg_mvs); + if (n == c) { + base_rd = rd[c - 1]; + diff = otherrd[c - 1] - rd[c - 1]; + } + } else /* use 8x8 transform */ { + rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd, + rd, &n, seg_mvs); + if (n == c) { + diff = rd[c - 1] - otherrd[c - 1]; + base_rd = otherrd[c - 1]; + } + } + } + + if (n == c) { + if (base_rd < txfm_cache[ONLY_4X4]) { + txfm_cache[ONLY_4X4] = base_rd; + } + if (base_rd + diff < txfm_cache[1]) { + txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff; + } + if (diff < 0) { + base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0); + } else { + base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); + } + if (base_rd < txfm_cache[TX_MODE_SELECT]) { + txfm_cache[TX_MODE_SELECT] = base_rd; + } + } + } } static __inline @@ -2553,17 +2645,26 @@ void vp8_cal_step_param(int sr, int *sp) { *sp = MAX_MVSEARCH_STEPS - 1 - step; } -static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, - int_mv *best_ref_mv, int_mv *second_best_ref_mv, int64_t best_rd, - int *mdcounts, int *returntotrate, - int *returnyrate, int *returndistortion, - int *skippable, int mvthresh, - int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1]) { +static int rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + 
int64_t best_rd, + int *mdcounts, + int *returntotrate, + int *returnyrate, + int *returndistortion, + int *skippable, int mvthresh, + int_mv seg_mvs[NB_PARTITIONINGS] + [16 /* n_blocks */] + [MAX_REF_FRAMES - 1], + int64_t txfm_cache[NB_TXFM_MODES]) { int i; BEST_SEG_INFO bsi; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; vpx_memset(&bsi, 0, sizeof(bsi)); + for (i = 0; i < NB_TXFM_MODES; i++) + txfm_cache[i] = INT64_MAX; bsi.segment_rd = best_rd; bsi.ref_mv = best_ref_mv; @@ -2571,6 +2672,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, bsi.mvp.as_int = best_ref_mv->as_int; bsi.mvthresh = mvthresh; bsi.mdcounts = mdcounts; + bsi.txfm_size = TX_4X4; for (i = 0; i < 16; i++) bsi.modes[i] = ZERO4X4; @@ -2578,15 +2680,19 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, if (cpi->compressor_speed == 0) { /* for now, we will keep the original segmentation order when in best quality mode */ - rd_check_segment(cpi, x, &bsi, BLOCK_16X8, seg_mvs[BLOCK_16X8]); - rd_check_segment(cpi, x, &bsi, BLOCK_8X16, seg_mvs[BLOCK_8X16]); - rd_check_segment(cpi, x, &bsi, BLOCK_8X8, seg_mvs[BLOCK_8X8]); - rd_check_segment(cpi, x, &bsi, BLOCK_4X4, seg_mvs[BLOCK_4X4]); + rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, + seg_mvs[PARTITIONING_16X8], txfm_cache); + rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, + seg_mvs[PARTITIONING_8X16], txfm_cache); + rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, + seg_mvs[PARTITIONING_8X8], txfm_cache); + rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, + seg_mvs[PARTITIONING_4X4], txfm_cache); } else { int sr; - rd_check_segment(cpi, x, &bsi, BLOCK_8X8, seg_mvs[BLOCK_8X8]); - + rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, + seg_mvs[PARTITIONING_8X8], txfm_cache); if (bsi.segment_rd < best_rd) { int tmp_col_min = x->mv_col_min; @@ -2602,34 +2708,40 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int; 
bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int; - /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */ + /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range + * according to the closeness of 2 MV. */ /* block 8X16 */ - { - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3); - vp8_cal_step_param(sr, &bsi.sv_istep[0]); + sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, + (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3); + vp8_cal_step_param(sr, &bsi.sv_istep[0]); - sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - vp8_cal_step_param(sr, &bsi.sv_istep[1]); + sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, + (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); + vp8_cal_step_param(sr, &bsi.sv_istep[1]); - rd_check_segment(cpi, x, &bsi, BLOCK_8X16, seg_mvs[BLOCK_8X16]); - } + rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, + seg_mvs[PARTITIONING_8X16], txfm_cache); /* block 16X8 */ - { - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3); - vp8_cal_step_param(sr, &bsi.sv_istep[0]); + sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, + (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3); + vp8_cal_step_param(sr, &bsi.sv_istep[0]); - sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - vp8_cal_step_param(sr, &bsi.sv_istep[1]); + sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, + (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); + vp8_cal_step_param(sr, &bsi.sv_istep[1]); - rd_check_segment(cpi, x, &bsi, BLOCK_16X8, 
seg_mvs[BLOCK_16X8]); - } + rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, + seg_mvs[PARTITIONING_16X8], txfm_cache); /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ /* Not skip 4x4 if speed=0 (good quality) */ - if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) { /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ + if (cpi->sf.no_skip_block4x4_search || + bsi.segment_num == PARTITIONING_8X8) { + /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ bsi.mvp.as_int = bsi.sv_mvp[0].as_int; - rd_check_segment(cpi, x, &bsi, BLOCK_4X4, seg_mvs[BLOCK_4X4]); + rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, + seg_mvs[PARTITIONING_4X4], txfm_cache); } /* restore UMV window */ @@ -2653,9 +2765,12 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, *returntotrate = bsi.r; *returndistortion = bsi.d; *returnyrate = bsi.segment_yrate; - *skippable = mby_is_skippable_4x4(&x->e_mbd, 0); + *skippable = bsi.txfm_size == TX_4X4 ? 
+ mby_is_skippable_4x4(&x->e_mbd, 0) : + mby_is_skippable_8x8(&x->e_mbd, 0); /* save partitions */ + mbmi->txfm_size = bsi.txfm_size; mbmi->partitioning = bsi.segment_num; x->partition_info->count = vp8_mbsplit_count[bsi.segment_num]; @@ -2901,9 +3016,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; -#if CONFIG_NEWMVENTROPY MV mv; -#endif if (mbmi->mode == SPLITMV) { int i; @@ -2911,7 +3024,6 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, for (i = 0; i < x->partition_info->count; i++) { if (x->partition_info->bmi[i].mode == NEW4X4) { if (x->e_mbd.allow_high_precision_mv) { -#if CONFIG_NEWMVENTROPY mv.row = (x->partition_info->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row); mv.col = (x->partition_info->bmi[i].mv.as_mv.col @@ -2925,20 +3037,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1); } -#else - cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row)]++; - cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col)]++; - if (mbmi->second_ref_frame) { - cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row)]++; - cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col)]++; - } -#endif } else { -#if CONFIG_NEWMVENTROPY mv.row = (x->partition_info->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row); mv.col = (x->partition_info->bmi[i].mv.as_mv.col @@ -2952,24 +3051,11 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0); } -#else - cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row) >> 
1)]++; - cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - if (mbmi->second_ref_frame) { - cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row) >> 1)]++; - cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col) >> 1)]++; - } -#endif } } } } else if (mbmi->mode == NEWMV) { if (x->e_mbd.allow_high_precision_mv) { -#if CONFIG_NEWMVENTROPY mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); @@ -2978,20 +3064,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1); } -#else - cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[0].as_mv.row - - best_ref_mv->as_mv.row)]++; - cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[0].as_mv.col - - best_ref_mv->as_mv.col)]++; - if (mbmi->second_ref_frame) { - cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row - - second_best_ref_mv->as_mv.row)]++; - cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col - - second_best_ref_mv->as_mv.col)]++; - } -#endif } else { -#if CONFIG_NEWMVENTROPY mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); @@ -3000,18 +3073,6 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0); } -#else - cpi->MVcount[0][mv_max + ((mbmi->mv[0].as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; - cpi->MVcount[1][mv_max + ((mbmi->mv[0].as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - if (mbmi->second_ref_frame) { - cpi->MVcount[0][mv_max + 
((mbmi->mv[1].as_mv.row - - second_best_ref_mv->as_mv.row) >> 1)]++; - cpi->MVcount[1][mv_max + ((mbmi->mv[1].as_mv.col - - second_best_ref_mv->as_mv.col) >> 1)]++; - } -#endif } } } @@ -3185,9 +3246,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int hybrid_pred_diff, int64_t txfm_size_diff[NB_TXFM_MODES]) { MACROBLOCKD *xd = &x->e_mbd; -#if CONFIG_TX_SELECT MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; -#endif // Take a snapshot of the coding context so it can be // restored if we decide to encode this way @@ -3207,9 +3266,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->comp_pred_diff = comp_pred_diff; ctx->hybrid_pred_diff = hybrid_pred_diff; -#if CONFIG_TX_SELECT memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); -#endif } static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode, @@ -3326,9 +3383,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int_mv ref_mv[MAX_REF_FRAMES] = {{0}}; #endif -#if CONFIG_SWITCHABLE_INTERP int switchable_filter_index = 0; -#endif MB_PREDICTION_MODE uv_intra_mode; MB_PREDICTION_MODE uv_intra_mode_8x8 = 0; @@ -3344,7 +3399,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4]; unsigned int ref_costs[MAX_REF_FRAMES]; - int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1]; + int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1]; vpx_memset(mode8x8, 0, sizeof(mode8x8)); vpx_memset(&frame_mv, 0, sizeof(frame_mv)); @@ -3359,7 +3414,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int for (i = 0; i < NB_TXFM_MODES; i++) best_txfm_rd[i] = INT64_MAX; - for (i = 0; i < BLOCK_MAX_SEGMENTS - 1; i++) { + for (i = 0; i < NB_PARTITIONINGS; i++) { int j, k; for (j = 0; j < 16; j++) @@ -3425,12 +3480,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, 
MACROBLOCK *x, int recon_yoffset, int // that depend on the current prediction etc. vp8_estimate_ref_frame_costs(cpi, segment_id, ref_costs); -#if CONFIG_SWITCHABLE_INTERP for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { -#else - for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { -#endif int64_t this_rd = INT64_MAX; int is_comp_pred; int disable_skip = 0, skippable = 0; @@ -3458,19 +3509,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int #if CONFIG_PRED_FILTER mbmi->pred_filter_enabled = 0; #endif -#if CONFIG_SWITCHABLE_INTERP if (cpi->common.mcomp_filter_type == SWITCHABLE && this_mode >= NEARESTMV && this_mode <= SPLITMV) { mbmi->interp_filter = vp8_switchable_interp[switchable_filter_index++]; if (switchable_filter_index == VP8_SWITCHABLE_FILTERS) switchable_filter_index = 0; - //printf("Searching %d (%d)\n", this_mode, switchable_filter_index); } else { mbmi->interp_filter = cpi->common.mcomp_filter_type; } vp8_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); -#endif // Test best rd so far against threshold for trying this mode. 
if (best_rd <= cpi->rd_threshes[mode_index]) @@ -3612,11 +3660,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } break; case I8X8_PRED: { -#if CONFIG_TX_SELECT int cost0 = vp8_cost_bit(cm->prob_tx[0], 0); int cost1 = vp8_cost_bit(cm->prob_tx[0], 1); int64_t tmp_rd_4x4s, tmp_rd_8x8s; -#endif int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd; int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8; mbmi->txfm_size = TX_4X4; @@ -3638,7 +3684,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int txfm_cache[ONLY_4X4] = tmp_rd_4x4; txfm_cache[ALLOW_8X8] = tmp_rd_8x8; txfm_cache[ALLOW_16X16] = tmp_rd_8x8; -#if CONFIG_TX_SELECT tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0); tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0); txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s; @@ -3667,9 +3712,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int mode8x8[1][3] = x->e_mbd.mode_info_context->bmi[10].as_mode.second; #endif } - } else -#endif - if (cm->txfm_mode == ONLY_4X4) { + } else if (cm->txfm_mode == ONLY_4X4) { rate = r4x4; rate_y = tok4x4; distortion = d4x4; @@ -3725,21 +3768,19 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int (mbmi->ref_frame == GOLDEN_FRAME) ? 
cpi->rd_threshes[THR_NEWG] : this_rd_thresh; - mbmi->txfm_size = TX_4X4; // FIXME use 8x8 in case of 8x8/8x16/16x8 - tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - second_ref, best_yrd, mdcounts, - &rate, &rate_y, &distortion, - &skippable, - this_rd_thresh, seg_mvs); + tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, + second_ref, best_yrd, mdcounts, + &rate, &rate_y, &distortion, + &skippable, + this_rd_thresh, seg_mvs, + txfm_cache); rate2 += rate; distortion2 += distortion; -#if CONFIG_SWITCHABLE_INTERP if (cpi->common.mcomp_filter_type == SWITCHABLE) rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs [get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] [vp8_switchable_interp_map[mbmi->interp_filter]]; -#endif // If even the 'Y' rd value of split is higher than best so far // then dont bother looking at UV if (tmp_rd < best_yrd) { @@ -3877,13 +3918,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rate2 += vp8_cost_bit(cpi->common.prob_pred_filter_off, xd->mode_info_context->mbmi.pred_filter_enabled); #endif -#if CONFIG_SWITCHABLE_INTERP if (cpi->common.mcomp_filter_type == SWITCHABLE) rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs [get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] [vp8_switchable_interp_map[ x->e_mbd.mode_info_context->mbmi.interp_filter]]; -#endif /* We don't include the cost of the second reference here, because there are only * three options: Last/Golden, ARF/Last or Golden/ARF, or in other words if you @@ -3908,8 +3947,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (threshold < x->encode_breakout) threshold = x->encode_breakout; - var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) - (*(b->base_src), b->src_stride, + var = vp8_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); if (sse < threshold) { @@ -3919,7 +3957,7 @@ void 
vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if ((sse - var < q2dc *q2dc >> 4) || (sse / 2 > var && sse - var < 64)) { // Check u and v to make sure skip is ok - int sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance)); + int sse2 = vp8_uvsse(x); if (sse2 * 2 < threshold) { x->skip = 1; distortion2 = sse + sse2; @@ -4127,7 +4165,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; - if (this_mode != B_PRED && this_mode != SPLITMV) { + if (this_mode != B_PRED) { adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode]; } else { adj_rd = this_rd; @@ -4151,7 +4189,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int else ++cpi->pred_filter_off_count; #endif -#if CONFIG_SWITCHABLE_INTERP if (cpi->common.mcomp_filter_type == SWITCHABLE && best_mbmode.mode >= NEARESTMV && best_mbmode.mode <= SPLITMV) { @@ -4159,7 +4196,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int [get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] [vp8_switchable_interp_map[best_mbmode.interp_filter]]; } -#endif // Reduce the activation RD thresholds for the best choice mode if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && @@ -4185,11 +4221,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int (cpi->oxcf.arnr_max_frames == 0) && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { mbmi->mode = ZEROMV; -#if CONFIG_TX_SELECT if (cm->txfm_mode != TX_MODE_SELECT) mbmi->txfm_size = cm->txfm_mode; else -#endif mbmi->txfm_size = TX_16X16; mbmi->ref_frame = ALTREF_FRAME; mbmi->mv[0].as_int = 0; @@ -4239,7 +4273,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int best_pred_diff[i] = best_rd - best_pred_rd[i]; } -#if CONFIG_TX_SELECT if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; 
i++) { if (best_txfm_rd[i] == INT64_MAX) @@ -4250,7 +4283,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } else { vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); } -#endif end: store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition, @@ -4381,10 +4413,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 1); dist = dist16x16 + (distuv8x8 >> 2); mbmi->txfm_size = txfm_size_16x16; -#if CONFIG_TX_SELECT memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); -#endif } else if (error8x8 > error16x16) { if (error4x4 < error16x16) { rate = rateuv; @@ -4401,20 +4431,16 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, mbmi->mode = B_PRED; mbmi->txfm_size = TX_4X4; dist = dist4x4 + (distuv >> 2); -#if CONFIG_TX_SELECT memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); -#endif } else { mbmi->txfm_size = txfm_size_16x16; mbmi->mode = mode16x16; rate = rate16x16 + rateuv8x8; dist = dist16x16 + (distuv8x8 >> 2); -#if CONFIG_TX_SELECT for (i = 0; i < NB_TXFM_MODES; i++) { x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i]; } -#endif } if (cpi->common.mb_no_coeff_skip) rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0); @@ -4434,10 +4460,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, mbmi->mode = B_PRED; mbmi->txfm_size = TX_4X4; dist = dist4x4 + (distuv >> 2); -#if CONFIG_TX_SELECT memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); -#endif } else { // FIXME(rbultje) support transform-size selection mbmi->mode = I8X8_PRED; @@ -4445,10 +4469,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, set_i8x8_block_modes(x, mode8x8); rate = rate8x8 + rateuv; dist = dist8x8 + (distuv >> 2); -#if CONFIG_TX_SELECT 
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); -#endif } if (cpi->common.mb_no_coeff_skip) rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0); @@ -4805,8 +4827,8 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, if (threshold < x->encode_breakout) threshold = x->encode_breakout; - var = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32)(*(b->base_src), - b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); + var = vp8_variance32x32(*(b->base_src), b->src_stride, + xd->dst.y_buffer, xd->dst.y_stride, &sse); if (sse < threshold) { unsigned int q2dc = xd->block[24].dequant[0]; @@ -4816,11 +4838,9 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, (sse / 2 > var && sse - var < 64)) { // Check u and v to make sure skip is ok unsigned int sse2, sse3; - var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) - (x->src.u_buffer, x->src.uv_stride, + var += vp8_variance16x16(x->src.u_buffer, x->src.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, &sse2); - var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) - (x->src.v_buffer, x->src.uv_stride, + var += vp8_variance16x16(x->src.v_buffer, x->src.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, &sse3); sse2 += sse3; if (sse2 * 2 < threshold) { diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c index 2e86a16c0..f15e687c6 100644 --- a/vp8/encoder/sad_c.c +++ b/vp8/encoder/sad_c.c @@ -10,33 +10,10 @@ #include <stdlib.h> +#include "vp8/common/sadmxn.h" #include "vpx_ports/config.h" #include "vpx/vpx_integer.h" -static __inline -unsigned int sad_mx_n_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int m, - int n) { - - int r, c; - unsigned int sad = 0; - - for (r = 0; r < n; r++) { - for (c = 0; c < m; c++) { - sad += abs(src_ptr[c] - ref_ptr[c]); - } - - src_ptr += src_stride; - ref_ptr += ref_stride; - } - - return sad; -} - unsigned int vp8_sad32x32_c(const unsigned char 
*src_ptr, int src_stride, const unsigned char *ref_ptr, @@ -97,25 +74,6 @@ unsigned int vp8_sad4x4_c( return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4); } -#if CONFIG_NEWBESTREFMV -unsigned int vp8_sad3x16_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad){ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16); -} -unsigned int vp8_sad16x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad){ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); -} -#endif - void vp8_sad32x32x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c index d3d9711dc..865496ae2 100644 --- a/vp8/encoder/ssim.c +++ b/vp8/encoder/ssim.c @@ -11,18 +11,10 @@ #include "onyx_int.h" -void vp8_ssim_parms_16x16_c -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -) { +void vp8_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r, + int rp, unsigned long *sum_s, unsigned long *sum_r, + unsigned long *sum_sq_s, unsigned long *sum_sq_r, + unsigned long *sum_sxr) { int i, j; for (i = 0; i < 16; i++, s += sp, r += rp) { for (j = 0; j < 16; j++) { @@ -34,18 +26,10 @@ void vp8_ssim_parms_16x16_c } } } -void vp8_ssim_parms_8x8_c -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -) { +void vp8_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp, + unsigned long *sum_s, unsigned long *sum_r, + unsigned long *sum_sq_s, unsigned long *sum_sq_r, + unsigned long *sum_sxr) { int i, j; for (i = 0; i < 8; i++, s += sp, r += rp) { for (j = 0; j < 8; j++) { @@ 
-61,15 +45,9 @@ void vp8_ssim_parms_8x8_c const static int64_t cc1 = 26634; // (64^2*(.01*255)^2 const static int64_t cc2 = 239708; // (64^2*(.03*255)^2 -static double similarity -( - unsigned long sum_s, - unsigned long sum_r, - unsigned long sum_sq_s, - unsigned long sum_sq_r, - unsigned long sum_sxr, - int count -) { +static double similarity(unsigned long sum_s, unsigned long sum_r, + unsigned long sum_sq_s, unsigned long sum_sq_r, + unsigned long sum_sxr, int count) { int64_t ssim_n, ssim_d; int64_t c1, c2; @@ -87,23 +65,22 @@ static double similarity return ssim_n * 1.0 / ssim_d; } -static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp, - const vp8_variance_rtcd_vtable_t *rtcd) { +static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); } -static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp, - const vp8_variance_rtcd_vtable_t *rtcd) { +static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - SSIMPF_INVOKE(rtcd, 8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); } // TODO: (jbb) tried to scale this function such that we may be able to use it // for distortion metric in mode selection code ( provided we do a reconstruction) -long dssim(unsigned char *s, int sp, unsigned char *r, int rp, - const vp8_variance_rtcd_vtable_t *rtcd) { +long dssim(unsigned char *s, int sp, unsigned char *r, int rp) { unsigned long sum_s = 
0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; int64_t ssim3; int64_t ssim_n1, ssim_n2; @@ -115,7 +92,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp, c1 = cc1 * 16; c2 = cc2 * 16; - SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); ssim_n1 = (2 * sum_s * sum_r + c1); ssim_n2 = ((int64_t) 2 * 256 * sum_sxr - (int64_t) 2 * sum_s * sum_r + c2); @@ -137,16 +115,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp, // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. -double vp8_ssim2 -( - unsigned char *img1, - unsigned char *img2, - int stride_img1, - int stride_img2, - int width, - int height, - const vp8_variance_rtcd_vtable_t *rtcd -) { +double vp8_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1, + int stride_img2, int width, int height) { int i, j; int samples = 0; double ssim_total = 0; @@ -154,7 +124,7 @@ double vp8_ssim2 // sample point start with each 4x4 location for (i = 0; i < height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { for (j = 0; j < width - 8; j += 4) { - double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2, rtcd); + double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2); ssim_total += v; samples++; } @@ -162,28 +132,22 @@ double vp8_ssim2 ssim_total /= samples; return ssim_total; } -double vp8_calc_ssim -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - int lumamask, - double *weight, - const vp8_variance_rtcd_vtable_t *rtcd -) { +double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight) { double a, b, c; double ssimv; a = vp8_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, 
source->y_width, - source->y_height, rtcd); + source->y_height); b = vp8_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); c = vp8_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); ssimv = a * .8 + .1 * (b + c); @@ -192,29 +156,22 @@ double vp8_calc_ssim return ssimv; } -double vp8_calc_ssimg -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v, - const vp8_variance_rtcd_vtable_t *rtcd -) { +double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v) { double ssim_all = 0; double a, b, c; a = vp8_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, source->y_width, - source->y_height, rtcd); + source->y_height); b = vp8_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); c = vp8_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); *ssim_y = a; *ssim_u = b; *ssim_v = c; diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index d46637a3e..2ddae1cbd 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -26,38 +26,26 @@ #ifdef ENTROPY_STATS INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM INT64 hybrid_context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM8X8 INT64 hybrid_context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif INT64 
context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#if CONFIG_HYBRIDTRANSFORM16X16 INT64 hybrid_context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -#if CONFIG_HYBRIDTRANSFORM extern unsigned int hybrid_tree_update_hist[BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -#endif extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -#if CONFIG_HYBRIDTRANSFORM8X8 extern unsigned int hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -#endif extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -#if CONFIG_HYBRIDTRANSFORM16X16 extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -#endif #endif /* ENTROPY_STATS */ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); @@ -134,9 +122,7 @@ static void tokenize1st_order_b_16x16(MACROBLOCKD *xd, const int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ const short *qcoeff_ptr = b->qcoeff; -#if CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type = get_tx_type(xd, b); -#endif int seg_eob = 256; int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -162,22 +148,18 @@ static void tokenize1st_order_b_16x16(MACROBLOCKD *xd, } t->Token = x; -#if CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) t->context_tree = cpi->common.fc.hybrid_coef_probs_16x16[type][band][pt]; else -#endif t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt]; t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || (band > 1 && type == PLANE_TYPE_Y_NO_DC)); assert(vp8_coef_encodings[t->Token].Len - 
t->skip_eob_node > 0); if (!dry_run) { -#if CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) ++cpi->hybrid_coef_counts_16x16[type][band][pt][x]; else -#endif ++cpi->coef_counts_16x16[type][band][pt][x]; } pt = vp8_prev_token_class[x]; @@ -310,9 +292,7 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd, int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; /* start at DC unless type 0 */ TOKENEXTRA *t = *tp; /* store tokens starting here */ const short *qcoeff_ptr = b->qcoeff; -#if CONFIG_HYBRIDTRANSFORM8X8 TX_TYPE tx_type = get_tx_type(xd, b); -#endif const int eob = b->eob; int seg_eob = 64; int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -338,11 +318,9 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd, x = DCT_EOB_TOKEN; t->Token = x; -#if CONFIG_HYBRIDTRANSFORM8X8 if (tx_type != DCT_DCT) t->context_tree = cpi->common.fc.hybrid_coef_probs_8x8[type][band][pt]; else -#endif t->context_tree = cpi->common.fc.coef_probs_8x8[type][band][pt]; t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || @@ -350,11 +328,9 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd, assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { -#if CONFIG_HYBRIDTRANSFORM8X8 if (tx_type != DCT_DCT) ++cpi->hybrid_coef_counts_8x8[type][band][pt][x]; else -#endif ++cpi->coef_counts_8x8[type][band][pt][x]; } pt = vp8_prev_token_class[x]; @@ -451,7 +427,6 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd, const int16_t *qcoeff_ptr = b->qcoeff; int c = (type == PLANE_TYPE_Y_NO_DC) ? 
1 : 0; -#if CONFIG_HYBRIDTRANSFORM TX_TYPE tx_type = get_tx_type(xd, &xd->block[block]); switch (tx_type) { case ADST_DCT: @@ -464,7 +439,6 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd, pt_scan = vp8_default_zig_zag1d; break; } -#endif a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block]; l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); @@ -485,22 +459,18 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd, token = DCT_EOB_TOKEN; t->Token = token; -#if CONFIG_HYBRIDTRANSFORM if (tx_type != DCT_DCT) t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt]; else -#endif t->context_tree = cpi->common.fc.coef_probs[type][band][pt]; t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || (band > 1 && type == PLANE_TYPE_Y_NO_DC)); assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { -#if CONFIG_HYBRIDTRANSFORM if (tx_type != DCT_DCT) ++cpi->hybrid_coef_counts[type][band][pt][token]; else -#endif ++cpi->coef_counts[type][band][pt][token]; } pt = vp8_prev_token_class[token]; @@ -619,7 +589,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd); break; case TX_8X8: - if (xd->mode_info_context->mbmi.mode == I8X8_PRED) + if (xd->mode_info_context->mbmi.mode == I8X8_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV) xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8_4x4uv(xd, 0); else xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8(xd, has_y2_block); @@ -668,17 +639,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi, tokenize1st_order_b_16x16(xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, A, L, cpi, dry_run); + A[1] = A[2] = A[3] = A[0]; + L[1] = L[2] = L[3] = L[0]; - for (b = 1; b < 16; b++) { - *(A + vp8_block2above[b]) = *(A); - *(L + vp8_block2left[b] ) = *(L); - } for (b = 16; b < 24; b += 4) { tokenize1st_order_b_8x8(xd, xd->block + b, t, PLANE_TYPE_UV, A + 
vp8_block2above_8x8[b], L + vp8_block2left_8x8[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } vpx_memset(&A[8], 0, sizeof(A[8])); vpx_memset(&L[8], 0, sizeof(L[8])); @@ -692,18 +661,19 @@ void vp8_tokenize_mb(VP8_COMP *cpi, A + vp8_block2above_8x8[b], L + vp8_block2left_8x8[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } - if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { + if (xd->mode_info_context->mbmi.mode == I8X8_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV) { tokenize1st_order_chroma_4x4(xd, t, cpi, dry_run); } else { for (b = 16; b < 24; b += 4) { tokenize1st_order_b_8x8(xd, xd->block + b, t, PLANE_TYPE_UV, A + vp8_block2above_8x8[b], L + vp8_block2left_8x8[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } } } else { @@ -995,30 +965,24 @@ static __inline void stuff1st_order_b_8x8(MACROBLOCKD *xd, int dry_run) { int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ -#if CONFIG_HYBRIDTRANSFORM8X8 TX_TYPE tx_type = get_tx_type(xd, b); -#endif const int band = vp8_coef_bands_8x8[(type == PLANE_TYPE_Y_NO_DC) ? 
1 : 0]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); (void) b; t->Token = DCT_EOB_TOKEN; -#if CONFIG_HYBRIDTRANSFORM8X8 if (tx_type != DCT_DCT) t->context_tree = cpi->common.fc.hybrid_coef_probs_8x8[type][band][pt]; else -#endif t->context_tree = cpi->common.fc.coef_probs_8x8[type][band][pt]; // t->section = 8; t->skip_eob_node = 0; ++t; *tp = t; if (!dry_run) { -#if CONFIG_HYBRIDTRANSFORM8X8 if (tx_type == DCT_DCT) ++cpi->hybrid_coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN]; else -#endif ++cpi->coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN]; } pt = 0; /* 0 <-> all coeff data is zero */ @@ -1074,8 +1038,8 @@ static void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *xd, A + vp8_block2above_8x8[b], L + vp8_block2left_8x8[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } for (b = 16; b < 24; b += 4) { @@ -1083,8 +1047,8 @@ static void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *xd, A + vp8_block2above[b], L + vp8_block2left[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } if (dry_run) *t = t_backup; @@ -1100,29 +1064,23 @@ static __inline void stuff1st_order_b_16x16(MACROBLOCKD *xd, int dry_run) { int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ -#if CONFIG_HYBRIDTRANSFORM16X16 TX_TYPE tx_type = get_tx_type(xd, b); -#endif const int band = vp8_coef_bands_16x16[(type == PLANE_TYPE_Y_NO_DC) ? 
1 : 0]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); (void) b; t->Token = DCT_EOB_TOKEN; -#if CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) t->context_tree = cpi->common.fc.hybrid_coef_probs_16x16[type][band][pt]; else -#endif t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt]; t->skip_eob_node = 0; ++t; *tp = t; if (!dry_run) { -#if CONFIG_HYBRIDTRANSFORM16X16 if (tx_type != DCT_DCT) ++cpi->hybrid_coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]; else -#endif ++cpi->coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]; } pt = 0; /* 0 <-> all coeff data is zero */ @@ -1138,17 +1096,15 @@ static void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *xd, stuff1st_order_b_16x16(xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, A, L, cpi, dry_run); - for (i = 1; i < 16; i++) { - *(A + vp8_block2above[i]) = *(A); - *(L + vp8_block2left[i]) = *(L); - } + A[1] = A[2] = A[3] = A[0]; + L[1] = L[2] = L[3] = L[0]; for (b = 16; b < 24; b += 4) { stuff1st_order_buv_8x8(xd, xd->block + b, t, A + vp8_block2above[b], L + vp8_block2left[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } vpx_memset(&A[8], 0, sizeof(A[8])); vpx_memset(&L[8], 0, sizeof(L[8])); @@ -1189,28 +1145,22 @@ static __inline void stuff1st_order_b_4x4(MACROBLOCKD *xd, int dry_run) { int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ -#if CONFIG_HYBRIDTRANSFORM TX_TYPE tx_type = get_tx_type(xd, b); -#endif const int band = vp8_coef_bands[(type == PLANE_TYPE_Y_NO_DC) ? 
1 : 0]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); t->Token = DCT_EOB_TOKEN; -#if CONFIG_HYBRIDTRANSFORM if (tx_type != DCT_DCT) t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt]; else -#endif t->context_tree = cpi->common.fc.coef_probs[type][band][pt]; t->skip_eob_node = 0; ++t; *tp = t; if (!dry_run) { -#if CONFIG_HYBRIDTRANSFORM if (tx_type != DCT_DCT) ++cpi->hybrid_coef_counts[type][band][pt][DCT_EOB_TOKEN]; else -#endif ++cpi->coef_counts[type][band][pt][DCT_EOB_TOKEN]; } pt = 0; /* 0 <-> all coeff data is zero */ @@ -1288,8 +1238,8 @@ static void vp8_stuff_mb_8x8_4x4uv(VP8_COMP *cpi, MACROBLOCKD *xd, A + vp8_block2above_8x8[b], L + vp8_block2left_8x8[b], cpi, dry_run); - *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]); - *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]); + A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]]; + L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]]; } for (b = 16; b < 24; b++) @@ -1308,7 +1258,8 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { if (tx_size == TX_16X16) { vp8_stuff_mb_16x16(cpi, xd, t, dry_run); } else if (tx_size == TX_8X8) { - if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { + if (xd->mode_info_context->mbmi.mode == I8X8_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV) { vp8_stuff_mb_8x8_4x4uv(cpi, xd, t, dry_run); } else { vp8_stuff_mb_8x8(cpi, xd, t, dry_run); diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index a2fadfc4c..cdeb390c3 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -12,507 +12,73 @@ #ifndef VARIANCE_H #define VARIANCE_H -#include "vpx_config.h" - -#define prototype_sad(sym)\ - unsigned int (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - int max_sad\ - ) - -#define prototype_sad_multi_same_address(sym)\ - void (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const 
unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array\ - ) - -#define prototype_sad_multi_same_address_1(sym)\ - void (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned short *sad_array\ - ) - -#define prototype_sad_multi_dif_address(sym)\ - void (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - unsigned char *ref_ptr[4], \ - int ref_stride, \ - unsigned int *sad_array\ - ) - -#define prototype_variance(sym) \ - unsigned int (sym) \ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned int *sse\ - ) - -#define prototype_variance2(sym) \ - unsigned int (sym) \ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned int *sse,\ - int *sum\ - ) - -#define prototype_subpixvariance(sym) \ - unsigned int (sym) \ - ( \ - const unsigned char *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const unsigned char *ref_ptr, \ - int Refstride, \ - unsigned int *sse \ - ); - -#define prototype_ssimpf(sym) \ - void (sym) \ - ( \ - unsigned char *s, \ - int sp, \ - unsigned char *r, \ - int rp, \ - unsigned long *sum_s, \ - unsigned long *sum_r, \ - unsigned long *sum_sq_s, \ - unsigned long *sum_sq_r, \ - unsigned long *sum_sxr \ - ); - -#define prototype_getmbss(sym) unsigned int (sym)(const short *) - -#define prototype_get16x16prederror(sym)\ - unsigned int (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride \ - ) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/variance_x86.h" -#endif - -#if ARCH_ARM -#include "arm/variance_arm.h" -#endif - -#ifndef vp8_variance_sad4x4 -#define vp8_variance_sad4x4 vp8_sad4x4_c -#endif -extern prototype_sad(vp8_variance_sad4x4); - -#ifndef vp8_variance_sad8x8 -#define vp8_variance_sad8x8 
vp8_sad8x8_c -#endif -extern prototype_sad(vp8_variance_sad8x8); - -#ifndef vp8_variance_sad8x16 -#define vp8_variance_sad8x16 vp8_sad8x16_c -#endif -extern prototype_sad(vp8_variance_sad8x16); - -#ifndef vp8_variance_sad16x8 -#define vp8_variance_sad16x8 vp8_sad16x8_c -#endif -extern prototype_sad(vp8_variance_sad16x8); - -#ifndef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_c -#endif -extern prototype_sad(vp8_variance_sad16x16); - -#ifndef vp8_variance_sad32x32 -#define vp8_variance_sad32x32 vp8_sad32x32_c -#endif -extern prototype_sad(vp8_variance_sad32x32); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_sad32x32x3 -#define vp8_variance_sad32x32x3 vp8_sad32x32x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad32x32x3); - -#ifndef vp8_variance_sad16x16x3 -#define vp8_variance_sad16x16x3 vp8_sad16x16x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad16x16x3); - -#ifndef vp8_variance_sad16x8x3 -#define vp8_variance_sad16x8x3 vp8_sad16x8x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad16x8x3); - -#ifndef vp8_variance_sad8x8x3 -#define vp8_variance_sad8x8x3 vp8_sad8x8x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad8x8x3); - -#ifndef vp8_variance_sad8x16x3 -#define vp8_variance_sad8x16x3 vp8_sad8x16x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3); - -#ifndef vp8_variance_sad4x4x3 -#define vp8_variance_sad4x4x3 vp8_sad4x4x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3); - -#ifndef vp8_variance_sad32x32x8 -#define vp8_variance_sad32x32x8 vp8_sad32x32x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad32x32x8); - -#ifndef vp8_variance_sad16x16x8 -#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8); - -#ifndef vp8_variance_sad16x8x8 -#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c -#endif 
-extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8); - -#ifndef vp8_variance_sad8x8x8 -#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8); - -#ifndef vp8_variance_sad8x16x8 -#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8); - -#ifndef vp8_variance_sad4x4x8 -#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_sad32x32x4d -#define vp8_variance_sad32x32x4d vp8_sad32x32x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad32x32x4d); - -#ifndef vp8_variance_sad16x16x4d -#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad16x16x4d); - -#ifndef vp8_variance_sad16x8x4d -#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad16x8x4d); - -#ifndef vp8_variance_sad8x8x4d -#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad8x8x4d); - -#ifndef vp8_variance_sad8x16x4d -#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad8x16x4d); - -#ifndef vp8_variance_sad4x4x4d -#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad4x4x4d); - -#if ARCH_X86 || ARCH_X86_64 -#ifndef vp8_variance_copy32xn -#define vp8_variance_copy32xn vp8_copy32xn_c -#endif -extern prototype_sad(vp8_variance_copy32xn); -#endif - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_var4x4 -#define vp8_variance_var4x4 vp8_variance4x4_c -#endif -extern prototype_variance(vp8_variance_var4x4); - -#ifndef vp8_variance_var8x8 -#define vp8_variance_var8x8 vp8_variance8x8_c -#endif -extern 
prototype_variance(vp8_variance_var8x8); - -#ifndef vp8_variance_var8x16 -#define vp8_variance_var8x16 vp8_variance8x16_c -#endif -extern prototype_variance(vp8_variance_var8x16); - -#ifndef vp8_variance_var16x8 -#define vp8_variance_var16x8 vp8_variance16x8_c -#endif -extern prototype_variance(vp8_variance_var16x8); - -#ifndef vp8_variance_var16x16 -#define vp8_variance_var16x16 vp8_variance16x16_c -#endif -extern prototype_variance(vp8_variance_var16x16); - -#ifndef vp8_variance_var32x32 -#define vp8_variance_var32x32 vp8_variance32x32_c -#endif -extern prototype_variance(vp8_variance_var32x32); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_subpixvar4x4 -#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar4x4); - -#ifndef vp8_variance_subpixvar8x8 -#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar8x8); - -#ifndef vp8_variance_subpixvar8x16 -#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar8x16); - -#ifndef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar16x8); - -#ifndef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar16x16); - -#ifndef vp8_variance_subpixvar32x32 -#define vp8_variance_subpixvar32x32 vp8_sub_pixel_variance32x32_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar32x32); - -#ifndef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c -#endif -extern prototype_variance(vp8_variance_halfpixvar16x16_h); - -#ifndef vp8_variance_halfpixvar32x32_h -#define vp8_variance_halfpixvar32x32_h vp8_variance_halfpixvar32x32_h_c 
-#endif -extern prototype_variance(vp8_variance_halfpixvar32x32_h); - -#ifndef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c -#endif -extern prototype_variance(vp8_variance_halfpixvar16x16_v); - -#ifndef vp8_variance_halfpixvar32x32_v -#define vp8_variance_halfpixvar32x32_v vp8_variance_halfpixvar32x32_v_c -#endif -extern prototype_variance(vp8_variance_halfpixvar32x32_v); - -#ifndef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c -#endif -extern prototype_variance(vp8_variance_halfpixvar16x16_hv); - -#ifndef vp8_variance_halfpixvar32x32_hv -#define vp8_variance_halfpixvar32x32_hv vp8_variance_halfpixvar32x32_hv_c -#endif -extern prototype_variance(vp8_variance_halfpixvar32x32_hv); - -#ifndef vp8_variance_subpixmse16x16 -#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixmse16x16); - -#ifndef vp8_variance_subpixmse32x32 -#define vp8_variance_subpixmse32x32 vp8_sub_pixel_mse32x32_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixmse32x32); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_getmbss -#define vp8_variance_getmbss vp8_get_mb_ss_c -#endif -extern prototype_getmbss(vp8_variance_getmbss); - -#ifndef vp8_variance_mse16x16 -#define vp8_variance_mse16x16 vp8_mse16x16_c -#endif -extern prototype_variance(vp8_variance_mse16x16); - -#ifndef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c -#endif -extern prototype_ssimpf(vp8_ssimpf_8x8) - -#ifndef vp8_ssimpf_16x16 -#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c -#endif -extern prototype_ssimpf(vp8_ssimpf_16x16) - -#ifndef vp8_variance_satd16x16 -#define vp8_variance_satd16x16 vp8_satd16x16_c -#endif -extern prototype_variance(vp8_variance_satd16x16); - -typedef prototype_sad(*vp8_sad_fn_t); -typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t); -typedef 
prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t); -typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t); -typedef prototype_variance(*vp8_variance_fn_t); -typedef prototype_variance2(*vp8_variance2_fn_t); -typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t); -typedef prototype_getmbss(*vp8_getmbss_fn_t); -typedef prototype_ssimpf(*vp8_ssimpf_fn_t); -typedef prototype_get16x16prederror(*vp8_get16x16prederror_fn_t); - -typedef struct { - vp8_sad_fn_t sad4x4; - vp8_sad_fn_t sad8x8; - vp8_sad_fn_t sad8x16; - vp8_sad_fn_t sad16x8; - vp8_sad_fn_t sad16x16; -#if CONFIG_SUPERBLOCKS - vp8_sad_fn_t sad32x32; -#endif - - vp8_variance_fn_t var4x4; - vp8_variance_fn_t var8x8; - vp8_variance_fn_t var8x16; - vp8_variance_fn_t var16x8; - vp8_variance_fn_t var16x16; -#if CONFIG_SUPERBLOCKS - vp8_variance_fn_t var32x32; -#endif - - vp8_subpixvariance_fn_t subpixvar4x4; - vp8_subpixvariance_fn_t subpixvar8x8; - vp8_subpixvariance_fn_t subpixvar8x16; - vp8_subpixvariance_fn_t subpixvar16x8; - vp8_subpixvariance_fn_t subpixvar16x16; -#if CONFIG_SUPERBLOCKS - vp8_subpixvariance_fn_t subpixvar32x32; -#endif - vp8_variance_fn_t halfpixvar16x16_h; - vp8_variance_fn_t halfpixvar32x32_h; - vp8_variance_fn_t halfpixvar16x16_v; -#if CONFIG_SUPERBLOCKS - vp8_variance_fn_t halfpixvar32x32_v; -#endif - vp8_variance_fn_t halfpixvar16x16_hv; -#if CONFIG_SUPERBLOCKS - vp8_variance_fn_t halfpixvar32x32_hv; -#endif - vp8_subpixvariance_fn_t subpixmse16x16; -#if CONFIG_SUPERBLOCKS - vp8_subpixvariance_fn_t subpixmse32x32; -#endif - - vp8_getmbss_fn_t getmbss; - vp8_variance_fn_t mse16x16; - -#if CONFIG_SUPERBLOCKS - vp8_sad_multi_fn_t sad32x32x3; -#endif - vp8_sad_multi_fn_t sad16x16x3; - vp8_sad_multi_fn_t sad16x8x3; - vp8_sad_multi_fn_t sad8x16x3; - vp8_sad_multi_fn_t sad8x8x3; - vp8_sad_multi_fn_t sad4x4x3; - -#if CONFIG_SUPERBLOCKS - vp8_sad_multi1_fn_t sad32x32x8; -#endif - vp8_sad_multi1_fn_t sad16x16x8; - vp8_sad_multi1_fn_t sad16x8x8; - vp8_sad_multi1_fn_t sad8x16x8; - 
vp8_sad_multi1_fn_t sad8x8x8; - vp8_sad_multi1_fn_t sad4x4x8; - -#if CONFIG_SUPERBLOCKS - vp8_sad_multi_d_fn_t sad32x32x4d; -#endif - vp8_sad_multi_d_fn_t sad16x16x4d; - vp8_sad_multi_d_fn_t sad16x8x4d; - vp8_sad_multi_d_fn_t sad8x16x4d; - vp8_sad_multi_d_fn_t sad8x8x4d; - vp8_sad_multi_d_fn_t sad4x4x4d; - -#if ARCH_X86 || ARCH_X86_64 - vp8_sad_fn_t copy32xn; -#endif - -#if CONFIG_INTERNAL_STATS - vp8_ssimpf_fn_t ssimpf_8x8; - vp8_ssimpf_fn_t ssimpf_16x16; -#endif - - vp8_variance_fn_t satd16x16; -} vp8_variance_rtcd_vtable_t; - -typedef struct { - vp8_sad_fn_t sdf; - vp8_variance_fn_t vf; - vp8_subpixvariance_fn_t svf; - vp8_variance_fn_t svf_halfpix_h; - vp8_variance_fn_t svf_halfpix_v; - vp8_variance_fn_t svf_halfpix_hv; - vp8_sad_multi_fn_t sdx3f; - vp8_sad_multi1_fn_t sdx8f; - vp8_sad_multi_d_fn_t sdx4df; -#if ARCH_X86 || ARCH_X86_64 - vp8_sad_fn_t copymem; -#endif +typedef unsigned int(*vp8_sad_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned int max_sad); + +typedef void (*vp8_copy32xn_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + int n); + +typedef void (*vp8_sad_multi_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned int *sad_array); + +typedef void (*vp8_sad_multi1_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array); + +typedef void (*vp8_sad_multi_d_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char * const ref_ptr[], + int ref_stride, unsigned int *sad_array); + +typedef unsigned int (*vp8_variance_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned int *sse); + +typedef unsigned int (*vp8_subpixvariance_fn_t)(const unsigned char *src_ptr, + int source_stride, + int xoffset, + int 
yoffset, + const unsigned char *ref_ptr, + int Refstride, + unsigned int *sse); + +typedef void (*vp8_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r, + int rp, unsigned long *sum_s, + unsigned long *sum_r, unsigned long *sum_sq_s, + unsigned long *sum_sq_r, + unsigned long *sum_sxr); + +typedef unsigned int (*vp8_getmbss_fn_t)(const short *); + +typedef unsigned int (*vp8_get16x16prederror_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride); + +typedef struct variance_vtable { + vp8_sad_fn_t sdf; + vp8_variance_fn_t vf; + vp8_subpixvariance_fn_t svf; + vp8_variance_fn_t svf_halfpix_h; + vp8_variance_fn_t svf_halfpix_v; + vp8_variance_fn_t svf_halfpix_hv; + vp8_sad_multi_fn_t sdx3f; + vp8_sad_multi1_fn_t sdx8f; + vp8_sad_multi_d_fn_t sdx4df; + vp8_copy32xn_fn_t copymem; } vp8_variance_fn_ptr_t; -#if CONFIG_RUNTIME_CPU_DETECT -#define VARIANCE_INVOKE(ctx,fn) (ctx)->fn -#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn -#else -#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn -#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn -#endif - -#if CONFIG_NEWBESTREFMV -unsigned int vp8_sad2x16_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad); -unsigned int vp8_sad16x2_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad); -#endif - #endif diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h deleted file mode 100644 index 0971f11b0..000000000 --- a/vp8/encoder/x86/variance_x86.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VARIANCE_X86_H -#define VARIANCE_X86_H - - -/* Note: - * - * This platform is commonly built for runtime CPU detection. If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ -#if HAVE_MMX -extern prototype_sad(vp8_sad4x4_mmx); -extern prototype_sad(vp8_sad8x8_mmx); -extern prototype_sad(vp8_sad8x16_mmx); -extern prototype_sad(vp8_sad16x8_mmx); -extern prototype_sad(vp8_sad16x16_mmx); -extern prototype_variance(vp8_variance4x4_mmx); -extern prototype_variance(vp8_variance8x8_mmx); -extern prototype_variance(vp8_variance8x16_mmx); -extern prototype_variance(vp8_variance16x8_mmx); -extern prototype_variance(vp8_variance16x16_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx); -extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx); -extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx); -extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx); -extern prototype_getmbss(vp8_get_mb_ss_mmx); -extern prototype_variance(vp8_mse16x16_mmx); -extern prototype_variance2(vp8_get8x8var_mmx); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad4x4 -#define vp8_variance_sad4x4 vp8_sad4x4_mmx - -#undef vp8_variance_sad8x8 -#define vp8_variance_sad8x8 vp8_sad8x8_mmx - -#undef vp8_variance_sad8x16 -#define vp8_variance_sad8x16 vp8_sad8x16_mmx - -#undef vp8_variance_sad16x8 -#define vp8_variance_sad16x8 vp8_sad16x8_mmx - -#undef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_mmx - -#undef 
vp8_variance_var4x4 -#define vp8_variance_var4x4 vp8_variance4x4_mmx - -#undef vp8_variance_var8x8 -#define vp8_variance_var8x8 vp8_variance8x8_mmx - -#undef vp8_variance_var8x16 -#define vp8_variance_var8x16 vp8_variance8x16_mmx - -#undef vp8_variance_var16x8 -#define vp8_variance_var16x8 vp8_variance16x8_mmx - -#undef vp8_variance_var16x16 -#define vp8_variance_var16x16 vp8_variance16x16_mmx - -#undef vp8_variance_subpixvar4x4 -#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_mmx - -#undef vp8_variance_subpixvar8x8 -#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_mmx - -#undef vp8_variance_subpixvar8x16 -#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_mmx - -#undef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_mmx - -#undef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx - -#undef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx - -#undef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx - -#undef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx - -#undef vp8_variance_subpixmse16x16 -#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx - -#undef vp8_variance_getmbss -#define vp8_variance_getmbss vp8_get_mb_ss_mmx - -#undef vp8_variance_mse16x16 -#define vp8_variance_mse16x16 vp8_mse16x16_mmx - -#endif -#endif - - -#if HAVE_SSE2 -extern prototype_sad(vp8_sad4x4_wmt); -extern prototype_sad(vp8_sad8x8_wmt); -extern prototype_sad(vp8_sad8x16_wmt); -extern prototype_sad(vp8_sad16x8_wmt); -extern prototype_sad(vp8_sad16x16_wmt); -extern prototype_sad(vp8_copy32xn_sse2); -extern prototype_variance(vp8_variance4x4_wmt); -extern prototype_variance(vp8_variance8x8_wmt); -extern prototype_variance(vp8_variance8x16_wmt); -extern 
prototype_variance(vp8_variance16x8_wmt); -extern prototype_variance(vp8_variance16x16_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt); -extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt); -extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt); -extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt); -extern prototype_getmbss(vp8_get_mb_ss_sse2); -extern prototype_variance(vp8_mse16x16_wmt); -extern prototype_variance2(vp8_get8x8var_sse2); -extern prototype_variance2(vp8_get16x16var_sse2); -extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2) -extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2) - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad4x4 -#define vp8_variance_sad4x4 vp8_sad4x4_wmt - -#undef vp8_variance_sad8x8 -#define vp8_variance_sad8x8 vp8_sad8x8_wmt - -#undef vp8_variance_sad8x16 -#define vp8_variance_sad8x16 vp8_sad8x16_wmt - -#undef vp8_variance_sad16x8 -#define vp8_variance_sad16x8 vp8_sad16x8_wmt - -#undef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_wmt - -#undef vp8_variance_copy32xn -#define vp8_variance_copy32xn vp8_copy32xn_sse2 - -#undef vp8_variance_var4x4 -#define vp8_variance_var4x4 vp8_variance4x4_wmt - -#undef vp8_variance_var8x8 -#define vp8_variance_var8x8 vp8_variance8x8_wmt - -#undef vp8_variance_var8x16 -#define vp8_variance_var8x16 vp8_variance8x16_wmt - -#undef vp8_variance_var16x8 -#define vp8_variance_var16x8 vp8_variance16x8_wmt - -#undef vp8_variance_var16x16 -#define vp8_variance_var16x16 vp8_variance16x16_wmt - -#undef vp8_variance_subpixvar4x4 -#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_wmt - -#undef 
vp8_variance_subpixvar8x8 -#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_wmt - -#undef vp8_variance_subpixvar8x16 -#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_wmt - -#undef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_wmt - -#undef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt - -#undef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt - -#undef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt - -#undef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt - -#undef vp8_variance_subpixmse16x16 -#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt - -#undef vp8_variance_getmbss -#define vp8_variance_getmbss vp8_get_mb_ss_sse2 - -#undef vp8_variance_mse16x16 -#define vp8_variance_mse16x16 vp8_mse16x16_wmt - -#if ARCH_X86_64 -#undef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2 - -#undef vp8_ssimpf_16x16 -#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2 -#endif - -#endif -#endif - - -#if HAVE_SSE3 -extern prototype_sad(vp8_sad16x16_sse3); -extern prototype_sad(vp8_sad16x8_sse3); -extern prototype_sad_multi_same_address(vp8_sad16x16x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad16x8x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad8x16x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad8x8x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad4x4x3_sse3); - -extern prototype_sad_multi_dif_address(vp8_sad16x16x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad16x8x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad8x16x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad8x8x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3); -extern prototype_sad(vp8_copy32xn_sse3); - -#if 
!CONFIG_RUNTIME_CPU_DETECT - -#undef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_sse3 - -#undef vp8_variance_sad16x16x3 -#define vp8_variance_sad16x16x3 vp8_sad16x16x3_sse3 - -#undef vp8_variance_sad16x8x3 -#define vp8_variance_sad16x8x3 vp8_sad16x8x3_sse3 - -#undef vp8_variance_sad8x16x3 -#define vp8_variance_sad8x16x3 vp8_sad8x16x3_sse3 - -#undef vp8_variance_sad8x8x3 -#define vp8_variance_sad8x8x3 vp8_sad8x8x3_sse3 - -#undef vp8_variance_sad4x4x3 -#define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3 - -#undef vp8_variance_sad16x16x4d -#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3 - -#undef vp8_variance_sad16x8x4d -#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3 - -#undef vp8_variance_sad8x16x4d -#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_sse3 - -#undef vp8_variance_sad8x8x4d -#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_sse3 - -#undef vp8_variance_sad4x4x4d -#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_sse3 - -#undef vp8_variance_copy32xn -#define vp8_variance_copy32xn vp8_copy32xn_sse3 - -#endif -#endif - - -#if HAVE_SSSE3 -extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3); -extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad16x16x3 -#define vp8_variance_sad16x16x3 vp8_sad16x16x3_ssse3 - -#undef vp8_variance_sad16x8x3 -#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3 - -#undef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3 - -#undef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3 - -#endif -#endif - - -#if HAVE_SSE4_1 -extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4); -extern 
prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad16x16x8 -#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4 - -#undef vp8_variance_sad16x8x8 -#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4 - -#undef vp8_variance_sad8x16x8 -#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4 - -#undef vp8_variance_sad8x8x8 -#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4 - -#undef vp8_variance_sad4x4x8 -#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4 - -#endif -#endif - -#endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 71c51c14f..a169b493e 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -90,31 +90,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { /* Override default functions with fastest ones for this CPU. */ #if HAVE_MMX if (flags & HAS_MMX) { - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_mmx; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_mmx; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_mmx; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_mmx; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_mmx; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_mmx; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_mmx; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_mmx; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_mmx; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_mmx; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx; - cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; - cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; - 
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; - cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx; - cpi->rtcd.encodemb.berr = vp8_block_error_mmx; cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx; cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx; @@ -126,32 +101,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { #if HAVE_SSE2 if (flags & HAS_SSE2) { - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt; - cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse2; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_wmt; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_wmt; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_wmt; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_wmt; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_wmt; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt; - cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; - cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2; - cpi->rtcd.encodemb.berr = vp8_block_error_xmm; cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm; cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm; @@ -160,54 +109,20 @@ void 
vp8_arch_x86_encoder_init(VP8_COMP *cpi) { cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; -#if CONFIG_INTERNAL_STATS -#if ARCH_X86_64 - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2; - cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2; -#endif -#endif } #endif #if HAVE_SSE3 if (flags & HAS_SSE3) { - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3; - cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3; - cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_sse3; - cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; - cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; - cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; cpi->rtcd.search.full_search = vp8_full_search_sadx3; - cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; - cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; - cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; - cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3; - cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; - cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse3; cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; cpi->rtcd.search.refining_search = vp8_refining_search_sadx4; } #endif -#if HAVE_SSSE3 - if (flags & HAS_SSSE3) { - cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3; - cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3; - - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; - } -#endif - - #if HAVE_SSE4_1 if (flags & HAS_SSE4_1) { - cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4; - cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4; - cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; - cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; - cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; cpi->rtcd.search.full_search = vp8_full_search_sadx8; } #endif diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 25c4fe210..fbbdec145 100644 --- a/vp8/vp8_common.mk 
+++ b/vp8/vp8_common.mk @@ -53,6 +53,7 @@ VP8_COMMON_SRCS-yes += common/reconintra.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c VP8_COMMON_SRCS-yes += common/rtcd_defs.sh +VP8_COMMON_SRCS-yes += common/sadmxn.h VP8_COMMON_SRCS-yes += common/seg_common.h VP8_COMMON_SRCS-yes += common/seg_common.c VP8_COMMON_SRCS-yes += common/setupintrarecon.h @@ -119,6 +120,8 @@ endif VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/filter_sse2.c ifeq ($(HAVE_SSE2),yes) vp8/common/x86/filter_sse2.c.o: CFLAGS += -msse2 +vp8/common/x86/loopfilter_x86.c.o: CFLAGS += -msse2 +vp8/common/loopfilter_filters.c.o: CFLAGS += -msse2 endif VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 7058e316b..6d2f18080 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -92,7 +92,6 @@ VP8_CX_SRCS-yes += encoder/mbgraph.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h -VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c |