diff options
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/blockd.h | 70 | ||||
-rw-r--r-- | vp8/common/coefupdateprobs.h | 2 | ||||
-rw-r--r-- | vp8/common/default_coef_probs.h | 761 | ||||
-rw-r--r-- | vp8/common/entropy.c | 120 | ||||
-rw-r--r-- | vp8/common/entropy.h | 13 | ||||
-rw-r--r-- | vp8/common/entropymv.c | 476 | ||||
-rw-r--r-- | vp8/common/entropymv.h | 119 | ||||
-rw-r--r-- | vp8/common/findnearmv.c | 159 | ||||
-rw-r--r-- | vp8/common/findnearmv.h | 1 | ||||
-rw-r--r-- | vp8/common/generic/systemdependent.c | 2 | ||||
-rw-r--r-- | vp8/common/idct.h | 6 | ||||
-rw-r--r-- | vp8/common/idctllm.c | 98 | ||||
-rw-r--r-- | vp8/common/invtrans.c | 2 | ||||
-rw-r--r-- | vp8/common/invtrans.h | 2 | ||||
-rw-r--r-- | vp8/common/loopfilter.c | 8 | ||||
-rw-r--r-- | vp8/common/mvref_common.c | 349 | ||||
-rw-r--r-- | vp8/common/mvref_common.h | 33 | ||||
-rw-r--r-- | vp8/common/onyxc_int.h | 49 | ||||
-rw-r--r-- | vp8/common/recon.h | 8 | ||||
-rw-r--r-- | vp8/common/reconinter.c | 7 | ||||
-rw-r--r-- | vp8/common/reconinter.h | 9 | ||||
-rw-r--r-- | vp8/common/reconintra.c | 246 | ||||
-rw-r--r-- | vp8/common/rtcd_defs.sh | 8 | ||||
-rw-r--r-- | vp8/common/treecoder.c | 12 | ||||
-rw-r--r-- | vp8/common/treecoder.h | 1 | ||||
-rw-r--r-- | vp8/common/x86/filter_sse2.c | 289 | ||||
-rw-r--r-- | vp8/common/x86/filter_sse4.c | 3 |
27 files changed, 2481 insertions, 372 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 4e5d9e813..6f980ad65 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -44,6 +44,9 @@ void vpx_log(const char *format, ...); /* Segment Feature Masks */ #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 +#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF +#define MAX_MV_REFS 19 +#endif typedef struct { int r, c; @@ -86,10 +89,6 @@ typedef enum #endif } INTERPOLATIONFILTERTYPE; -#if 0//CONFIG_SWITCHABLE_INTERP -#define VP8_SWITCHABLE_FILTERS 2 /* number of switchable filters */ -#endif - typedef enum { DC_PRED, /* average of above and left pixels */ @@ -130,13 +129,13 @@ typedef enum { typedef enum { TX_4X4, // 4x4 dct transform TX_8X8, // 8x8 dct transform -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 TX_16X16, // 16x16 dct transform #endif TX_SIZE_MAX // Number of different transforms available } TX_SIZE; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 typedef enum { DCT_DCT = 0, // DCT in both horizontal and vertical ADST_DCT = 1, // ADST in horizontal, DCT in vertical @@ -152,10 +151,14 @@ typedef enum { #define VP8_MVREFS (1 + SPLITMV - NEARESTMV) -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 #define ACTIVE_HT 110 // quantization stepsize threshold #endif +#if CONFIG_HYBRIDTRANSFORM16X16 +#define ACTIVE_HT16 300 +#endif + typedef enum { B_DC_PRED, /* average of above and left pixels */ B_TM_PRED, @@ -179,7 +182,7 @@ typedef enum { B_MODE_COUNT } B_PREDICTION_MODE; -#if CONFIG_HYBRIDTRANSFORM8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 // convert MB_PREDICTION_MODE to B_PREDICTION_MODE static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { B_PREDICTION_MODE b_mode; @@ -233,7 +236,7 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { union b_mode_info { struct { B_PREDICTION_MODE 
first; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 B_PREDICTION_MODE test; TX_TYPE tx_type; #endif @@ -258,7 +261,7 @@ typedef enum { typedef struct { MB_PREDICTION_MODE mode, uv_mode; -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 MB_PREDICTION_MODE mode_rdopt; #endif @@ -270,7 +273,10 @@ typedef struct { int_mv mv[2]; // for each reference frame used #if CONFIG_NEWBESTREFMV int_mv ref_mv, second_ref_mv; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; + int mv_ref_index[MAX_REF_FRAMES]; #endif + unsigned char partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; @@ -419,7 +425,7 @@ typedef struct MacroBlockD { int corrupted; -#if ARCH_X86 || ARCH_X86_64 +#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) /* This is an intermediate buffer currently used in sub-pixel motion search * to keep a copy of the reference area. This buffer can be used for other * purpose. 
@@ -432,19 +438,21 @@ typedef struct MacroBlockD { #endif int mb_index; // Index of the MB in the SB (0..3) + #if CONFIG_NEWBESTREFMV - int_mv ref_mv[4]; + int_mv ref_mv[MAX_MV_REFS]; #endif -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 int q_index; #endif } MACROBLOCKD; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 // transform mapping static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) { + // map transform type switch (bmode) { case B_TM_PRED : case B_RD_PRED : @@ -467,6 +475,40 @@ static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) { break; } } + +static TX_TYPE get_tx_type(MACROBLOCKD *xd, const BLOCKD *b) { + TX_TYPE tx_type = DCT_DCT; + int ib = (b - xd->block); + if (ib >= 16) return tx_type; +#if CONFIG_HYBRIDTRANSFORM16X16 + if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) { + if (xd->mode_info_context->mbmi.mode < I8X8_PRED && + xd->q_index < ACTIVE_HT16) + tx_type = b->bmi.as_mode.tx_type; + return tx_type; + } +#endif +#if CONFIG_HYBRIDTRANSFORM8X8 + if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { + BLOCKD *bb; + ib = (ib & 8) + ((ib & 4) >> 1); + bb = xd->block + ib; + if (xd->mode_info_context->mbmi.mode == I8X8_PRED) + tx_type = bb->bmi.as_mode.tx_type; + return tx_type; + } +#endif +#if CONFIG_HYBRIDTRANSFORM + if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { + if (xd->mode_info_context->mbmi.mode == B_PRED && + xd->q_index < ACTIVE_HT) { + tx_type = b->bmi.as_mode.tx_type; + } + return tx_type; + } +#endif + return tx_type; +} #endif extern void vp8_build_block_doffsets(MACROBLOCKD *xd); diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 0fb25cc94..0610356cc 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -13,7 +13,7 @@ Generated file included by entropy.c */ #define COEF_UPDATE_PROB 252 #define 
COEF_UPDATE_PROB_8X8 252 -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 #define COEF_UPDATE_PROB_16X16 252 #endif diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h index 940e971b7..717cef78b 100644 --- a/vp8/common/default_coef_probs.h +++ b/vp8/common/default_coef_probs.h @@ -253,8 +253,253 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES] } } }; + +#if CONFIG_HYBRIDTRANSFORM +static const vp8_prob default_hybrid_coef_probs [BLOCK_TYPES] +[COEF_BANDS] +[PREV_COEF_CONTEXTS] +[ENTROPY_NODES] = { + { + /* Block Type ( 0 ) */ + { + /* Coeff Band ( 0 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, + { 90, 116, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + { 64, 128, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + { 64, 100, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, + { 28, 110, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 
204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, + { 90, 90, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, + { 64, 120, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + } + }, + { + /* Block Type ( 1 ) */ + { + /* Coeff Band ( 0 )*/ + { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, + { 48, 32, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, + { 66, 90, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, + { 18, 80, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, + { 36, 120, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 
225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, + { 18, 90, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, + { 28, 70, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, + { 40, 90, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + } + }, + { + /* Block Type ( 2 ) */ + { + /* Coeff Band ( 0 )*/ + { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, + { 64, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, + { 140, 70, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, + { 60, 40, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 
128, 128, 128, 128, 128, 128 }, + { 132, 118, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 48, 85, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + } + }, + { + /* Block Type ( 3 ) */ + { + /* Coeff Band ( 0 )*/ + { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 63, 48, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, + { 54, 40, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 44, 84, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, + { 32, 70, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 
170, 243, 182, 255, 255, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, + { 26, 104, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + } + } +}; +#endif + static const vp8_prob -vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] +default_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = { @@ -433,9 +678,8 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} } - } + }, #if CONFIG_HYBRIDTRANSFORM8X8 - , { /* block Type 3 */ { /* Coeff Band 0 */ { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, @@ -488,156 +732,183 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] } #endif }; -#if CONFIG_TX16X16 + +#if 
CONFIG_HYBRIDTRANSFORM8X8 static const vp8_prob -vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = -{ - { /* block Type 0 */ - { /* Coeff Band 0 */ +default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { + { + /* block Type 0 */ + { + /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 1 */ + { + /* Coeff Band 1 */ { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} }, - { /* Coeff Band 2 */ + { + /* Coeff Band 2 */ { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} }, - { /* Coeff Band 3 */ + { + /* Coeff Band 3 */ { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} }, - { /* Coeff Band 4 */ + { + /* Coeff Band 4 */ { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} }, - { /* Coeff Band 5 */ + { + /* Coeff Band 5 */ { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} }, - { /* Coeff Band 6 */ + { + /* 
Coeff Band 6 */ { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} }, - { /* Coeff Band 7 */ + { + /* Coeff Band 7 */ { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} } }, - { /* block Type 1 */ - { /* Coeff Band 0 */ + { + /* block Type 1 */ + { + /* Coeff Band 0 */ { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128}, { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128}, { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128} }, - { /* Coeff Band 1 */ + { + /* Coeff Band 1 */ { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128}, { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128}, { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}, { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128} }, - { /* Coeff Band 2 */ + { + /* Coeff Band 2 */ { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128}, { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128}, { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}, { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128} }, - { /* Coeff Band 3 */ + { + /* Coeff Band 3 */ { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128}, { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128}, { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}, { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128} }, - { /* Coeff Band 4 */ + { + /* Coeff Band 4 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 5 */ + { + /* Coeff Band 5 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 6 */ + { + /* Coeff Band 6 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 7 */ + { + /* Coeff Band 7 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} } }, - { /* block Type 2 */ - { /* Coeff Band 0 */ + { + /* block Type 2 */ + { + /* Coeff Band 0 */ { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128}, { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128}, { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}, { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128} }, - { /* Coeff Band 1 */ + { + /* Coeff Band 1 */ { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128}, { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128}, { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}, { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128} }, - { /* Coeff Band 2 */ + { + /* Coeff Band 2 */ { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128}, { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128}, { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}, { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128} }, - { /* Coeff Band 3 */ + { + /* Coeff Band 3 */ { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128}, { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128}, { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}, { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128} }, - { /* Coeff Band 4 */ + { + /* Coeff Band 4 */ { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128}, { 1, 136, 180, 
251, 181, 201, 255, 246, 128, 128, 128}, { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}, { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128} }, - { /* Coeff Band 5 */ + { + /* Coeff Band 5 */ { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128}, { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128}, { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}, { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128} }, - { /* Coeff Band 6 */ + { + /* Coeff Band 6 */ { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128}, { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128}, { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}, { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128} }, - { /* Coeff Band 7 */ + { + /* Coeff Band 7 */ { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128}, { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128}, { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, @@ -646,6 +917,423 @@ vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] }, { /* block Type 3 */ { /* Coeff Band 0 */ + { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, + { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255}, + { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128}, + { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128}, + { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128}, + { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128} + }, + { /* Coeff Band 2 */ + { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128}, + { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128}, + { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128}, + { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128} + }, + { /* Coeff Band 3 */ + { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128}, + { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128}, + { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128}, + { 1, 44, 81, 192, 130, 148, 
240, 180, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128}, + { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128}, + { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128}, + { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128} + }, + { /* Coeff Band 5 */ + { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128}, + { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128}, + { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128}, + { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128} + }, + { /* Coeff Band 6 */ + { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128}, + { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128}, + { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128}, + { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128}, + { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128}, + { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + } + } +}; +#endif + +#if CONFIG_TX16X16 +static const vp8_prob + default_coef_probs_16x16[BLOCK_TYPES_16X16] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, + { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} + }, + { /* Coeff Band 2 */ + { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, + { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, + { 1, 133, 203, 
255, 210, 220, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 3 */ + { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, + { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 4 */ + { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, + { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, + { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, + { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, + { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 
252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff 
Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, + { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200}, + { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128}, + { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128}, + { 45, 123, 175, 240, 184, 195, 
239, 178, 255, 218, 255}, + { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255} + }, + { /* Coeff Band 2 */ + { 1, 141, 183, 240, 176, 187, 246, 198, 255, 218, 128}, + { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255}, + { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255}, + { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255} + }, + { /* Coeff Band 3 */ + { 1, 173, 196, 245, 184, 191, 252, 211, 255, 240, 128}, + { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128}, + { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128}, + { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255} + }, + { /* Coeff Band 4 */ + { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128}, + { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128}, + { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128}, + { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128} + }, + { /* Coeff Band 5 */ + { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128}, + { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128}, + { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128}, + { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128}, + { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128}, + { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128}, + { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255} + }, + { /* Coeff Band 7 */ + { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128}, + { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128}, + { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128}, + { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128} + } + } +}; + +#if CONFIG_HYBRIDTRANSFORM16X16 +static const vp8_prob + default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, + { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} + }, + { /* Coeff Band 2 */ + { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, + { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 3 */ + { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, + { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 4 */ + { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, + { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, + { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, + { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, + { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 
235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 
128, 128, 128} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + 
{ 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200}, { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, @@ -696,3 +1384,4 @@ vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] } }; #endif +#endif diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index cbe798289..67242d5ea 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -28,6 +28,8 @@ typedef vp8_prob Prob; #include "coefupdateprobs.h" +const int vp8_i8x8_block[4] = {0, 2, 8, 10}; + DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -95,7 +97,7 @@ DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = { 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, }; -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 // Table can be optimized. 
DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, @@ -208,13 +210,26 @@ vp8_extra_bit_struct vp8_extra_bits[12] = { void vp8_default_coef_probs(VP8_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(pc->fc.coef_probs)); +#if CONFIG_HYBRIDTRANSFORM + vpx_memcpy(pc->fc.hybrid_coef_probs, default_hybrid_coef_probs, + sizeof(pc->fc.hybrid_coef_probs)); +#endif - vpx_memcpy(pc->fc.coef_probs_8x8, vp8_default_coef_probs_8x8, + vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8, sizeof(pc->fc.coef_probs_8x8)); +#if CONFIG_HYBRIDTRANSFORM8X8 + vpx_memcpy(pc->fc.hybrid_coef_probs_8x8, default_hybrid_coef_probs_8x8, + sizeof(pc->fc.hybrid_coef_probs_8x8)); +#endif #if CONFIG_TX16X16 - vpx_memcpy(pc->fc.coef_probs_16x16, vp8_default_coef_probs_16x16, + vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16, sizeof(pc->fc.coef_probs_16x16)); +#if CONFIG_HYBRIDTRANSFORM16X16 + vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, + default_hybrid_coef_probs_16x16, + sizeof(pc->fc.hybrid_coef_probs_16x16)); +#endif #endif } @@ -263,7 +278,8 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { printf(" {\n"); for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { printf(" {"); - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) printf("%d, ", cm->fc.coef_counts[i][j][k][t]); + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + printf("%d, ", cm->fc.coef_counts[i][j][k][t]); printf("},\n"); } printf(" },\n"); @@ -280,7 +296,26 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { printf(" {\n"); for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { printf(" {"); - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) printf("%d, ", cm->fc.coef_counts_8x8[i][j][k][t]); + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + printf("%d, ", cm->fc.coef_counts_8x8[i][j][k][t]); + printf("},\n"); + } + printf(" },\n"); + } + printf(" },\n"); + } + printf("};\n"); + printf("static const unsigned int\nhybrid_coef_counts" + "[BLOCK_TYPES] [COEF_BANDS]" + "[PREV_COEF_CONTEXTS] 
[MAX_ENTROPY_TOKENS] = {\n"); + for (i = 0; i < BLOCK_TYPES; ++i) { + printf(" {\n"); + for (j = 0; j < COEF_BANDS; ++j) { + printf(" {\n"); + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + printf(" {"); + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + printf("%d, ", cm->fc.hybrid_coef_counts[i][j][k][t]); printf("},\n"); } printf(" },\n"); @@ -313,6 +348,30 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } +#if CONFIG_HYBRIDTRANSFORM + for (i = 0; i < BLOCK_TYPES; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.hybrid_coef_counts [i][j][k], + 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_hybrid_coef_probs[i][j][k][t] * (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.hybrid_coef_probs[i][j][k][t] = 1; + else if (prob > 255) cm->fc.hybrid_coef_probs[i][j][k][t] = 255; + else cm->fc.hybrid_coef_probs[i][j][k][t] = prob; + } + } +#endif + for (i = 0; i < BLOCK_TYPES_8X8; ++i) for (j = 0; j < COEF_BANDS; ++j) for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -335,6 +394,31 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } +#if CONFIG_HYBRIDTRANSFORM8X8 + for (i = 0; i < BLOCK_TYPES_8X8; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.hybrid_coef_counts_8x8 [i][j][k], + 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? 
count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_hybrid_coef_probs_8x8[i][j][k][t] * + (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 1; + else if (prob > 255) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 255; + else cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = prob; + } + } +#endif + #if CONFIG_TX16X16 for (i = 0; i < BLOCK_TYPES_16X16; ++i) for (j = 0; j < COEF_BANDS; ++j) @@ -349,12 +433,36 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { count = branch_ct[t][0] + branch_ct[t][1]; count = count > count_sat ? count_sat : count; factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] * (256 - factor) + + prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] * + (256 - factor) + (int)coef_probs[t] * factor + 128) >> 8; if (prob <= 0) cm->fc.coef_probs_16x16[i][j][k][t] = 1; else if (prob > 255) cm->fc.coef_probs_16x16[i][j][k][t] = 255; else cm->fc.coef_probs_16x16[i][j][k][t] = prob; } } + +#if CONFIG_HYBRIDTRANSFORM16X16 + for (i = 0; i < BLOCK_TYPES_16X16; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.hybrid_coef_counts_16x16[i][j][k], 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? 
count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_hybrid_coef_probs_16x16[i][j][k][t] * (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 1; + else if (prob > 255) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 255; + else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob; + } + } +#endif #endif } diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index a435448e6..b3d3eff9f 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -21,6 +21,8 @@ //#define SUBMVREF_COUNT 5 //#define VP8_NUMMBSPLITS 4 +extern const int vp8_i8x8_block[4]; + /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -62,11 +64,8 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ /* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ #define BLOCK_TYPES 4 -#if CONFIG_HYBRIDTRANSFORM8X8 #define BLOCK_TYPES_8X8 4 -#else -#define BLOCK_TYPES_8X8 3 -#endif + #define BLOCK_TYPES_16X16 4 /* Middle dimension is a coarsening of the coefficient's @@ -75,7 +74,7 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define COEF_BANDS 8 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands[16]); extern DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]); -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]); #endif @@ -100,8 +99,6 @@ extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]); #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ -#define COEFUPDATETYPE 1 /* coef update type to use (1/2) */ - extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); @@ -118,7 +115,7 @@ extern short vp8_default_zig_zag_mask[16]; extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); void 
vp8_coef_tree_initialize(void); -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d_16x16[256]); #endif void vp8_adapt_coef_probs(struct VP8Common *); diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c index 1664b2899..6c31236ec 100644 --- a/vp8/common/entropymv.c +++ b/vp8/common/entropymv.c @@ -12,6 +12,449 @@ #include "onyxc_int.h" #include "entropymv.h" +//#define MV_COUNT_TESTING + +#if CONFIG_NEWMVENTROPY + +#define MV_COUNT_SAT 16 +#define MV_MAX_UPDATE_FACTOR 160 + +/* Integer pel reference mv threshold for use of high-precision 1/8 mv */ +#define COMPANDED_MVREF_THRESH 8 + +/* Smooth or bias the mv-counts before prob computation */ +/* #define SMOOTH_MV_COUNTS */ + +const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2] = { + -MV_JOINT_ZERO, 2, + -MV_JOINT_HNZVZ, 4, + -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ +}; +struct vp8_token_struct vp8_mv_joint_encodings[MV_JOINTS]; + +const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2] = { + -MV_CLASS_0, 2, + -MV_CLASS_1, 4, + 6, 8, + -MV_CLASS_2, -MV_CLASS_3, + 10, 12, + -MV_CLASS_4, -MV_CLASS_5, + -MV_CLASS_6, -MV_CLASS_7, +}; +struct vp8_token_struct vp8_mv_class_encodings[MV_CLASSES]; + +const vp8_tree_index vp8_mv_class0_tree [2 * CLASS0_SIZE - 2] = { + -0, -1, +}; +struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE]; + +const vp8_tree_index vp8_mv_fp_tree [2 * 4 - 2] = { + -0, 2, + -1, 4, + -2, -3 +}; +struct vp8_token_struct vp8_mv_fp_encodings[4]; + +const nmv_context vp8_default_nmv_context = { + {32, 64, 96}, + { + { /* vert component */ + 128, /* sign */ + {224, 144, 192, 168, 192, 176, 192}, /* class */ + {216}, /* class0 */ + {136, 140, 148, 160, 176, 192, 224}, /* bits */ + {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ + {64, 96, 64}, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + }, + { /* hor component */ + 128, /* sign */ + {216, 128, 176, 160, 176, 176, 192}, /* class */ + {208}, /* class0 
*/ + {136, 140, 148, 160, 176, 192, 224}, /* bits */ + {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ + {64, 96, 64}, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + } + }, +}; + +MV_JOINT_TYPE vp8_get_mv_joint(MV mv) { + if (mv.row == 0 && mv.col == 0) return MV_JOINT_ZERO; + else if (mv.row == 0 && mv.col != 0) return MV_JOINT_HNZVZ; + else if (mv.row != 0 && mv.col == 0) return MV_JOINT_HZVNZ; + else return MV_JOINT_HNZVNZ; +} + +#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0) + +MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset) { + MV_CLASS_TYPE c; + if (z < CLASS0_SIZE * 8) c = MV_CLASS_0; + else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1; + else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2; + else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3; + else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4; + else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5; + else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6; + else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7; + else assert(0); + if (offset) + *offset = z - mv_class_base(c); + return c; +} + +int vp8_use_nmv_hp(const MV *ref) { + if ((abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && + (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH) + return 1; + else + return 0; +} + +int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset) { + return mv_class_base(c) + offset; +} + +static void increment_nmv_component_count(int v, + nmv_component_counts *mvcomp, + int incr, + int usehp) { + assert (v != 0); /* should not be zero */ + mvcomp->mvcount[MV_MAX + v] += incr; +} + +static void increment_nmv_component(int v, + nmv_component_counts *mvcomp, + int incr, + int usehp) { + int s, z, c, o, d, e, f; + assert (v != 0); /* should not be zero */ + s = v < 0; + mvcomp->sign[s] += incr; + z = (s ? 
-v : v) - 1; /* magnitude - 1 */ + + c = vp8_get_mv_class(z, &o); + mvcomp->classes[c] += incr; + + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + if (c == MV_CLASS_0) { + mvcomp->class0[d] += incr; + } else { + int i, b; + b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + mvcomp->bits[i][((d >> i) & 1)] += incr; + } + + /* Code the fractional pel bits */ + if (c == MV_CLASS_0) { + mvcomp->class0_fp[d][f] += incr; + } else { + mvcomp->fp[f] += incr; + } + + /* Code the high precision bit */ + if (usehp) { + if (c == MV_CLASS_0) { + mvcomp->class0_hp[e] += incr; + } else { + mvcomp->hp[e] += incr; + } + } +} + +#ifdef SMOOTH_MV_COUNTS +static void smooth_counts(nmv_component_counts *mvcomp) { + static const int flen = 3; // (filter_length + 1) / 2 + static const int fval[] = {8, 3, 1}; + static const int fvalbits = 4; + int i; + unsigned int smvcount[MV_VALS]; + vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount)); + smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1; + for (i = flen - 1; i <= MV_VALS - flen; ++i) { + int j, s = smvcount[i] * fval[0]; + for (j = 1; j < flen; ++j) + s += (smvcount[i - j] + smvcount[i + j]) * fval[j]; + mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits; + } +} +#endif + +static void counts_to_context(nmv_component_counts *mvcomp, int usehp) { + int v; + vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount)); + for (v = 1; v <= MV_MAX; v++) { + increment_nmv_component(-v, mvcomp, mvcomp->mvcount[MV_MAX - v], usehp); + increment_nmv_component( v, mvcomp, mvcomp->mvcount[MV_MAX + v], usehp); + } +} + +void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, + int usehp) { + MV_JOINT_TYPE j = vp8_get_mv_joint(*mv); + mvctx->joints[j]++; + usehp = usehp && vp8_use_nmv_hp(ref); + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + 
increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp); + } +} + +static void adapt_prob(vp8_prob *dest, vp8_prob prep, vp8_prob newp, + unsigned int ct[2]) { + int factor; + int prob; + int count = ct[0] + ct[1]; + if (count) { + count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; + factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); + prob = ((int)prep * (256 - factor) + (int)(newp) * factor + 128) >> 8; + prob += !prob; + prob = (prob > 255 ? 255 : prob); + *dest = prob; + } +} + +void vp8_counts_to_nmv_context( + nmv_context_counts *NMVcount, + nmv_context *prob, + int usehp, + unsigned int (*branch_ct_joint)[2], + unsigned int (*branch_ct_sign)[2], + unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], + unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], + unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], + unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2], + unsigned int (*branch_ct_fp)[4 - 1][2], + unsigned int (*branch_ct_class0_hp)[2], + unsigned int (*branch_ct_hp)[2]) { + int i, j, k; + counts_to_context(&NMVcount->comps[0], usehp); + counts_to_context(&NMVcount->comps[1], usehp); + vp8_tree_probs_from_distribution(MV_JOINTS, + vp8_mv_joint_encodings, + vp8_mv_joint_tree, + prob->joints, + branch_ct_joint, + NMVcount->joints, + 256, 1); + for (i = 0; i < 2; ++i) { + prob->comps[i].sign = + vp8_bin_prob_from_distribution(NMVcount->comps[i].sign); + branch_ct_sign[i][0] = NMVcount->comps[i].sign[0]; + branch_ct_sign[i][1] = NMVcount->comps[i].sign[1]; + vp8_tree_probs_from_distribution(MV_CLASSES, + vp8_mv_class_encodings, + vp8_mv_class_tree, + prob->comps[i].classes, + branch_ct_classes[i], + NMVcount->comps[i].classes, + 256, 1); + vp8_tree_probs_from_distribution(CLASS0_SIZE, + vp8_mv_class0_encodings, + vp8_mv_class0_tree, + prob->comps[i].class0, + branch_ct_class0[i], + 
NMVcount->comps[i].class0, + 256, 1); + for (j = 0; j < MV_OFFSET_BITS; ++j) { + prob->comps[i].bits[j] = vp8_bin_prob_from_distribution( + NMVcount->comps[i].bits[j]); + branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0]; + branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1]; + } + } + for (i = 0; i < 2; ++i) { + for (k = 0; k < CLASS0_SIZE; ++k) { + vp8_tree_probs_from_distribution(4, + vp8_mv_fp_encodings, + vp8_mv_fp_tree, + prob->comps[i].class0_fp[k], + branch_ct_class0_fp[i][k], + NMVcount->comps[i].class0_fp[k], + 256, 1); + } + vp8_tree_probs_from_distribution(4, + vp8_mv_fp_encodings, + vp8_mv_fp_tree, + prob->comps[i].fp, + branch_ct_fp[i], + NMVcount->comps[i].fp, + 256, 1); + } + if (usehp) { + for (i = 0; i < 2; ++i) { + prob->comps[i].class0_hp = vp8_bin_prob_from_distribution( + NMVcount->comps[i].class0_hp); + branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0]; + branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1]; + + prob->comps[i].hp = + vp8_bin_prob_from_distribution(NMVcount->comps[i].hp); + branch_ct_hp[i][0] = NMVcount->comps[i].hp[0]; + branch_ct_hp[i][1] = NMVcount->comps[i].hp[1]; + } + } +} + +void vp8_adapt_nmv_probs(VP8_COMMON *cm, int usehp) { + int i, j, k; + nmv_context prob; + unsigned int branch_ct_joint[MV_JOINTS - 1][2]; + unsigned int branch_ct_sign[2][2]; + unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; + unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; + unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; + unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; + unsigned int branch_ct_fp[2][4 - 1][2]; + unsigned int branch_ct_class0_hp[2][2]; + unsigned int branch_ct_hp[2][2]; +#ifdef MV_COUNT_TESTING + printf("joints count: "); + for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]); + printf("\n"); fflush(stdout); + printf("signs count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]); + 
printf("\n"); fflush(stdout); + printf("classes count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_CLASSES; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].classes[j]); + printf("\n"); fflush(stdout); + } + printf("class0 count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].class0[j]); + printf("\n"); fflush(stdout); + } + printf("bits count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_OFFSET_BITS; ++j) + printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0], + cm->fc.NMVcount.comps[i].bits[j][1]); + printf("\n"); fflush(stdout); + } + printf("class0_fp count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + printf("{"); + for (k = 0; k < 4; ++k) + printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]); + printf("}, "); + } + printf("\n"); fflush(stdout); + } + printf("fp count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < 4; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].fp[j]); + printf("\n"); fflush(stdout); + } + if (usehp) { + printf("class0_hp count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0], + cm->fc.NMVcount.comps[i].class0_hp[1]); + printf("\n"); fflush(stdout); + printf("hp count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0], + cm->fc.NMVcount.comps[i].hp[1]); + printf("\n"); fflush(stdout); + } +#endif +#ifdef SMOOTH_MV_COUNTS + smooth_counts(&cm->fc.NMVcount.comps[0]); + smooth_counts(&cm->fc.NMVcount.comps[1]); +#endif + vp8_counts_to_nmv_context(&cm->fc.NMVcount, + &prob, + usehp, + branch_ct_joint, + branch_ct_sign, + branch_ct_classes, + branch_ct_class0, + branch_ct_bits, + branch_ct_class0_fp, + branch_ct_fp, + branch_ct_class0_hp, + branch_ct_hp); + + for (j = 0; j < MV_JOINTS - 1; ++j) { + adapt_prob(&cm->fc.nmvc.joints[j], + cm->fc.pre_nmvc.joints[j], + prob.joints[j], + branch_ct_joint[j]); + } + for (i = 0; i < 2; ++i) { + 
adapt_prob(&cm->fc.nmvc.comps[i].sign, + cm->fc.pre_nmvc.comps[i].sign, + prob.comps[i].sign, + branch_ct_sign[i]); + for (j = 0; j < MV_CLASSES - 1; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].classes[j], + cm->fc.pre_nmvc.comps[i].classes[j], + prob.comps[i].classes[j], + branch_ct_classes[i][j]); + } + for (j = 0; j < CLASS0_SIZE - 1; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].class0[j], + cm->fc.pre_nmvc.comps[i].class0[j], + prob.comps[i].class0[j], + branch_ct_class0[i][j]); + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].bits[j], + cm->fc.pre_nmvc.comps[i].bits[j], + prob.comps[i].bits[j], + branch_ct_bits[i][j]); + } + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + for (k = 0; k < 3; ++k) { + adapt_prob(&cm->fc.nmvc.comps[i].class0_fp[j][k], + cm->fc.pre_nmvc.comps[i].class0_fp[j][k], + prob.comps[i].class0_fp[j][k], + branch_ct_class0_fp[i][j][k]); + } + } + for (j = 0; j < 3; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].fp[j], + cm->fc.pre_nmvc.comps[i].fp[j], + prob.comps[i].fp[j], + branch_ct_fp[i][j]); + } + } + if (usehp) { + for (i = 0; i < 2; ++i) { + adapt_prob(&cm->fc.nmvc.comps[i].class0_hp, + cm->fc.pre_nmvc.comps[i].class0_hp, + prob.comps[i].class0_hp, + branch_ct_class0_hp[i]); + adapt_prob(&cm->fc.nmvc.comps[i].hp, + cm->fc.pre_nmvc.comps[i].hp, + prob.comps[i].hp, + branch_ct_hp[i]); + } + } +} + +#else /* CONFIG_NEWMVENTROPY */ + +#define MV_COUNT_SAT 16 +#define MV_MAX_UPDATE_FACTOR 128 + const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] = { {{ 237, @@ -266,14 +709,6 @@ static void compute_component_probs_hp( } } -void vp8_entropy_mv_init() { - vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); - vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp); -} - -// #define MV_COUNT_TESTING -#define MV_COUNT_SAT 16 -#define MV_MAX_UPDATE_FACTOR 128 void vp8_adapt_mv_probs(VP8_COMMON *cm) { int i, t, count, factor; #ifdef MV_COUNT_TESTING @@ -400,3 +835,28 @@ void 
vp8_adapt_mv_probs(VP8_COMMON *cm) { } } } + +#endif /* CONFIG_NEWMVENTROPY */ + +void vp8_entropy_mv_init() { +#if CONFIG_NEWMVENTROPY + vp8_tokens_from_tree(vp8_mv_joint_encodings, vp8_mv_joint_tree); + vp8_tokens_from_tree(vp8_mv_class_encodings, vp8_mv_class_tree); + vp8_tokens_from_tree(vp8_mv_class0_encodings, vp8_mv_class0_tree); + vp8_tokens_from_tree(vp8_mv_fp_encodings, vp8_mv_fp_tree); +#else + vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); + vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp); +#endif +} + +void vp8_init_mv_probs(VP8_COMMON *cm) { +#if CONFIG_NEWMVENTROPY + vpx_memcpy(&cm->fc.nmvc, &vp8_default_nmv_context, sizeof(nmv_context)); +#else + vpx_memcpy(cm->fc.mvc, + vp8_default_mv_context, sizeof(vp8_default_mv_context)); + vpx_memcpy(cm->fc.mvc_hp, + vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp)); +#endif +} diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h index 535d9b8ac..1a193b172 100644 --- a/vp8/common/entropymv.h +++ b/vp8/common/entropymv.h @@ -16,6 +16,121 @@ #include "vpx_config.h" #include "blockd.h" +struct VP8Common; + +void vp8_entropy_mv_init(); +void vp8_init_mv_probs(struct VP8Common *cm); +void vp8_adapt_mv_probs(struct VP8Common *cm); + +#if CONFIG_NEWMVENTROPY +void vp8_adapt_nmv_probs(struct VP8Common *cm, int usehp); +void vp8_lower_mv_precision(MV *mv); +int vp8_use_nmv_hp(const MV *ref); + +#define VP8_NMV_UPDATE_PROB 255 +//#define MV_GROUP_UPDATE + +#define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */ + +/* Symbols for coding which components are zero jointly */ +#define MV_JOINTS 4 +typedef enum { + MV_JOINT_ZERO = 0, /* Zero vector */ + MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ + MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ + MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ +} MV_JOINT_TYPE; + +extern const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2]; +extern struct vp8_token_struct vp8_mv_joint_encodings [MV_JOINTS]; + +/* 
Symbols for coding magnitude class of nonzero components */ +#define MV_CLASSES 8 +typedef enum { + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ +} MV_CLASS_TYPE; + +extern const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2]; +extern struct vp8_token_struct vp8_mv_class_encodings [MV_CLASSES]; + +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) +#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) + +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) + +extern const vp8_tree_index vp8_mv_class0_tree[2 * CLASS0_SIZE - 2]; +extern struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE]; + +extern const vp8_tree_index vp8_mv_fp_tree[2 * 4 - 2]; +extern struct vp8_token_struct vp8_mv_fp_encodings[4]; + +typedef struct { + vp8_prob sign; + vp8_prob classes[MV_CLASSES - 1]; + vp8_prob class0[CLASS0_SIZE - 1]; + vp8_prob bits[MV_OFFSET_BITS]; + vp8_prob class0_fp[CLASS0_SIZE][4 - 1]; + vp8_prob fp[4 - 1]; + vp8_prob class0_hp; + vp8_prob hp; +} nmv_component; + +typedef struct { + vp8_prob joints[MV_JOINTS - 1]; + nmv_component comps[2]; +} nmv_context; + +MV_JOINT_TYPE vp8_get_mv_joint(MV mv); +MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset); +int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset); + + +typedef struct { + unsigned int mvcount[MV_VALS]; + unsigned int sign[2]; + unsigned int classes[MV_CLASSES]; + unsigned int class0[CLASS0_SIZE]; + unsigned int bits[MV_OFFSET_BITS][2]; + unsigned int class0_fp[CLASS0_SIZE][4]; + unsigned int fp[4]; + unsigned int class0_hp[2]; + unsigned int hp[2]; +} nmv_component_counts; + 
+typedef struct { + unsigned int joints[MV_JOINTS]; + nmv_component_counts comps[2]; +} nmv_context_counts; + +void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, + int usehp); +extern const nmv_context vp8_default_nmv_context; +void vp8_counts_to_nmv_context( + nmv_context_counts *NMVcount, + nmv_context *prob, + int usehp, + unsigned int (*branch_ct_joint)[2], + unsigned int (*branch_ct_sign)[2], + unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], + unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], + unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], + unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2], + unsigned int (*branch_ct_fp)[4 - 1][2], + unsigned int (*branch_ct_class0_hp)[2], + unsigned int (*branch_ct_hp)[2]); + +#else /* CONFIG_NEWMVENTROPY */ + enum { mv_max = 1023, /* max absolute value of a MV component */ MVvals = (2 * mv_max) + 1, /* # possible values "" */ @@ -73,8 +188,6 @@ extern struct vp8_token_struct vp8_small_mvencodings [8]; extern const vp8_tree_index vp8_small_mvtree_hp[]; extern struct vp8_token_struct vp8_small_mvencodings_hp [16]; -void vp8_entropy_mv_init(); -struct VP8Common; -void vp8_adapt_mv_probs(struct VP8Common *cm); +#endif /* CONFIG_NEWMVENTROPY */ #endif diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c index 6f7361dd0..235ca46ce 100644 --- a/vp8/common/findnearmv.c +++ b/vp8/common/findnearmv.c @@ -20,15 +20,20 @@ const unsigned char vp8_mbsplit_offset[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} }; -static void lower_mv_precision(int_mv *mv) +static void lower_mv_precision(int_mv *mv, int usehp) { - if (mv->as_mv.row & 1) - mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1); - if (mv->as_mv.col & 1) - mv->as_mv.col += (mv->as_mv.col > 0 ? -1 : 1); +#if CONFIG_NEWMVENTROPY + if (!usehp || !vp8_use_nmv_hp(&mv->as_mv)) { +#else + if (!usehp) { +#endif + if (mv->as_mv.row & 1) + mv->as_mv.row += (mv->as_mv.row > 0 ? 
-1 : 1); + if (mv->as_mv.col & 1) + mv->as_mv.col += (mv->as_mv.col > 0 ? -1 : 1); + } } - /* Predict motion vectors using those from already-decoded nearby blocks. Note that we only consider one 4x4 subblock from each candidate 16x16 macroblock. */ @@ -173,11 +178,9 @@ void vp8_find_near_mvs /* Make sure that the 1/8th bits of the Mvs are zero if high_precision * is not being used, by truncating the last bit towards 0 */ - if (!xd->allow_high_precision_mv) { - lower_mv_precision(best_mv); - lower_mv_precision(nearest); - lower_mv_precision(nearby); - } + lower_mv_precision(best_mv, xd->allow_high_precision_mv); + lower_mv_precision(nearest, xd->allow_high_precision_mv); + lower_mv_precision(nearby, xd->allow_high_precision_mv); // TODO: move clamp outside findnearmv vp8_clamp_mv2(nearest, xd); @@ -200,75 +203,109 @@ vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc, * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector */ + void vp8_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, + int_mv *mvlist, int_mv *best_mv, int_mv *nearest, int_mv *near) { - int_mv *ref_mv = xd->ref_mv; - int bestsad = INT_MAX; - int i; + int i, j; unsigned char *above_src; unsigned char *left_src; unsigned char *above_ref; unsigned char *left_ref; int sad; + int sad_scores[MAX_MV_REFS]; + int_mv sorted_mvs[MAX_MV_REFS]; + int zero_seen = FALSE; - above_src = xd->dst.y_buffer - xd->dst.y_stride * 2; - left_src = xd->dst.y_buffer - 2; - above_ref = ref_y_buffer - ref_y_stride * 2; - left_ref = ref_y_buffer - 2; + // Default all to 0,0 if nothing else available + best_mv->as_int = nearest->as_int = near->as_int = 0; + vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs)); - bestsad = vp8_sad16x2_c(above_src, xd->dst.y_stride, - above_ref, ref_y_stride, - INT_MAX); - bestsad += vp8_sad2x16_c(left_src, xd->dst.y_stride, - left_ref, ref_y_stride, - INT_MAX); - best_mv->as_int = 0; + above_src = xd->dst.y_buffer - 
xd->dst.y_stride * 3; + left_src = xd->dst.y_buffer - 3; + above_ref = ref_y_buffer - ref_y_stride * 3; + left_ref = ref_y_buffer - 3; + //for(i = 0; i < MAX_MV_REFS; ++i) { + // Limit search to the predicted best 4 for(i = 0; i < 4; ++i) { - if (ref_mv[i].as_int) { - int_mv this_mv; - int offset=0; - int row_offset, col_offset; - this_mv.as_int = ref_mv[i].as_int; - vp8_clamp_mv(&this_mv, - xd->mb_to_left_edge - LEFT_TOP_MARGIN + 16, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); - - row_offset = (this_mv.as_mv.row > 0) ? - ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); - col_offset = (this_mv.as_mv.col > 0) ? - ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3); - offset = ref_y_stride * row_offset + col_offset; - - sad = vp8_sad16x2_c(above_src, xd->dst.y_stride, - above_ref + offset, ref_y_stride, INT_MAX); - - sad += vp8_sad2x16_c(left_src, xd->dst.y_stride, - left_ref + offset, ref_y_stride, INT_MAX); - - if (sad < bestsad) { - bestsad = sad; - best_mv->as_int = this_mv.as_int; - } + int_mv this_mv; + int offset=0; + int row_offset, col_offset; + + this_mv.as_int = mvlist[i].as_int; + + // If we see a 0,0 vector for a second time we have reached the end of + // the list of valid candidate vectors. + if (!this_mv.as_int) + if (zero_seen) + break; + else + zero_seen = TRUE; + + vp8_clamp_mv(&this_mv, + xd->mb_to_left_edge - LEFT_TOP_MARGIN + 16, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); + + row_offset = (this_mv.as_mv.row > 0) ? + ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); + col_offset = (this_mv.as_mv.col > 0) ? 
+ ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3); + offset = ref_y_stride * row_offset + col_offset; + + sad = vp8_sad16x3_c(above_src, xd->dst.y_stride, + above_ref + offset, ref_y_stride, INT_MAX); + + sad += vp8_sad3x16_c(left_src, xd->dst.y_stride, + left_ref + offset, ref_y_stride, INT_MAX); + + // Add the entry to our list and then resort the list on score. + sad_scores[i] = sad; + sorted_mvs[i].as_int = this_mv.as_int; + j = i; + while (j > 0) { + if (sad_scores[j] < sad_scores[j-1]) { + sad_scores[j] = sad_scores[j-1]; + sorted_mvs[j].as_int = sorted_mvs[j-1].as_int; + sad_scores[j-1] = sad; + sorted_mvs[j-1].as_int = this_mv.as_int; + j--; + } else + break; } } - if (!xd->allow_high_precision_mv) - lower_mv_precision(best_mv); - vp8_clamp_mv2(best_mv, xd); + // Set the best mv to the first entry in the sorted list + best_mv->as_int = sorted_mvs[0].as_int; - if (best_mv->as_int != 0 && - (best_mv->as_mv.row >> 3) != (nearest->as_mv.row >>3 ) && - (best_mv->as_mv.col >> 3) != (nearest->as_mv.col >>3 )) { - near->as_int = nearest->as_int; - nearest->as_int = best_mv->as_int; + // Provided that there are non zero vectors available there will not + // be more than one 0,0 entry in the sorted list. + // The best ref mv is always set to the first entry (which gave the best + // results. The nearest is set to the first non zero vector if available and + // near to the second non zero vector if avaialable. + // We do not use 0,0 as a nearest or near as 0,0 has its own mode. 
+ if ( sorted_mvs[0].as_int ) { + nearest->as_int = sorted_mvs[0].as_int; + if ( sorted_mvs[1].as_int ) + near->as_int = sorted_mvs[1].as_int; + else + near->as_int = sorted_mvs[2].as_int; + } else { + nearest->as_int = sorted_mvs[1].as_int; + near->as_int = sorted_mvs[2].as_int; } + + // Copy back the re-ordered mv list + vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs)); + lower_mv_precision(best_mv, xd->allow_high_precision_mv); + + vp8_clamp_mv2(best_mv, xd); } -#endif +#endif // CONFIG_NEWBESTREFMV diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h index e3cdab5ce..cd7b87adf 100644 --- a/vp8/common/findnearmv.h +++ b/vp8/common/findnearmv.h @@ -26,6 +26,7 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, + int_mv *mvlist, int_mv *best_mv, int_mv *nearest, int_mv *near); diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index d28024cda..32b5e5a6c 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -33,7 +33,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) { rtcd->idct.idct8 = vp8_short_idct8x8_c; rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c; rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c; -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 rtcd->idct.idct16x16 = vp8_short_idct16x16_c; #endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; diff --git a/vp8/common/idct.h b/vp8/common/idct.h index 2a410c34e..a4246c2a7 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -43,7 +43,7 @@ #define Y2_WHT_UPSCALE_FACTOR 2 #endif -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 #ifndef vp8_idct_idct16x16 #define vp8_idct_idct16x16 vp8_short_idct16x16_c #endif @@ -111,7 +111,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c); extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c); #endif -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM 
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 #include "vp8/common/blockd.h" void vp8_ihtllm_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim); @@ -136,7 +136,7 @@ typedef struct { vp8_idct_fn_t ihaar2; vp8_idct_fn_t ihaar2_1; -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 vp8_idct_fn_t idct16x16; #endif } vp8_idct_rtcd_vtable_t; diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index bf019af06..b4475c628 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -37,7 +37,7 @@ static const int rounding = 0; // TODO: these transforms can be further converted into integer forms // for complexity optimization -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 float idct_4[16] = { 0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099, 0.500000000000000, 0.270598050073099, -0.500000000000000, -0.653281482438188, @@ -89,11 +89,85 @@ float iadst_8[64] = { 0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348, 0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532 }; +#endif + +#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 +float idct_16[256] = { + 0.250000, 0.351851, 0.346760, 0.338330, 0.326641, 0.311806, 0.293969, 0.273300, + 0.250000, 0.224292, 0.196424, 0.166664, 0.135299, 0.102631, 0.068975, 0.034654, + 0.250000, 0.338330, 0.293969, 0.224292, 0.135299, 0.034654, -0.068975, -0.166664, + -0.250000, -0.311806, -0.346760, -0.351851, -0.326641, -0.273300, -0.196424, -0.102631, + 0.250000, 0.311806, 0.196424, 0.034654, -0.135299, -0.273300, -0.346760, -0.338330, + -0.250000, -0.102631, 0.068975, 0.224292, 0.326641, 0.351851, 0.293969, 0.166664, + 0.250000, 0.273300, 0.068975, -0.166664, -0.326641, -0.338330, -0.196424, 0.034654, + 0.250000, 0.351851, 0.293969, 0.102631, 
-0.135299, -0.311806, -0.346760, -0.224292, + 0.250000, 0.224292, -0.068975, -0.311806, -0.326641, -0.102631, 0.196424, 0.351851, + 0.250000, -0.034654, -0.293969, -0.338330, -0.135299, 0.166664, 0.346760, 0.273300, + 0.250000, 0.166664, -0.196424, -0.351851, -0.135299, 0.224292, 0.346760, 0.102631, + -0.250000, -0.338330, -0.068975, 0.273300, 0.326641, 0.034654, -0.293969, -0.311806, + 0.250000, 0.102631, -0.293969, -0.273300, 0.135299, 0.351851, 0.068975, -0.311806, + -0.250000, 0.166664, 0.346760, 0.034654, -0.326641, -0.224292, 0.196424, 0.338330, + 0.250000, 0.034654, -0.346760, -0.102631, 0.326641, 0.166664, -0.293969, -0.224292, + 0.250000, 0.273300, -0.196424, -0.311806, 0.135299, 0.338330, -0.068975, -0.351851, + 0.250000, -0.034654, -0.346760, 0.102631, 0.326641, -0.166664, -0.293969, 0.224292, + 0.250000, -0.273300, -0.196424, 0.311806, 0.135299, -0.338330, -0.068975, 0.351851, + 0.250000, -0.102631, -0.293969, 0.273300, 0.135299, -0.351851, 0.068975, 0.311806, + -0.250000, -0.166664, 0.346760, -0.034654, -0.326641, 0.224292, 0.196424, -0.338330, + 0.250000, -0.166664, -0.196424, 0.351851, -0.135299, -0.224292, 0.346760, -0.102631, + -0.250000, 0.338330, -0.068975, -0.273300, 0.326641, -0.034654, -0.293969, 0.311806, + 0.250000, -0.224292, -0.068975, 0.311806, -0.326641, 0.102631, 0.196424, -0.351851, + 0.250000, 0.034654, -0.293969, 0.338330, -0.135299, -0.166664, 0.346760, -0.273300, + 0.250000, -0.273300, 0.068975, 0.166664, -0.326641, 0.338330, -0.196424, -0.034654, + 0.250000, -0.351851, 0.293969, -0.102631, -0.135299, 0.311806, -0.346760, 0.224292, + 0.250000, -0.311806, 0.196424, -0.034654, -0.135299, 0.273300, -0.346760, 0.338330, + -0.250000, 0.102631, 0.068975, -0.224292, 0.326641, -0.351851, 0.293969, -0.166664, + 0.250000, -0.338330, 0.293969, -0.224292, 0.135299, -0.034654, -0.068975, 0.166664, + -0.250000, 0.311806, -0.346760, 0.351851, -0.326641, 0.273300, -0.196424, 0.102631, + 0.250000, -0.351851, 0.346760, -0.338330, 0.326641, 
-0.311806, 0.293969, -0.273300, + 0.250000, -0.224292, 0.196424, -0.166664, 0.135299, -0.102631, 0.068975, -0.034654 +}; + +float iadst_16[256] = { + 0.033094, 0.098087, 0.159534, 0.215215, 0.263118, 0.301511, 0.329007, 0.344612, + 0.347761, 0.338341, 0.316693, 0.283599, 0.240255, 0.188227, 0.129396, 0.065889, + 0.065889, 0.188227, 0.283599, 0.338341, 0.344612, 0.301511, 0.215215, 0.098087, + -0.033094, -0.159534, -0.263118, -0.329007, -0.347761, -0.316693, -0.240255, -0.129396, + 0.098087, 0.263118, 0.344612, 0.316693, 0.188227, 0.000000, -0.188227, -0.316693, + -0.344612, -0.263118, -0.098087, 0.098087, 0.263118, 0.344612, 0.316693, 0.188227, + 0.129396, 0.316693, 0.329007, 0.159534, -0.098087, -0.301511, -0.338341, -0.188227, + 0.065889, 0.283599, 0.344612, 0.215215, -0.033094, -0.263118, -0.347761, -0.240255, + 0.159534, 0.344612, 0.240255, -0.065889, -0.316693, -0.301511, -0.033094, 0.263118, + 0.338341, 0.129396, -0.188227, -0.347761, -0.215215, 0.098087, 0.329007, 0.283599, + 0.188227, 0.344612, 0.098087, -0.263118, -0.316693, -0.000000, 0.316693, 0.263118, + -0.098087, -0.344612, -0.188227, 0.188227, 0.344612, 0.098087, -0.263118, -0.316693, + 0.215215, 0.316693, -0.065889, -0.347761, -0.098087, 0.301511, 0.240255, -0.188227, + -0.329007, 0.033094, 0.344612, 0.129396, -0.283599, -0.263118, 0.159534, 0.338341, + 0.240255, 0.263118, -0.215215, -0.283599, 0.188227, 0.301511, -0.159534, -0.316693, + 0.129396, 0.329007, -0.098087, -0.338341, 0.065889, 0.344612, -0.033094, -0.347761, + 0.263118, 0.188227, -0.316693, -0.098087, 0.344612, 0.000000, -0.344612, 0.098087, + 0.316693, -0.188227, -0.263118, 0.263118, 0.188227, -0.316693, -0.098087, 0.344612, + 0.283599, 0.098087, -0.347761, 0.129396, 0.263118, -0.301511, -0.065889, 0.344612, + -0.159534, -0.240255, 0.316693, 0.033094, -0.338341, 0.188227, 0.215215, -0.329007, + 0.301511, 0.000000, -0.301511, 0.301511, 0.000000, -0.301511, 0.301511, 0.000000, + -0.301511, 0.301511, 0.000000, -0.301511, 0.301511, 
0.000000, -0.301511, 0.301511, + 0.316693, -0.098087, -0.188227, 0.344612, -0.263118, -0.000000, 0.263118, -0.344612, + 0.188227, 0.098087, -0.316693, 0.316693, -0.098087, -0.188227, 0.344612, -0.263118, + 0.329007, -0.188227, -0.033094, 0.240255, -0.344612, 0.301511, -0.129396, -0.098087, + 0.283599, -0.347761, 0.263118, -0.065889, -0.159534, 0.316693, -0.338341, 0.215215, + 0.338341, -0.263118, 0.129396, 0.033094, -0.188227, 0.301511, -0.347761, 0.316693, + -0.215215, 0.065889, 0.098087, -0.240255, 0.329007, -0.344612, 0.283599, -0.159534, + 0.344612, -0.316693, 0.263118, -0.188227, 0.098087, 0.000000, -0.098087, 0.188227, + -0.263118, 0.316693, -0.344612, 0.344612, -0.316693, 0.263118, -0.188227, 0.098087, + 0.347761, -0.344612, 0.338341, -0.329007, 0.316693, -0.301511, 0.283599, -0.263118, + 0.240255, -0.215215, 0.188227, -0.159534, 0.129396, -0.098087, 0.065889, -0.033094 +}; +#endif +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 void vp8_ihtllm_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim) { int i, j, k; - float bufa[64], bufb[64]; // buffers are for floating-point test purpose + float bufa[256], bufb[256]; // buffers are for floating-point test purpose // the implementation could be simplified in // conjunction with integer transform @@ -126,11 +200,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, switch(tx_type) { case ADST_ADST : case ADST_DCT : - ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; + ptv = (tx_dim == 4) ? &iadst_4[0] : + ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); break; default : - ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; + ptv = (tx_dim == 4) ? &idct_4[0] : + ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); break; } @@ -155,11 +231,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : + ((tx_dim == 8) ? 
&iadst_8[0] : &iadst_16[0]); break; default : - pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : + ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); break; } @@ -178,11 +256,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : + ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); break; default : - pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : + ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); break; } } @@ -692,7 +772,7 @@ void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) { } -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 #if 0 // Keep a really bad float version as reference for now. void vp8_short_idct16x16_c(short *input, short *output, int pitch) { diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index 1d63f465a..9fc94eab8 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -171,7 +171,7 @@ void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, } -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch) { diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index 76258d435..2097c368c 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -30,7 +30,7 @@ extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MAC extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd); extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd); -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 extern void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch); diff 
--git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index d9c4b54be..727326cba 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -336,7 +336,7 @@ void vp8_loop_filter_frame (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { @@ -361,7 +361,7 @@ void vp8_loop_filter_frame (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { @@ -479,7 +479,7 @@ void vp8_loop_filter_frame_yonly (y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { @@ -497,7 +497,7 @@ void vp8_loop_filter_frame_yonly (y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { diff --git a/vp8/common/mvref_common.c b/vp8/common/mvref_common.c new file mode 100644 index 000000000..b6040cd59 --- /dev/null +++ b/vp8/common/mvref_common.c @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "mvref_common.h" + +#if CONFIG_NEWBESTREFMV + +#define MVREF_NEIGHBOURS 8 +static int mv_ref_search[MVREF_NEIGHBOURS][2] = + { {0,-1},{-1,0},{-1,-1},{0,-2},{-2,0},{-1,-2},{-2,-1},{-2,-2} }; +static int ref_distance_weight[MVREF_NEIGHBOURS] = + { 3,3,2,1,1,1,1,1 }; + +// clamp_mv +#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units +static void clamp_mv(const MACROBLOCKD *xd, int_mv *mv) { + + if (mv->as_mv.col < (xd->mb_to_left_edge - MV_BORDER)) + mv->as_mv.col = xd->mb_to_left_edge - MV_BORDER; + else if (mv->as_mv.col > xd->mb_to_right_edge + MV_BORDER) + mv->as_mv.col = xd->mb_to_right_edge + MV_BORDER; + + if (mv->as_mv.row < (xd->mb_to_top_edge - MV_BORDER)) + mv->as_mv.row = xd->mb_to_top_edge - MV_BORDER; + else if (mv->as_mv.row > xd->mb_to_bottom_edge + MV_BORDER) + mv->as_mv.row = xd->mb_to_bottom_edge + MV_BORDER; +} + +// Code for selecting / building and entropy coding a motion vector reference +// Returns a seperation value for two vectors. +// This is taken as the sum of the abs x and y difference. +unsigned int mv_distance(int_mv *mv1, int_mv *mv2) { + return (abs(mv1->as_mv.row - mv2->as_mv.row) + + abs(mv1->as_mv.col - mv2->as_mv.col)); +} + +// Gets a best matching candidate refenence motion vector +// from the given mode info structure (if available) +int get_candidate_mvref( + const MODE_INFO *candidate_mi, + MV_REFERENCE_FRAME ref_frame, + MV_REFERENCE_FRAME *c_ref_frame, + int_mv *c_mv, + MV_REFERENCE_FRAME *c2_ref_frame, + int_mv *c2_mv +) { + + int ret_val = FALSE; + c2_mv->as_int = 0; + *c2_ref_frame = INTRA_FRAME; + + // Target ref frame matches candidate first ref frame + if (ref_frame == candidate_mi->mbmi.ref_frame) { + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + *c_ref_frame = ref_frame; + ret_val = TRUE; + + // Is there a second non zero vector we can use. 
+ if ((candidate_mi->mbmi.second_ref_frame != INTRA_FRAME) && + (candidate_mi->mbmi.mv[1].as_int != 0) && + (candidate_mi->mbmi.mv[1].as_int != c_mv->as_int)) { + c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; + } + + // Target ref frame matches candidate second ref frame + } else if (ref_frame == candidate_mi->mbmi.second_ref_frame) { + c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c_ref_frame = ref_frame; + ret_val = TRUE; + + // Is there a second non zero vector we can use. + if ((candidate_mi->mbmi.ref_frame != INTRA_FRAME) && + (candidate_mi->mbmi.mv[0].as_int != 0) && + (candidate_mi->mbmi.mv[0].as_int != c_mv->as_int)) { + c2_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + *c2_ref_frame = candidate_mi->mbmi.ref_frame; + } + + // No ref frame matches so use first ref mv as first choice + } else if (candidate_mi->mbmi.ref_frame != INTRA_FRAME) { + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + *c_ref_frame = candidate_mi->mbmi.ref_frame; + ret_val = TRUE; + + // Is there a second non zero vector we can use. + if ((candidate_mi->mbmi.second_ref_frame != INTRA_FRAME) && + (candidate_mi->mbmi.mv[1].as_int != 0) && + (candidate_mi->mbmi.mv[1].as_int != c_mv->as_int)) { + c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; + } + + // If only the second ref mv is valid:- (Should not trigger in current code + // base given current possible compound prediction options). + } else if (candidate_mi->mbmi.second_ref_frame != INTRA_FRAME) { + c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c_ref_frame = candidate_mi->mbmi.second_ref_frame; + ret_val = TRUE; + } + + return ret_val; +} + +// Performs mv adjustment based on reference frame and clamps the MV +// if it goes off the edge of the buffer. 
+void scale_mv( + MACROBLOCKD *xd, + MV_REFERENCE_FRAME this_ref_frame, + MV_REFERENCE_FRAME candidate_ref_frame, + int_mv *candidate_mv, + int *ref_sign_bias +) { + + if (candidate_ref_frame != this_ref_frame) { + + //int frame_distances[MAX_REF_FRAMES]; + //int last_distance = 1; + //int gf_distance = xd->frames_since_golden; + //int arf_distance = xd->frames_till_alt_ref_frame; + + // Sign inversion where appropriate. + if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) { + candidate_mv->as_mv.row = -candidate_mv->as_mv.row; + candidate_mv->as_mv.col = -candidate_mv->as_mv.col; + } + + // Scale based on frame distance if the reference frames not the same. + /*frame_distances[INTRA_FRAME] = 1; // should never be used + frame_distances[LAST_FRAME] = 1; + frame_distances[GOLDEN_FRAME] = + (xd->frames_since_golden) ? xd->frames_since_golden : 1; + frame_distances[ALTREF_FRAME] = + (xd->frames_till_alt_ref_frame) ? xd->frames_till_alt_ref_frame : 1; + + if (frame_distances[this_ref_frame] && + frame_distances[candidate_ref_frame]) { + candidate_mv->as_mv.row = + (short)(((int)(candidate_mv->as_mv.row) * + frame_distances[this_ref_frame]) / + frame_distances[candidate_ref_frame]); + + candidate_mv->as_mv.col = + (short)(((int)(candidate_mv->as_mv.col) * + frame_distances[this_ref_frame]) / + frame_distances[candidate_ref_frame]); + } + */ + } + + // Clamp the MV so it does not point out of the frame buffer + clamp_mv(xd, candidate_mv); +} + +// Adds a new candidate reference vector to the list if indeed it is new. +// If it is not new then the score of the existing candidate that it matches +// is increased and the list is resorted. +void addmv_and_shuffle( + int_mv *mv_list, + int *mv_scores, + int *index, + int_mv candidate_mv, + int weight +) { + + int i = *index; + int duplicate_found = FALSE; + + // Check for duplicates. If there is one increment its score. + // Duplicate defined as being the same full pel vector with rounding. 
+ while (i > 0) { + i--; + + if (candidate_mv.as_int == mv_list[i].as_int) { + duplicate_found = TRUE; + mv_scores[i] += weight; + break; + } + } + + // If no duplicate was found add the new vector and give it a weight + if (!duplicate_found) { + mv_list[*index].as_int = candidate_mv.as_int; + mv_scores[*index] = weight; + i = *index; + (*index)++; + } + + // Reshuffle the list so that highest scoring mvs at the top. + while (i > 0) { + if (mv_scores[i] > mv_scores[i-1]) { + int tmp_score = mv_scores[i-1]; + int_mv tmp_mv = mv_list[i-1]; + + mv_scores[i-1] = mv_scores[i]; + mv_list[i-1] = mv_list[i]; + mv_scores[i] = tmp_score; + mv_list[i] = tmp_mv; + i--; + } else + break; + } +} + +// This function searches the neighbourhood of a given MB/SB and populates a +// list of candidate reference vectors. +// +void find_mv_refs( + MACROBLOCKD *xd, + MODE_INFO *here, + MODE_INFO *lf_here, + MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int *ref_sign_bias +) { + + int i; + MODE_INFO *candidate_mi; + int_mv candidate_mvs[MAX_MV_REFS]; + int_mv c_refmv; + MV_REFERENCE_FRAME c_ref_frame; + int_mv c2_refmv; + MV_REFERENCE_FRAME c2_ref_frame; + int candidate_scores[MAX_MV_REFS]; + int index = 0; + int ref_weight = 0; + int valid_mv_ref; + + // Blank the reference vector lists and other local structures. + vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REFS); + vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REFS); + vpx_memset(candidate_scores, 0, sizeof(candidate_scores)); + + // Populate a list with candidate reference vectors from the + // spatial neighbours. 
+ for (i = 0; i < 2; ++i) { + if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && + ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { + + candidate_mi = here + mv_ref_search[i][0] + + (mv_ref_search[i][1] * xd->mode_info_stride); + + valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + // If there is a valid MV candidate then add it to the list + if (valid_mv_ref) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, ref_weight); + + // If there is a second valid mv then add it as well. + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c2_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_weight); + } + } + } + } + + // Look at the corresponding vector in the last frame + candidate_mi = lf_here; + valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + // If there is a valid MV candidate then add it to the list + if (valid_mv_ref) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); + ref_weight = 2 + ((c_ref_frame == ref_frame) << 4); + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, ref_weight); + + // If there is a second valid mv then add it as well. + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c2_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_weight); + } + } + + // Populate a list with candidate reference vectors from the + // spatial neighbours. 
+ for (i = 2; i < MVREF_NEIGHBOURS; ++i) { + if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && + ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { + + candidate_mi = here + mv_ref_search[i][0] + + (mv_ref_search[i][1] * xd->mode_info_stride); + + valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + // If there is a valid MV candidate then add it to the list + if (valid_mv_ref) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, ref_weight); + + // If there is a second valid mv then add it as well. + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c2_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_weight); + } + } + } + } + + // 0,0 is always a valid reference. + for (i = 0; i < index; ++i) + if (candidate_mvs[i].as_int == 0) + break; + if (i == index) { + c_refmv.as_int = 0; + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, candidate_scores[3]+1 ); + } + + // Copy over the candidate list. + vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs)); +} + +#endif diff --git a/vp8/common/mvref_common.h b/vp8/common/mvref_common.h new file mode 100644 index 000000000..3f19ddbdb --- /dev/null +++ b/vp8/common/mvref_common.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "onyxc_int.h" +#include "blockd.h" + +// MR reference entropy header file. +#if CONFIG_NEWBESTREFMV + +#ifndef __INC_MVREF_COMMON_H +#define __INC_MVREF_COMMON_H + +unsigned int mv_distance(int_mv *mv1, int_mv *mv2); + +void find_mv_refs( + MACROBLOCKD *xd, + MODE_INFO *here, + MODE_INFO *lf_here, + MV_REFERENCE_FRAME ref_frame, + int_mv * mv_ref_list, + int *ref_sign_bias +); + +#endif + +#endif diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index b7a543220..7c6093b41 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -51,14 +51,29 @@ typedef struct frame_contexts { vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1]; vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM + vp8_prob hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM8X8 + vp8_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif #if CONFIG_TX16X16 vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM16X16 + vp8_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; #endif +#endif + +#if CONFIG_NEWMVENTROPY + nmv_context nmvc; + nmv_context pre_nmvc; +#else MV_CONTEXT mvc[2]; MV_CONTEXT_HP mvc_hp[2]; MV_CONTEXT pre_mvc[2]; MV_CONTEXT_HP pre_mvc_hp[2]; +#endif vp8_prob pre_bmode_prob [VP8_BINTRAMODES - 1]; vp8_prob pre_ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob pre_uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1]; @@ -74,22 +89,56 @@ typedef struct frame_contexts { vp8_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM + vp8_prob 
pre_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif + vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM8X8 + vp8_prob pre_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif + #if CONFIG_TX16X16 vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM16X16 + vp8_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; #endif +#endif + unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_HYBRIDTRANSFORM + unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif + unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_HYBRIDTRANSFORM8X8 + unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif + #if CONFIG_TX16X16 unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_HYBRIDTRANSFORM16X16 + unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif + +#if CONFIG_NEWMVENTROPY + nmv_context_counts NMVcount; +#else unsigned int MVcount [2] [MVvals]; unsigned int MVcount_hp [2] [MVvals_hp]; +#endif #if CONFIG_SWITCHABLE_INTERP vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS+1] [VP8_SWITCHABLE_FILTERS-1]; diff --git a/vp8/common/recon.h b/vp8/common/recon.h index 3527fc14d..0bb5c8863 100644 --- a/vp8/common/recon.h +++ b/vp8/common/recon.h @@ -262,4 +262,12 @@ typedef struct vp8_recon_rtcd_vtable { void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *xd); + +#if CONFIG_SUPERBLOCKS +extern void 
vp8_recon_mby_s_c(const vp8_recon_rtcd_vtable_t *rtcd, + MACROBLOCKD *xd, uint8_t *dst); +extern void vp8_recon_mbuv_s_c(const vp8_recon_rtcd_vtable_t *rtcd, + MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst); +#endif + #endif diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 0212c92c7..647b3ada7 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -723,9 +723,9 @@ void vp8_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, // Sub-pel filter xd->subpixel_predict8x8(pTemp, len, - _o16x16mv.as_mv.col & 15, - _o16x16mv.as_mv.row & 15, - pDst, dst_uvstride); + _o16x16mv.as_mv.col & 15, + _o16x16mv.as_mv.row & 15, + pDst, dst_uvstride); } else { filter_mb(pSrc, pre_stride, pDst, dst_uvstride, 8, 8); } @@ -750,7 +750,6 @@ void vp8_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, } - void vp8_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, unsigned char *dst_y, unsigned char *dst_u, diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index 7ad0adbd4..37e34b5e1 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -45,6 +45,15 @@ extern void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd, int dst_ystride, int dst_uvstride); +#if CONFIG_SUPERBLOCKS +extern void vp8_build_inter32x32_predictors_sb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride); +#endif + extern void vp8_build_inter_predictors_mb(MACROBLOCKD *xd); extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c index e391fa9be..cad9652b7 100644 --- a/vp8/common/reconintra.c +++ b/vp8/common/reconintra.c @@ -207,10 +207,10 @@ void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, } } -void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, - unsigned char *src, int src_stride, +void vp8_build_intra_predictors_internal(unsigned char *src, int src_stride, unsigned char *ypred_ptr, - 
int y_stride, int mode, int bsize) { + int y_stride, int mode, int bsize, + int up_available, int left_available) { unsigned char *yabove_row = src - src_stride; unsigned char yleft_col[32]; @@ -218,7 +218,7 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, int r, c, i; for (i = 0; i < bsize; i++) { - yleft_col[i] = xd->dst.y_buffer [i * src_stride - 1]; + yleft_col[i] = src[i * src_stride - 1]; } /* for Y */ @@ -230,8 +230,10 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, int average = 0; int log2_bsize_minus_1; - assert(bsize == 8 || bsize == 16 || bsize == 32); - if (bsize == 8) { + assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32); + if (bsize == 4) { + log2_bsize_minus_1 = 1; + } else if (bsize == 8) { log2_bsize_minus_1 = 2; } else if (bsize == 16) { log2_bsize_minus_1 = 3; @@ -239,19 +241,19 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, log2_bsize_minus_1 = 4; } - if (xd->up_available || xd->left_available) { - if (xd->up_available) { + if (up_available || left_available) { + if (up_available) { for (i = 0; i < bsize; i++) { average += yabove_row[i]; } } - if (xd->left_available) { + if (left_available) { for (i = 0; i < bsize; i++) { average += yleft_col[i]; } } - shift = log2_bsize_minus_1 + xd->up_available + xd->left_available; + shift = log2_bsize_minus_1 + up_available + left_available; expected_dc = (average + (1 << (shift - 1))) >> shift; } else { expected_dc = 128; @@ -332,22 +334,25 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, } void vp8_build_intra_predictors_mby(MACROBLOCKD *xd) { - vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->predictor, 16, - xd->mode_info_context->mbmi.mode, 16); + xd->mode_info_context->mbmi.mode, 16, + xd->up_available, xd->left_available); } void vp8_build_intra_predictors_mby_s(MACROBLOCKD *xd) { - vp8_build_intra_predictors_internal(xd, 
xd->dst.y_buffer, xd->dst.y_stride, + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, - xd->mode_info_context->mbmi.mode, 16); + xd->mode_info_context->mbmi.mode, 16, + xd->up_available, xd->left_available); } #if CONFIG_SUPERBLOCKS -void vp8_build_intra_predictors_sby_s(MACROBLOCKD *x) { - vp8_build_intra_predictors_internal(x, x->dst.y_buffer, x->dst.y_stride, - x->dst.y_buffer, x->dst.y_stride, - x->mode_info_context->mbmi.mode, 32); +void vp8_build_intra_predictors_sby_s(MACROBLOCKD *xd) { + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + xd->mode_info_context->mbmi.mode, 32, + xd->up_available, xd->left_available); } #endif @@ -356,14 +361,16 @@ void vp8_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { unsigned char predictor[2][256]; int i; - vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, predictor[0], 16, xd->mode_info_context->mbmi.mode, - 16); - vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + 16, xd->up_available, + xd->left_available); + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, predictor[1], 16, xd->mode_info_context->mbmi.second_mode, - 16); + 16, xd->up_available, + xd->left_available); for (i = 0; i < 256; i++) { xd->predictor[i] = (predictor[0][i] + predictor[1][i] + 1) >> 1; @@ -376,10 +383,12 @@ void vp8_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, unsigned char *vpred_ptr, int uv_stride, int mode, int bsize) { - vp8_build_intra_predictors_internal(xd, xd->dst.u_buffer, xd->dst.uv_stride, - upred_ptr, uv_stride, mode, bsize); - vp8_build_intra_predictors_internal(xd, xd->dst.v_buffer, xd->dst.uv_stride, - vpred_ptr, uv_stride, mode, bsize); + vp8_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, + upred_ptr, uv_stride, mode, bsize, + 
xd->up_available, xd->left_available); + vp8_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride, + vpred_ptr, uv_stride, mode, bsize, + xd->up_available, xd->left_available); } void vp8_build_intra_predictors_mbuv(MACROBLOCKD *xd) { @@ -428,95 +437,9 @@ void vp8_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { void vp8_intra8x8_predict(BLOCKD *xd, int mode, unsigned char *predictor) { - - unsigned char *yabove_row = *(xd->base_dst) + xd->dst - xd->dst_stride; - unsigned char yleft_col[8]; - unsigned char ytop_left = yabove_row[-1]; - int r, c, i; - - for (i = 0; i < 8; i++) { - yleft_col[i] = (*(xd->base_dst))[xd->dst - 1 + i * xd->dst_stride]; - } - switch (mode) { - case DC_PRED: { - int expected_dc = 0; - - for (i = 0; i < 8; i++) { - expected_dc += yabove_row[i]; - expected_dc += yleft_col[i]; - } - expected_dc = (expected_dc + 8) >> 4; - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - predictor[c] = expected_dc; - } - predictor += 16; - } - } - break; - case V_PRED: { - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - predictor[c] = yabove_row[c]; - } - predictor += 16; - } - - } - break; - case H_PRED: { - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - predictor[c] = yleft_col[r]; - } - predictor += 16; - } - } - break; - case TM_PRED: { - /* prediction similar to true_motion prediction */ - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - int pred = yabove_row[c] - ytop_left + yleft_col[r]; - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - predictor[c] = pred; - } - - predictor += 16; - } - } - break; - case D45_PRED: { - d45_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D135_PRED: { - d135_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D117_PRED: { - d117_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D153_PRED: { - d153_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D27_PRED: { - 
d27_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D63_PRED: { - d63_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - } + vp8_build_intra_predictors_internal(*(xd->base_dst) + xd->dst, + xd->dst_stride, predictor, 16, + mode, 8, 1, 1); } #if CONFIG_COMP_INTRA_PRED @@ -540,96 +463,9 @@ void vp8_comp_intra8x8_predict(BLOCKD *xd, void vp8_intra_uv4x4_predict(BLOCKD *xd, int mode, unsigned char *predictor) { - - unsigned char *above_row = *(xd->base_dst) + xd->dst - xd->dst_stride; - unsigned char left_col[4]; - unsigned char top_left = above_row[-1]; - int r, c, i; - - for (i = 0; i < 4; i++) { - left_col[i] = (*(xd->base_dst))[xd->dst - 1 + i * xd->dst_stride]; - } - switch (mode) { - case DC_PRED: { - int expected_dc = 0; - - for (i = 0; i < 4; i++) { - expected_dc += above_row[i]; - expected_dc += left_col[i]; - } - expected_dc = (expected_dc + 4) >> 3; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = expected_dc; - } - predictor += 8; - } - } - break; - case V_PRED: { - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - - predictor[c] = above_row[c]; - } - predictor += 8; - } - - } - break; - case H_PRED: { - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = left_col[r]; - } - predictor += 8; - } - } - break; - case TM_PRED: { - /* prediction similar to true_motion prediction */ - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - int pred = above_row[c] - top_left + left_col[r]; - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - predictor[c] = pred; - } - - predictor += 8; - } - } - break; - case D45_PRED: { - d45_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D135_PRED: { - d135_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D117_PRED: { - d117_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D153_PRED: { - d153_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case 
D27_PRED: { - d27_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D63_PRED: { - d63_predictor(predictor, 8, 4, above_row, left_col); - } - break; - } + vp8_build_intra_predictors_internal(*(xd->base_dst) + xd->dst, + xd->dst_stride, predictor, 8, + mode, 4, 1, 1); } #if CONFIG_COMP_INTRA_PRED diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index 1cb5de311..66029f88e 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -14,8 +14,8 @@ prototype void vp8_filter_block2d_16x16_8 "const unsigned char *src_ptr, const u # compiles warning free but a dissassembly of generated code show bugs. To be # on the safe side, only enabled when compiled with 'gcc'. if [ "$CONFIG_GCC" = "yes" ]; then - specialize vp8_filter_block2d_4x4_8 sse4_1 - specialize vp8_filter_block2d_8x4_8 sse4_1 - specialize vp8_filter_block2d_8x8_8 sse4_1 - specialize vp8_filter_block2d_16x16_8 sse4_1 + specialize vp8_filter_block2d_4x4_8 sse4_1 sse2 + specialize vp8_filter_block2d_8x4_8 sse4_1 sse2 + specialize vp8_filter_block2d_8x8_8 sse4_1 sse2 + specialize vp8_filter_block2d_16x16_8 sse4_1 sse2 fi diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c index def4caa04..adf291bef 100644 --- a/vp8/common/treecoder.c +++ b/vp8/common/treecoder.c @@ -124,3 +124,15 @@ void vp8_tree_probs_from_distribution( probs[t] = vp8_prob_half; } while (++t < tree_len); } + +vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]) { + int tot_count = counts[0] + counts[1]; + vp8_prob prob; + if (tot_count) { + prob = (counts[0] * 255 + (tot_count >> 1)) / tot_count; + prob += !prob; + } else { + prob = 128; + } + return prob; +} diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h index c4d0aa6ee..b7fa17df9 100644 --- a/vp8/common/treecoder.h +++ b/vp8/common/treecoder.h @@ -85,5 +85,6 @@ void vp8bc_tree_probs_from_distribution( c_bool_coder_spec *s ); +vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]); #endif diff --git 
a/vp8/common/x86/filter_sse2.c b/vp8/common/x86/filter_sse2.c
new file mode 100644
index 000000000..fe57b4e0b
--- /dev/null
+++ b/vp8/common/x86/filter_sse2.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h> // for alignment checks
+#include <emmintrin.h> // SSE2
+#include "vp8/common/filter.h"
+#include "vpx_ports/mem.h" // for DECLARE_ALIGNED
+#include "vpx_rtcd.h"
+
+// TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
+// just a quick partial snapshot so that others can already use some
+// speedup.
+// TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap
+// filtering.
+// TODO(cd): Add some comments, better variable naming.
+// TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coefficients (no sum
+// of positive above 128), or have higher precision filter
+// coefficients. 
+
+DECLARE_ALIGNED(16, static const unsigned int, rounding_c[4]) = {
+ VP8_FILTER_WEIGHT >> 1,
+ VP8_FILTER_WEIGHT >> 1,
+ VP8_FILTER_WEIGHT >> 1,
+ VP8_FILTER_WEIGHT >> 1,
+};
+
+// Creating a macro to do more than four pixels at once to hide instruction
+// latency is actually slower :-(
+#define DO_FOUR_PIXELS(result, src_ptr, offset) \
+ { \
+ /* Do shifted load to achieve required shuffles through unpacking */ \
+ const __m128i src0 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 0)); \
+ const __m128i src1 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 1)); \
+ const __m128i src2 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 2)); \
+ const __m128i src3 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 3)); \
+ const __m128i src01 = _mm_unpacklo_epi8(src0, src1); \
+ const __m128i src01_16 = _mm_unpacklo_epi8(src01, zero); \
+ const __m128i src23 = _mm_unpacklo_epi8(src2, src3); \
+ const __m128i src23_16 = _mm_unpacklo_epi8(src23, zero); \
+ /* Shift by 4 bytes through shuffle to get additional shifted loads */ \
+ const __m128i src4 = _mm_shuffle_epi32(src0, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src5 = _mm_shuffle_epi32(src1, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src6 = _mm_shuffle_epi32(src2, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src7 = _mm_shuffle_epi32(src3, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src45 = _mm_unpacklo_epi8(src4, src5); \
+ const __m128i src45_16 = _mm_unpacklo_epi8(src45, zero); \
+ const __m128i src67 = _mm_unpacklo_epi8(src6, src7); \
+ const __m128i src67_16 = _mm_unpacklo_epi8(src67, zero); \
+ /* multiply accumulate them */ \
+ const __m128i mad01 = _mm_madd_epi16(src01_16, fil01); \
+ const __m128i mad23 = _mm_madd_epi16(src23_16, fil23); \
+ const __m128i mad45 = _mm_madd_epi16(src45_16, fil45); \
+ const __m128i mad67 = _mm_madd_epi16(src67_16, fil67); \
+ const __m128i mad0123 = _mm_add_epi32(mad01, mad23); \
+ const __m128i mad4567 = _mm_add_epi32(mad45, mad67); \
+ __m128i mad_all 
= _mm_add_epi32(mad0123, mad4567); \ + mad_all = _mm_add_epi32(mad_all, rounding); \ + result = _mm_srai_epi32(mad_all, VP8_FILTER_SHIFT); \ + } + +void vp8_filter_block2d_4x4_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short *HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + __m128i intermediateA, intermediateB, intermediateC; + + const int kInterp_Extend = 4; + + const __m128i zero = _mm_set1_epi16(0); + const __m128i rounding = _mm_load_si128((const __m128i *)rounding_c); + + // check alignment + assert(0 == ((long)HFilter_aligned16)%16); + assert(0 == ((long)VFilter_aligned16)%16); + + { + __m128i transpose3_0; + __m128i transpose3_1; + __m128i transpose3_2; + __m128i transpose3_3; + + // Horizontal pass (src -> intermediate). + { + const __m128i HFilter = _mm_load_si128((const __m128i *)HFilter_aligned16); + // get first two columns filter coefficients + __m128i fil01 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i fil23 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i fil45 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i fil67 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(3, 3, 3, 3)); + src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); + + { + __m128i mad_all0; + __m128i mad_all1; + __m128i mad_all2; + __m128i mad_all3; + DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride) + DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride) + DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride) + DO_FOUR_PIXELS(mad_all3, src_ptr, 3*src_stride) + mad_all0 = _mm_packs_epi32(mad_all0, mad_all1); + mad_all2 = _mm_packs_epi32(mad_all2, mad_all3); + intermediateA = _mm_packus_epi16(mad_all0, mad_all2); + // -- + src_ptr += src_stride*4; + // -- + DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride) + DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride) + DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride) + DO_FOUR_PIXELS(mad_all3, src_ptr, 
3*src_stride) + mad_all0 = _mm_packs_epi32(mad_all0, mad_all1); + mad_all2 = _mm_packs_epi32(mad_all2, mad_all3); + intermediateB = _mm_packus_epi16(mad_all0, mad_all2); + // -- + src_ptr += src_stride*4; + // -- + DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride) + DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride) + DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride) + mad_all0 = _mm_packs_epi32(mad_all0, mad_all1); + mad_all2 = _mm_packs_epi32(mad_all2, mad_all2); + intermediateC = _mm_packus_epi16(mad_all0, mad_all2); + } + } + + // Transpose result (intermediate -> transpose3_x) + { + // 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33 + // 40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73 + // 80 81 82 83 90 91 92 93 A0 A1 A2 A3 xx xx xx xx + const __m128i transpose0_0 = _mm_unpacklo_epi8(intermediateA, intermediateB); + const __m128i transpose0_1 = _mm_unpackhi_epi8(intermediateA, intermediateB); + const __m128i transpose0_2 = _mm_unpacklo_epi8(intermediateC, intermediateC); + const __m128i transpose0_3 = _mm_unpackhi_epi8(intermediateC, intermediateC); + // 00 40 01 41 02 42 03 43 10 50 11 51 12 52 13 53 + // 20 60 21 61 22 62 23 63 30 70 31 71 32 72 33 73 + // 80 xx 81 xx 82 xx 83 xx 90 xx 91 xx 92 xx 93 xx + // A0 xx A1 xx A2 xx A3 xx xx xx xx xx xx xx xx xx + const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1); + const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1); + const __m128i transpose1_2 = _mm_unpacklo_epi8(transpose0_2, transpose0_3); + const __m128i transpose1_3 = _mm_unpackhi_epi8(transpose0_2, transpose0_3); + // 00 20 40 60 01 21 41 61 02 22 42 62 03 23 43 63 + // 10 30 50 70 11 31 51 71 12 32 52 72 13 33 53 73 + // 80 A0 xx xx 81 A1 xx xx 82 A2 xx xx 83 A3 xx xx + // 90 xx xx xx 91 xx xx xx 92 xx xx xx 93 xx xx xx + const __m128i transpose2_0 = _mm_unpacklo_epi8(transpose1_0, transpose1_1); + const __m128i transpose2_1 = _mm_unpackhi_epi8(transpose1_0, transpose1_1); + const __m128i transpose2_2 = 
_mm_unpacklo_epi8(transpose1_2, transpose1_3); + const __m128i transpose2_3 = _mm_unpackhi_epi8(transpose1_2, transpose1_3); + // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + // 80 90 A0 xx xx xx xx xx 81 91 A1 xx xx xx xx xx + // 82 92 A2 xx xx xx xx xx 83 93 A3 xx xx xx xx xx + transpose3_0 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0), + _mm_castsi128_ps(transpose2_2), + _MM_SHUFFLE(1, 0, 1, 0))); + transpose3_1 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0), + _mm_castsi128_ps(transpose2_2), + _MM_SHUFFLE(3, 2, 3, 2))); + transpose3_2 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1), + _mm_castsi128_ps(transpose2_3), + _MM_SHUFFLE(1, 0, 1, 0))); + transpose3_3 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1), + _mm_castsi128_ps(transpose2_3), + _MM_SHUFFLE(3, 2, 3, 2))); + // 00 10 20 30 40 50 60 70 80 90 A0 xx xx xx xx xx + // 01 11 21 31 41 51 61 71 81 91 A1 xx xx xx xx xx + // 02 12 22 32 42 52 62 72 82 92 A2 xx xx xx xx xx + // 03 13 23 33 43 53 63 73 83 93 A3 xx xx xx xx xx + } + + // Vertical pass (transpose3_x -> dst). 
+ { + const __m128i VFilter = _mm_load_si128((const __m128i *)VFilter_aligned16); + // get first two columns filter coefficients + __m128i fil01 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i fil23 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i fil45 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i fil67 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i col0, col1, col2, col3; + DECLARE_ALIGNED(16, unsigned char, temp[32]); + { + _mm_store_si128((__m128i *)temp, transpose3_0); + DO_FOUR_PIXELS(col0, temp, 0); + } + { + _mm_store_si128((__m128i *)temp, transpose3_1); + DO_FOUR_PIXELS(col1, temp, 0); + } + { + _mm_store_si128((__m128i *)temp, transpose3_2); + DO_FOUR_PIXELS(col2, temp, 0); + } + { + _mm_store_si128((__m128i *)temp, transpose3_3); + DO_FOUR_PIXELS(col3, temp, 0); + } + // transpose + { + __m128i T0 = _mm_unpacklo_epi32(col0, col1); + __m128i T1 = _mm_unpacklo_epi32(col2, col3); + __m128i T2 = _mm_unpackhi_epi32(col0, col1); + __m128i T3 = _mm_unpackhi_epi32(col2, col3); + col0 = _mm_unpacklo_epi64(T0, T1); + col1 = _mm_unpackhi_epi64(T0, T1); + col2 = _mm_unpacklo_epi64(T2, T3); + col3 = _mm_unpackhi_epi64(T2, T3); + } + // saturate to 8 bit + { + col0 = _mm_packs_epi32(col0, col0); + col0 = _mm_packus_epi16(col0, col0); + col1 = _mm_packs_epi32(col1, col1); + col1 = _mm_packus_epi16(col1, col1); + col2 = _mm_packs_epi32 (col2, col2); + col2 = _mm_packus_epi16(col2, col2); + col3 = _mm_packs_epi32 (col3, col3); + col3 = _mm_packus_epi16(col3, col3); + } + // store + { + *((unsigned int *)&dst_ptr[dst_stride * 0]) = _mm_cvtsi128_si32(col0); + *((unsigned int *)&dst_ptr[dst_stride * 1]) = _mm_cvtsi128_si32(col1); + *((unsigned int *)&dst_ptr[dst_stride * 2]) = _mm_cvtsi128_si32(col2); + *((unsigned int *)&dst_ptr[dst_stride * 3]) = _mm_cvtsi128_si32(col3); + } + } + } +} + +void vp8_filter_block2d_8x4_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short 
*HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + int j; + for (j=0; j<8; j+=4) { + vp8_filter_block2d_4x4_8_sse2(src_ptr + j, src_stride, + HFilter_aligned16, VFilter_aligned16, + dst_ptr + j, dst_stride); + } +} + +void vp8_filter_block2d_8x8_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short *HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + int i, j; + for (i=0; i<8; i+=4) { + for (j=0; j<8; j+=4) { + vp8_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride, + HFilter_aligned16, VFilter_aligned16, + dst_ptr + j + i*dst_stride, dst_stride); + } + } +} + +void vp8_filter_block2d_16x16_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short *HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + int i, j; + for (i=0; i<16; i+=4) { + for (j=0; j<16; j+=4) { + vp8_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride, + HFilter_aligned16, VFilter_aligned16, + dst_ptr + j + i*dst_stride, dst_stride); + } + } +} diff --git a/vp8/common/x86/filter_sse4.c b/vp8/common/x86/filter_sse4.c index a037622e1..c461db173 100644 --- a/vp8/common/x86/filter_sse4.c +++ b/vp8/common/x86/filter_sse4.c @@ -25,9 +25,6 @@ // TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coeficients (no sum // of positive above 128), or have higher precision filter // coefficients. -// TODO(cd): Remove use of _mm_extract_epi32 and _mm_extract_epi64, to not -// require SSE4.1 -// TODO(cd): Remove use of _mm_shuffle_epi8 to not require SSSE3 DECLARE_ALIGNED(16, static const unsigned char, mask0123_c[16]) = { 0x00, 0x01, |