diff options
48 files changed, 6417 insertions, 243 deletions
@@ -218,6 +218,9 @@ HAVE_LIST=" " EXPERIMENT_LIST=" extend_qrange + segmentation + t8x8 + csm " CONFIG_LIST=" external_build diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 96155237a..65ad43559 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -20,6 +20,7 @@ void vpx_log(const char *format, ...); #include "treecoder.h" #include "subpixel.h" #include "vpx_ports/mem.h" +#include "common.h" #define TRUE 1 #define FALSE 0 @@ -29,6 +30,7 @@ void vpx_log(const char *format, ...); #define DCPREDCNTTHRESH 3 #define MB_FEATURE_TREE_PROBS 3 + #define MAX_MB_SEGMENTS 4 #define MAX_REF_LF_DELTAS 4 @@ -64,6 +66,10 @@ extern const unsigned char vp8_block2above[25]; #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ Dest = ((A)!=0) + ((B)!=0); +#if CONFIG_T8X8 +#define VP8_COMBINEENTROPYCONTEXTS_8x8( Dest, A1, B1, A2, B2) \ + Dest = ((A1)!=0 || (A2)!=0) + ((B1)!=0 || (B2)!=0); +#endif typedef enum { @@ -88,18 +94,23 @@ typedef enum MB_MODE_COUNT } MB_PREDICTION_MODE; -/* Macroblock level features */ +// Segment level features. typedef enum { - MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */ - MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ - MB_LVL_MAX = 2 /* Number of MB level features supported */ - -} MB_LVL_FEATURES; + SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... + SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... +#if CONFIG_SEGFEATURES + SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame + SEG_LVL_MODE = 3, // Optional Segment mode + SEG_LVL_EOB = 4, // EOB end stop marker. + SEG_LVL_TRANSFORM = 5, // Block transform size. 
+ + SEG_LVL_MAX = 6 // Number of MB level features supported +#else + SEG_LVL_MAX = 2 // Number of MB level features supported +#endif -/* Segment Feature Masks */ -#define SEGMENT_ALTQ 0x01 -#define SEGMENT_ALT_LF 0x02 +} SEG_LVL_FEATURES; #define VP8_YMODES (B_PRED + 1) #define VP8_UV_MODES (TM_PRED + 1) @@ -157,7 +168,9 @@ typedef struct MB_PREDICTION_MODE mode, uv_mode; MV_REFERENCE_FRAME ref_frame; int_mv mv; - +#if CONFIG_SEGMENTATION + unsigned char segment_flag; +#endif unsigned char partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; @@ -232,9 +245,18 @@ typedef struct MacroBlockD /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */ /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ - vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */ +#if CONFIG_SEGMENTATION + vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS + 3]; // Probability Tree used to code Segment number + unsigned char temporal_update; +#else + vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; +#endif - signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */ + // Segment features + signed char segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX]; +#if CONFIG_SEGFEATURES + unsigned int segment_feature_mask[MAX_MB_SEGMENTS]; +#endif /* mode_based Loop filter adjustment */ unsigned char mode_ref_lf_delta_enabled; diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 9e194dc9a..6fe5fcc6f 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -183,3 +183,180 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE }, }, }; +#if CONFIG_T8X8 +const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + 
[ENTROPY_NODES] = +{ + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {219, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 209, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 219, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 209, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 209, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 198, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 204, 204, 255, 255, 255, 255, 255, 255, 255, 255, }, + {219, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 198, 204, 255, 255, 255, 255, 255, 255, 255, 255, }, + {209, 193, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 214, 214, 255, 255, 255, 255, 255, 255, 255, 255, }, + {173, 193, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 224, 219, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 239, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 234, 224, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 234, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 229, 255, 255, 255, 255, 
255, 255, 255, 255, }, + {229, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 255, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {234, 183, 214, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 229, 255, 249, 255, 255, 255, 255, 255, 255, }, + {229, 214, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 249, 255, 255, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 219, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 249, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 224, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 204, 234, 249, 249, 255, 255, 255, 255, 255, 255, }, + {255, 249, 249, 255, 244, 249, 255, 255, 255, 255, 255, }, + }, + { + {255, 178, 224, 255, 249, 255, 255, 255, 255, 255, 255, }, + {234, 224, 234, 249, 255, 255, 255, 255, 255, 
255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 183, 229, 255, 249, 255, 255, 255, 255, 255, 255, }, + {234, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 249, 249, 255, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 224, 249, 255, 244, 255, 255, 255, 255, 255, }, + {219, 224, 229, 255, 255, 249, 255, 255, 255, 255, 255, }, + {255, 255, 255, 249, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 224, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 244, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 239, 234, 244, 239, 244, 249, 255, 255, 255, 255, }, + }, + { + {255, 249, 239, 239, 244, 255, 255, 255, 255, 255, 255, }, + {255, 249, 244, 255, 249, 255, 255, 255, 255, 255, 255, }, + {255, 255, 239, 255, 255, 249, 255, 255, 255, 255, 255, }, + }, + { + {255, 244, 239, 239, 244, 255, 255, 255, 255, 255, 255, }, + {255, 234, 239, 234, 249, 255, 255, 255, 255, 255, 255, }, + {255, 255, 229, 239, 234, 249, 244, 255, 255, 255, 255, }, + }, + { + {255, 239, 229, 239, 234, 234, 255, 255, 255, 255, 255, }, + {255, 239, 234, 229, 244, 239, 255, 234, 255, 255, 255, }, + {255, 229, 209, 229, 239, 234, 244, 229, 255, 249, 255, }, + }, + { + {255, 239, 234, 229, 244, 249, 255, 249, 255, 255, 255, }, + {255, 234, 229, 244, 234, 249, 255, 249, 255, 255, 255, }, + {255, 229, 239, 229, 249, 255, 255, 244, 255, 255, 255, }, + }, + { + {255, 239, 234, 239, 234, 239, 255, 249, 255, 255, 255, }, + {255, 229, 234, 239, 239, 239, 255, 244, 255, 255, 255, }, + {255, 229, 234, 239, 239, 244, 255, 255, 255, 255, 255, }, + }, + { + {255, 219, 224, 229, 229, 234, 239, 224, 255, 255, 255, }, + {255, 229, 229, 224, 234, 229, 239, 239, 255, 255, 255, }, + {255, 229, 224, 239, 234, 239, 224, 224, 255, 249, 255, 
}, + }, + { + {255, 234, 229, 244, 229, 229, 255, 214, 255, 255, 255, }, + {255, 239, 234, 239, 214, 239, 255, 209, 255, 255, 255, }, + {249, 239, 219, 209, 219, 224, 239, 204, 255, 255, 255, }, + }, + }, + +}; +#endif
\ No newline at end of file diff --git a/vp8/common/common.h b/vp8/common/common.h index 9a93da991..999f79f2f 100644 --- a/vp8/common/common.h +++ b/vp8/common/common.h @@ -13,7 +13,7 @@ #define common_h 1 #include <assert.h> - +#include "vpx_config.h" /* Interface header for common constant data structures and lookup tables */ #include "vpx_mem/vpx_mem.h" @@ -38,5 +38,4 @@ #define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest)); - #endif /* common_h */ diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index 0f8a7898e..5fb64df36 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -60,6 +60,24 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = 9, 12, 13, 10, 7, 11, 14, 15, }; +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 +}; +DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = +{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +}; +#endif DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = { @@ -70,8 +88,15 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = }; DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]); +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, short, vp8_default_zig_zag_mask_8x8[64]);//int64_t +#endif -const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; +#if CONFIG_SEGFEATURES +const int vp8_mb_feature_data_bits[SEG_LVL_MAX] = {7, 6, 2, 3, 4, 2}; +#else +const int vp8_mb_feature_data_bits[SEG_LVL_MAX] = {7, 6}; +#endif /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -100,8 +125,14 @@ static const 
Prob Pcat2[] = { 165, 145}; static const Prob Pcat3[] = { 173, 148, 140}; static const Prob Pcat4[] = { 176, 155, 140, 135}; static const Prob Pcat5[] = { 180, 157, 141, 134, 130}; +#if CONFIG_EXTEND_QRANGE static const Prob Pcat6[] = { 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129}; +#else +static const Prob Pcat6[] = +{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129}; + +#endif -static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[22]; +static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[26]; /* two entries per extra bit; cat6 has 13 bits with CONFIG_EXTEND_QRANGE */ @@ -113,7 +144,12 @@ void vp8_init_scan_order_mask() { vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; } - +#if CONFIG_T8X8 + for (i = 0; i < 64; i++) + { + vp8_default_zig_zag_mask_8x8[vp8_default_zig_zag1d_8x8[i]] = 1 << i; + } +#endif } static void init_bit_tree(vp8_tree_index *p, int n) @@ -136,7 +172,11 @@ static void init_bit_trees() init_bit_tree(cat3, 3); init_bit_tree(cat4, 4); init_bit_tree(cat5, 5); +#if CONFIG_EXTEND_QRANGE init_bit_tree(cat6, 13); +#else + init_bit_tree(cat6, 11); +#endif } vp8_extra_bit_struct vp8_extra_bits[12] = @@ -151,7 +191,11 @@ vp8_extra_bit_struct vp8_extra_bits[12] = { cat3, Pcat3, 3, 11}, { cat4, Pcat4, 4, 19}, { cat5, Pcat5, 5, 35}, +#if CONFIG_EXTEND_QRANGE { cat6, Pcat6, 13, 67}, +#else + { cat6, Pcat6, 11, 67}, +#endif { 0, 0, 0, 0} }; @@ -161,6 +205,31 @@ void vp8_default_coef_probs(VP8_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs)); +#if CONFIG_T8X8 + { + int h = 0; /* block-type index; scoped here so the declaration opens a block */ + do + { + int i = 0; + do + { + int k = 0; + + do + { + unsigned int branch_ct [ENTROPY_NODES] [2]; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + pc->fc.coef_probs_8x8 [h][i][k], branch_ct, vp8_default_coef_counts_8x8 [h][i][k], + 256, 1); + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++i < COEF_BANDS); + } + while (++h < BLOCK_TYPES); + } +#endif } void vp8_coef_tree_initialize() diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 3c25453a7..4d15449c9 100644 ---
a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -14,7 +14,7 @@ #include "treecoder.h" #include "blockd.h" - +#include "common.h" /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -50,8 +50,11 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define PROB_UPDATE_BASELINE_COST 7 #define MAX_PROB 255 +#if CONFIG_EXTEND_QRANGE #define DCT_MAX_VALUE 8192 - +#else +#define DCT_MAX_VALUE 2048 +#endif /* Coefficients are predicted via a 3-dimensional probability table. */ @@ -64,6 +67,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define COEF_BANDS 8 extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); +#if CONFIG_T8X8 +extern DECLARE_ALIGNED(64, const unsigned char, vp8_coef_bands_8x8[64]); +#endif /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. For the first @@ -87,15 +93,20 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - +#if CONFIG_T8X8 +extern const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif struct VP8Common; void vp8_default_coef_probs(struct VP8Common *); - extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); extern short vp8_default_zig_zag_mask[16]; -extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; +#if CONFIG_T8X8 +extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); +extern short vp8_default_zig_zag_mask_8x8[64];//int64_t +#endif +extern const int vp8_mb_feature_data_bits[SEG_LVL_MAX]; void vp8_coef_tree_initialize(void); #endif diff --git a/vp8/common/generic/systemdependent.c 
b/vp8/common/generic/systemdependent.c index 47b13c755..1acc0157b 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -75,7 +75,13 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c; rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; - +#if CONFIG_T8X8 + rtcd->idct.idct8 = vp8_short_idct8x8_c; + rtcd->idct.idct8_1 = vp8_short_idct8x8_1_c; + rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c; + rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c; + rtcd->idct.ihaar2_1 = vp8_short_ihaar2x2_1_c; +#endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; diff --git a/vp8/common/idct.h b/vp8/common/idct.h index f5fd94dfd..d1890b9e5 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -31,6 +31,34 @@ #include "arm/idct_arm.h" #endif +#if CONFIG_T8X8 +#ifndef vp8_idct_idct8 +#define vp8_idct_idct8 vp8_short_idct8x8_c +#endif +extern prototype_idct(vp8_idct_idct8); + +#ifndef vp8_idct_idct8_1 +#define vp8_idct_idct8_1 vp8_short_idct8x8_1_c +#endif +extern prototype_idct(vp8_idct_idct8_1); + +#ifndef vp8_idct_ihaar2 +#define vp8_idct_ihaar2 vp8_short_ihaar2x2_c +#endif +extern prototype_idct(vp8_idct_ihaar2); + +#ifndef vp8_idct_ihaar2_1 +#define vp8_idct_ihaar2_1 vp8_short_ihaar2x2_1_c +#endif +extern prototype_idct(vp8_idct_ihaar2_1); + +#ifndef vp8_idct_idct1_scalar_add_8x8 +#define vp8_idct_idct1_scalar_add_8x8 vp8_dc_only_idct_add_8x8_c +#endif +extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add_8x8); + +#endif + #ifndef vp8_idct_idct1 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_c #endif @@ -69,6 +97,14 @@ typedef struct vp8_second_order_fn_t iwalsh1; vp8_second_order_fn_t iwalsh16; + +#if CONFIG_T8X8 + vp8_idct_fn_t idct8; + vp8_idct_fn_t idct8_1; + vp8_idct_scalar_add_fn_t idct1_scalar_add_8x8; + vp8_idct_fn_t ihaar2; + vp8_idct_fn_t 
ihaar2_1; +#endif } vp8_idct_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index c65d35adc..4f3a01b1b 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -24,9 +24,13 @@ **************************************************************************/ #include "vpx_ports/config.h" + +#include <math.h> + static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; static const int rounding = 0; + void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) { int i; @@ -222,3 +226,312 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output) op += 4; } } + +#if CONFIG_T8X8 + +#define FAST_IDCT_8X8 + +void vp8_short_idct8x8_1_c(short *input, short *output, int pitch) +{ + int i, b; + int a1; + short *op = output; + short *orig_op = output; + int shortpitch = pitch >> 1; + a1 = ((input[0] + 4) >> 3); + for (b = 0; b < 4; b++) + { + for (i = 0; i < 4; i++) + { + op[0] = a1; + op[1] = a1; + op[2] = a1; + op[3] = a1; + op += shortpitch; + } + op = orig_op + (b+1)%2*4 +(b+1)/2*4*shortpitch; + } +} + +void vp8_dc_only_idct_add_8x8_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) +{ + int a1 = ((input_dc + 4) >> 3); + int r, c, b; + unsigned char *orig_pred = pred_ptr; + unsigned char *orig_dst = dst_ptr; + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = a1 + pred_ptr[c] ; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dst_ptr[c] = (unsigned char) a ; + } + + dst_ptr += stride; + pred_ptr += pitch; + } + dst_ptr = orig_dst + (b+1)%2*4 + (b+1)/2*4*stride; + pred_ptr = orig_pred + (b+1)%2*4 + (b+1)/2*4*pitch; + } +} + +#ifdef FAST_IDCT_8X8 + +#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ +#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */ +#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */ +#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */ +#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */ 
+#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */ + +/* row (horizontal) IDCT + * + * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- * + * ( k + - ) * l ) l=0 8 2 + * + * where: c[0] = 128 c[1..7] = 128*sqrt(2) */ + +static void idctrow (int *blk) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) | + (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) + { + blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3; + return; + } + x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */ + + /* first stage */ + x8 = W7 * (x4 + x5); + x4 = x8 + (W1 - W7) * x4; + x5 = x8 - (W1 + W7) * x5; + x8 = W3 * (x6 + x7); + x6 = x8 - (W3 - W5) * x6; + x7 = x8 - (W3 + W5) * x7; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2); + x2 = x1 - (W2 + W6) * x2; + x3 = x1 + (W2 - W6) * x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[0] = (x7 + x1) >> 8; + blk[1] = (x3 + x2) >> 8; + blk[2] = (x0 + x4) >> 8; + blk[3] = (x8 + x6) >> 8; + blk[4] = (x8 - x6) >> 8; + blk[5] = (x0 - x4) >> 8; + blk[6] = (x3 - x2) >> 8; + blk[7] = (x7 - x1) >> 8; +} + +/* column (vertical) IDCT + * + * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] * + * cos( -- * ( k + - ) * l ) l=0 8 2 + * + * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */ +static void idctcol (int *blk) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) | + (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | (x7 = blk[8 * 3]))) + { + blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] = blk[8 * 7] = + ((blk[8 * 0] + 32) >> 6); + return; + } + x0 = (blk[8 * 0] << 8) + 8192; + + /* first 
stage */ + x8 = W7 * (x4 + x5) + 4; + x4 = (x8 + (W1 - W7) * x4) >> 3; + x5 = (x8 - (W1 + W7) * x5) >> 3; + x8 = W3 * (x6 + x7) + 4; + x6 = (x8 - (W3 - W5) * x6) >> 3; + x7 = (x8 - (W3 + W5) * x7) >> 3; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2) + 4; + x2 = (x1 - (W2 + W6) * x2) >> 3; + x3 = (x1 + (W2 - W6) * x3) >> 3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[8 * 0] = (x7 + x1) >> 14; + blk[8 * 1] = (x3 + x2) >> 14; + blk[8 * 2] = (x0 + x4) >> 14; + blk[8 * 3] = (x8 + x6) >> 14; + blk[8 * 4] = (x8 - x6) >> 14; + blk[8 * 5] = (x0 - x4) >> 14; + blk[8 * 6] = (x3 - x2) >> 14; + blk[8 * 7] = (x7 - x1) >> 14; +} + +#define TX_DIM 8 +void vp8_short_idct8x8_c(short *coefs, short *block, int pitch) +// an approximate 8x8 dct implementation, but not used +{ + int X[TX_DIM*TX_DIM]; + int i,j; + int shortpitch = pitch >> 1; + + for (i = 0; i < TX_DIM; i++) + { + for (j = 0; j < TX_DIM; j++) + { + X[i * TX_DIM + j] = (int)coefs[i * TX_DIM + j]; + } + } + for (i = 0; i < 8; i++) + idctrow (X + 8 * i); + + for (i = 0; i < 8; i++) + idctcol (X + i); + + for (i = 0; i < TX_DIM; i++) + { + for (j = 0; j < TX_DIM; j++) + { + block[i*shortpitch+j] = X[i * TX_DIM + j]>>1; + } + } +} + +#else + +/* This is really for testing */ +void vp8_short_idct8x8_c(short *input, short *output, int pitch) +{ + int X[8][8]; + double C[8][8]={{0.0}}, Ct[8][8]={{0.0}}, temp[8][8]={{0.0}}; + int i,j,k; + double temp1=0.0; + double pi = atan( 1.0 ) * 4.0; + //static int count=0; + + int shortpitch = pitch >> 1; + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + X[i][j] = input[i * 8 + j]; + } + } + + // TODO: DCT matrix should be calculated once for all + for ( j = 0 ; j < 8 ; j++ ) { + C[ 0 ][ j ] = 1.0 / sqrt( (double) 8 ); + Ct[ j ][ 0 ] = C[ 0 ][ j ]; + } + for ( i 
= 1 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + C[ i ][ j ] = sqrt( 2.0 / 8 ) * + cos( pi * ( 2 * j + 1 ) * i / ( 2.0 * 8 ) ); + Ct[ j ][ i ] = C[ i ][ j ]; + } + } + /* MatrixMultiply( temp, input, C ); */ + for ( i = 0 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + temp[ i ][ j ] = 0.0; + for ( k = 0 ; k < 8 ; k++ ) + temp[ i ][ j ] += X[ i ][ k ] * C[ k ][ j ]; + } + } + + /* MatrixMultiply( output, Ct, temp ); */ + for ( i = 0 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + temp1 = 0.0; + for ( k = 0 ; k < 8 ; k++ ) + temp1 += Ct[ i ][ k ] * temp[ k ][ j ]; + X[ i ][ j ] = floor( temp1/ 2.0 + 0.5); + } + } + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + output[i*shortpitch+j] = X[i][j]; + } + } +} +#endif + +void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) +{ + int i, x; + short *ip = input; //0,1, 4, 8 + short *op = output; + for (i = 0; i < 16; i++) + { + op[i] = 0; + } + + x = (ip[0] + ip[1] + ip[4] + ip[8]); + op[0] = (x>=0?x+1:x-1)>>2; + x = (ip[0] - ip[1] + ip[4] - ip[8]); + op[1] = (x>=0?x+1:x-1)>>2; + x = (ip[0] + ip[1] - ip[4] - ip[8]); + op[4] = (x>=0?x+1:x-1)>>2; + x = (ip[0] - ip[1] - ip[4] + ip[8]); + op[8] = (x>=0?x+1:x-1)>>2; +} + +void vp8_short_ihaar2x2_1_c(short *input, short *output, int pitch) +{ + int a1; + short *ip = input; + short *op = output; + a1 = ((ip[0]>=0?ip[0]+1:ip[0]-1) >> 2); + op[0] = a1; + op[2] = a1; + op[8] = a1; + op[10] = a1; + +} +#endif diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index 81a3f2d89..d361b654a 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -24,13 +24,24 @@ static void recon_dcblock(MACROBLOCKD *x) } } +#if CONFIG_T8X8 +static void recon_dcblock_8x8(MACROBLOCKD *x) +{ + BLOCKD *b = &x->block[24]; //for coeff 0, 2, 8, 10 + x->block[0].dqcoeff[0] = b->diff[0]; + x->block[4].dqcoeff[0] = b->diff[1]; + x->block[8].dqcoeff[0] = b->diff[4]; + x->block[12].dqcoeff[0] = b->diff[8]; + +} +#endif void vp8_inverse_transform_b(const 
vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) { - if (b->eob > 1) - IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch); - else + if (b->eob <= 1) IDCT_INVOKE(rtcd, idct1)(b->dqcoeff, b->diff, pitch); + else + IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch); } @@ -86,3 +97,77 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x } } + +#if CONFIG_T8X8 +void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch)//pay attention to use when 8x8 +{ + // int b,i; + //if (b->eob > 1) + IDCT_INVOKE(rtcd, idct8)(input_dqcoeff, output_coeff, pitch); + //else + //IDCT_INVOKE(rtcd, idct8_1)(b->dqcoeff, b->diff, pitch);//pitch + +} + + +void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + // do 2nd order transform on the dc block + IDCT_INVOKE(rtcd, ihaar2)(x->block[24].dqcoeff, x->block[24].diff, 8); + + recon_dcblock_8x8(x); //need to change for 8x8 + for (i = 0; i < 9; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32); + } + +} +void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + for (i = 16; i < 24; i += 4) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); + } + +} + + +void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + if (x->mode_info_context->mbmi.mode != B_PRED && + x->mode_info_context->mbmi.mode != SPLITMV) + { + // do 2nd order transform on the dc block + + IDCT_INVOKE(rtcd, ihaar2)(&x->block[24].dqcoeff[0], x->block[24].diff, 8);//dqcoeff[0] + recon_dcblock_8x8(x); //need to change for 8x8 + + } + + for (i = 0; i < 9; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], 
&x->block[i].diff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32); + } + + + for (i = 16; i < 24; i += 4) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); + } + +} +#endif diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index b3ffb7073..1466a5844 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -20,4 +20,11 @@ extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBL extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#if CONFIG_T8X8 +extern void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch); +extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#endif + #endif diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index fe0644bdd..a8855531e 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -215,17 +215,23 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, int lvl_seg = default_filt_lvl; int lvl_ref, lvl_mode; - /* Note the baseline filter values for each segment */ - if (mbd->segmentation_enabled) + + // Set the baseline filter values for each segment +#if CONFIG_SEGFEATURES + if ( mbd->segmentation_enabled && + ( mbd->segment_feature_mask[seg] & (1 << SEG_LVL_ALT_LF) ) ) +#else + if ( mbd->segmentation_enabled ) +#endif { /* Abs value */ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) { - lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; + lvl_seg = mbd->segment_feature_data[seg][SEG_LVL_ALT_LF]; } else /* Delta 
Value */ { - lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; + lvl_seg += mbd->segment_feature_data[seg][SEG_LVL_ALT_LF]; lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0; } } @@ -541,13 +547,13 @@ void vp8_loop_filter_partial_frame { /* Abs value */ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) { - lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i]; + lvl_seg[i] = mbd->segment_feature_data[i][SEG_LVL_ALT_LF]; } /* Delta Value */ else { lvl_seg[i] = default_filt_lvl - + mbd->segment_feature_data[MB_LVL_ALT_LF][i]; + + mbd->segment_feature_data[i][SEG_LVL_ALT_LF]; lvl_seg[i] = (lvl_seg[i] > 0) ? ((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0; } diff --git a/vp8/common/maskingmv.c b/vp8/common/maskingmv.c new file mode 100644 index 000000000..d01a18fc8 --- /dev/null +++ b/vp8/common/maskingmv.c @@ -0,0 +1,855 @@ +/* + ============================================================================ + Name : maskingmv.c + Author : jimbankoski + Version : + Copyright : Your copyright notice + Description : Hello World in C, Ansi-style + ============================================================================ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +extern unsigned int vp8_sad16x16_sse3( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + int max_err); + +extern void vp8_sad16x16x3_sse3( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + int *results); + +extern int vp8_growmaskmb_sse3( + unsigned char *om, + unsigned char *nm); + +extern void vp8_makemask_sse3( + unsigned char *y, + unsigned char *u, + unsigned char *v, + unsigned char *ym, + int yp, + int uvp, + int ys, + int us, + int vs, + int yt, + int ut, + int vt); + +unsigned int vp8_sad16x16_unmasked_wmt( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + unsigned char *mask); + +unsigned int vp8_sad16x16_masked_wmt( + unsigned char *src_ptr, 
+ int src_stride, + unsigned char *ref_ptr, + int ref_stride, + unsigned char *mask); + +unsigned int vp8_masked_predictor_wmt( + unsigned char *masked, + unsigned char *unmasked, + int src_stride, + unsigned char *dst_ptr, + int dst_stride, + unsigned char *mask); +unsigned int vp8_masked_predictor_uv_wmt( + unsigned char *masked, + unsigned char *unmasked, + int src_stride, + unsigned char *dst_ptr, + int dst_stride, + unsigned char *mask); +unsigned int vp8_uv_from_y_mask( + unsigned char *ymask, + unsigned char *uvmask); +int yp=16; +unsigned char sxy[]= +{ +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, 
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90 +}; + +unsigned char sts[]= +{ +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +}; +unsigned char str[]= +{ +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +}; + +unsigned char y[]= +{ +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, 
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40 +}; +int uvp=8; +unsigned char u[]= +{ +90,80,70,70,90,90,90,17, +90,80,70,70,90,90,90,17, +84,70,70,90,90,90,17,17, +84,70,70,90,90,90,17,17, +80,70,70,90,90,90,17,17, +90,80,70,70,90,90,90,17, +90,80,70,70,90,90,90,17, +90,80,70,70,90,90,90,17 +}; + +unsigned char v[]= +{ +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80 +}; + +unsigned char ym[256]; +unsigned char uvm[64]; +typedef struct +{ + unsigned char y; + unsigned char yt; + unsigned char u; + unsigned char ut; + unsigned char v; + unsigned char vt; + unsigned char use; +} COLOR_SEG_ELEMENT; + +/* +COLOR_SEG_ELEMENT segmentation[]= +{ + { 60,4,80,17,80,10, 1}, + { 40,4,15,10,80,10, 1}, +}; +*/ + +COLOR_SEG_ELEMENT segmentation[]= +{ + { 79,44,92,44, 237,60, 1}, +}; + +unsigned char pixel_mask(unsigned char y,unsigned char u,unsigned char v, + COLOR_SEG_ELEMENT sgm[], + int c) +{ + COLOR_SEG_ELEMENT *s=sgm; + unsigned char m =0; + int i; + for(i=0;i<c;i++,s++) + m |= ( abs(y-s->y)< s->yt && + abs(u-s->u)< s->ut && + abs(v-s->v)< s->vt ? 
255 : 0 ); + + return m; +} +int neighbors[256][8]; +int makeneighbors(void) +{ + int i,j; + for(i=0;i<256;i++) + { + int r=(i>>4),c=(i&15); + int ni=0; + for(j=0;j<8;j++) + neighbors[i][j]=i; + for(j=0;j<256;j++) + { + int nr=(j>>4),nc=(j&15); + if(abs(nr-r)<2&&abs(nc-c)<2) + neighbors[i][ni++]=j; + } + } + return 0; +} +void grow_ymask(unsigned char *ym) +{ + unsigned char nym[256]; + int i,j; + + for(i=0;i<256;i++) + { + nym[i]=ym[i]; + for(j=0;j<8;j++) + { + nym[i]|=ym[neighbors[i][j]]; + } + } + for(i=0;i<256;i++) + ym[i]=nym[i]; +} +void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v, + unsigned char *ym, unsigned char *uvm, + int yp, int uvp, + COLOR_SEG_ELEMENT sgm[], + int count) +{ + int r,c; + unsigned char *oym = ym; + + memset(ym,20,256); + for(r=0;r<8;r++,uvm+=8,u+=uvp,v+=uvp,y+=(yp+yp),ym+=32) + for(c=0;c<8;c++) + { + int y1=y[c<<1]; + int u1=u[c]; + int v1=v[c]; + int m = pixel_mask(y1,u1,v1,sgm,count); + uvm[c] = m; + ym[c<<1] = uvm[c];// = pixel_mask(y[c<<1],u[c],v[c],sgm,count); + ym[(c<<1)+1] = pixel_mask(y[1+(c<<1)],u[c],v[c],sgm,count); + ym[(c<<1)+16] = pixel_mask(y[yp+(c<<1)],u[c],v[c],sgm,count); + ym[(c<<1)+17] = pixel_mask(y[1+yp+(c<<1)],u[c],v[c],sgm,count); + } + grow_ymask(oym); +} + +int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp, + unsigned char *ym ) +{ + int i,j; + unsigned sad = 0; + for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16) + for(j=0;j<16;j++) + if(ym[j]) + sad+= abs(src[j]-dst[j]); + + return sad; +} + +int compare_masks(unsigned char *sym, unsigned char *ym) +{ + int i,j; + unsigned sad = 0; + for(i=0;i<16;i++,sym += 16,ym+=16) + for(j=0;j<16;j++) + sad+= (sym[j]!=ym[j]?1:0); + + return sad; +} +int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp, + unsigned char *ym) +{ + int i,j; + unsigned sad = 0; + for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16) + for(j=0;j<16;j++) + if(!ym[j]) + sad+= abs(src[j]-dst[j]); + + return sad; +} +int masked_motion_search( unsigned char *y, 
unsigned char *u, unsigned char *v, + int yp, int uvp, + unsigned char *dy, unsigned char *du, unsigned char *dv, + int dyp, int duvp, + COLOR_SEG_ELEMENT sgm[], + int count, + int *mi, + int *mj, + int *ui, + int *uj, + int *wm) +{ + int i,j; + + unsigned char ym[256]; + unsigned char uvm[64]; + unsigned char dym[256]; + unsigned char duvm[64]; + unsigned int e = 0 ; + int beste=256; + int bmi=-32,bmj=-32; + int bui=-32,buj=-32; + int beste1=256; + int bmi1=-32,bmj1=-32; + int bui1=-32,buj1=-32; + int obeste; + + // first try finding best mask and then unmasked + beste = 0xffffffff; + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count); + + e = unmasked_sad(y, yp, dyz+j, dyp, dym ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + //bui=0;buj=0; + // best mv masked destination + make_mb_mask(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2, + dym, duvm, dyp, duvp,sgm,count); + + obeste = beste; + beste = 0xffffffff; + + // find best masked + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = masked_sad(y, yp, dyz+j, dyp, dym ); + + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + beste1=beste+obeste; + bmi1=bmi;bmj1=bmj; + bui1=bui;buj1=buj; + + beste = 0xffffffff; + // source mask + make_mb_mask(y,u, v, ym, uvm, yp, uvp,sgm,count); + + // find best mask + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count); + + e = compare_masks(ym, dym); + + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + + + // best mv masked destination + 
make_mb_mask(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2, + dym, duvm, dyp, duvp,sgm,count); + + obeste = masked_sad(y, yp, dy+bmi*dyp+bmj, dyp, dym ); + + beste = 0xffffffff; + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = unmasked_sad(y, yp, dyz+j, dyp, dym ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + beste += obeste; + + + if(beste<beste1) + { + *mi = bmi; + *mj = bmj; + *ui = bui; + *uj = buj; + *wm = 1; + } + else + { + *mi = bmi1; + *mj = bmj1; + *ui = bui1; + *uj = buj1; + *wm = 0; + + } + return 0; +} + +int predict(unsigned char *src, int p, unsigned char *dst, int dp, + unsigned char *ym, unsigned char *prd ) +{ + int i,j; + for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16, prd+=16) + for(j=0;j<16;j++) + prd[j]=(ym[j] ? src[j]:dst[j]); + return 0; +} + +int fast_masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v, + int yp, int uvp, + unsigned char *dy, unsigned char *du, unsigned char *dv, + int dyp, int duvp, + COLOR_SEG_ELEMENT sgm[], + int count, + int *mi, + int *mj, + int *ui, + int *uj, + int *wm) +{ + int i,j; + + unsigned char ym[256]; + unsigned char ym2[256]; + unsigned char uvm[64]; + unsigned char dym2[256]; + unsigned char dym[256]; + unsigned char duvm[64]; + unsigned int e = 0 ; + int beste=256; + int bmi=-32,bmj=-32; + int bui=-32,buj=-32; + int beste1=256; + int bmi1=-32,bmj1=-32; + int bui1=-32,buj1=-32; + int obeste; + + // first try finding best mask and then unmasked + beste = 0xffffffff; + +#if 0 + for(i=0;i<16;i++) + { + unsigned char *dy = i*yp + y; + for(j=0;j<16;j++) + printf("%2x",dy[j]); + printf("\n"); + } + printf("\n"); + + for(i=-32;i<48;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<48;j++) + printf("%2x",dyz[j]); + printf("\n"); + } +#endif + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = 
i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + //bui=0;buj=0; + // best mv masked destination + + vp8_makemask_sse3(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2, + dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + obeste = beste; + beste = 0xffffffff; + + // find best masked + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = vp8_sad16x16_masked_wmt(y, yp, dyz+j, dyp, dym2 ); + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + beste1=beste+obeste; + bmi1=bmi;bmj1=bmj; + bui1=bui;buj1=buj; + + // source mask + vp8_makemask_sse3(y,u, v, + ym, yp, uvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(ym,ym2); + + // find best mask + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + e = compare_masks(ym2, dym2); + + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + + vp8_makemask_sse3(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2, + dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + obeste = vp8_sad16x16_masked_wmt(y, yp, dy+bmi*dyp+bmj, dyp, dym2 ); + + beste = 0xffffffff; + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = vp8_sad16x16_unmasked_wmt(y, 
yp, dyz+j, dyp, dym2 ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + beste += obeste; + + if(beste<beste1) + { + *mi = bmi; + *mj = bmj; + *ui = bui; + *uj = buj; + *wm = 1; + } + else + { + *mi = bmi1; + *mj = bmj1; + *ui = bui1; + *uj = buj1; + *wm = 0; + beste=beste1; + + } + return beste; +} + +int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm, + int ymp, int uvmp, + unsigned char *yp, unsigned char *up, unsigned char *vp, + int ypp, int uvpp, + COLOR_SEG_ELEMENT sgm[], + int count, + int mi, + int mj, + int ui, + int uj, + int wm) +{ + int i,j; + unsigned char dym[256]; + unsigned char dym2[256]; + unsigned char duvm[64]; + unsigned char *yu=ym,*uu=um, *vu=vm; + + unsigned char *dym3=dym2; + + ym+=mi*ymp+mj; + um+=mi/2*uvmp+mj/2; + vm+=mi/2*uvmp+mj/2; + + yu+=ui*ymp+uj; + uu+=ui/2*uvmp+uj/2; + vu+=ui/2*uvmp+uj/2; + + // best mv masked destination + if(wm) + vp8_makemask_sse3(ym,um, vm, dym, ymp, uvmp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + else + vp8_makemask_sse3(yu,uu, vu, dym, ymp, uvmp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + vp8_masked_predictor_wmt(ym,yu,ymp,yp,ypp,dym3); + vp8_uv_from_y_mask(dym3,duvm); + vp8_masked_predictor_uv_wmt(um,uu,uvmp,up,uvpp,duvm); + vp8_masked_predictor_uv_wmt(vm,vu,uvmp,vp,uvpp,duvm); + + return 0; +} + +unsigned char f0p[1280*720*3/2]; +unsigned char f1p[1280*720*3/2]; +unsigned char prd[1280*720*3/2]; +unsigned char msk[1280*720*3/2]; + + +int mainz(int argc, char *argv[]) { + + FILE *f=fopen(argv[1],"rb"); + FILE *g=fopen(argv[2],"wb"); + int w=atoi(argv[3]),h=atoi(argv[4]); + int y_stride=w,uv_stride=w/2; + int r,c; + unsigned char *f0=f0p,*f1=f1p,*t; + unsigned char ym[256],uvm[64]; + unsigned char ym2[256],uvm2[64]; + unsigned char ym3[256],uvm3[64]; + int a,b; + + COLOR_SEG_ELEMENT last={ 20,20,20,20, 230,20, 1},best; +#if 0 + makeneighbors(); + COLOR_SEG_ELEMENT segmentation[]= + { + { 
60,4,80,17,80,10, 1}, + { 40,4,15,10,80,10, 1}, + }; + make_mb_mask(y, u, v,ym2,uvm2,16,8,segmentation,1); + + vp8_makemask_sse3(y,u,v,ym, (int) 16,(int) 8, + (int) segmentation[0].y,(int) segmentation[0].u,(int) segmentation[0].v, + segmentation[0].yt,segmentation[0].ut,segmentation[0].vt); + + vp8_growmaskmb_sse3(ym,ym3); + + a = vp8_sad16x16_masked_wmt(str,16,sts,16,ym3); + b = vp8_sad16x16_unmasked_wmt(str,16,sts,16,ym3); + + vp8_masked_predictor_wmt(str,sts,16,ym,16,ym3); + + vp8_uv_from_y_mask(ym3,uvm3); + + return 4; +#endif + makeneighbors(); + + + memset(prd,128,w*h*3/2); + + fread(f0,w*h*3/2,1,f); + + while(!feof(f)) + { + unsigned char *ys=f1,*yd=f0,*yp=prd; + unsigned char *us=f1+w*h,*ud=f0+w*h,*up=prd+w*h; + unsigned char *vs=f1+w*h*5/4,*vd=f0+w*h*5/4,*vp=prd+w*h*5/4; + fread(f1,w*h*3/2,1,f); + + ys+=32*y_stride;yd+=32*y_stride;yp+=32*y_stride; + us+=16*uv_stride;ud+=16*uv_stride;up+=16*uv_stride; + vs+=16*uv_stride;vd+=16*uv_stride;vp+=16*uv_stride; + for(r=32;r<h-32;r+=16, + ys+=16*w,yd+=16*w,yp+=16*w, + us+=8*uv_stride,ud+=8*uv_stride,up+=8*uv_stride, + vs+=8*uv_stride,vd+=8*uv_stride,vp+=8*uv_stride) + { + for(c=32;c<w-32;c+=16) + { + int mi,mj,ui,uj,wm; + int bmi,bmj,bui,buj,bwm; + unsigned char ym[256]; + + if(vp8_sad16x16_sse3( ys+c,y_stride, yd+c,y_stride,0xffff) == 0) + bmi=bmj=bui=buj=bwm=0; + else + { + COLOR_SEG_ELEMENT cs[5]; + int j; + unsigned int beste=0xfffffff; + unsigned int bestj=0; + + // try color from last mb segmentation + cs[0] = last; + + // try color segs from 4 pixels in mb recon as segmentation + cs[1].y = yd[c + y_stride + 1];cs[1].u = ud[c/2 + uv_stride]; + cs[1].v = vd[c/2 + uv_stride]; + cs[1].yt = cs[1].ut = cs[1].vt = 20; + cs[2].y = yd[c + w + 14]; + cs[2].u = ud[c/2 + uv_stride+7]; + cs[2].v = vd[c/2 + uv_stride+7]; + cs[2].yt = cs[2].ut = cs[2].vt = 20; + cs[3].y = yd[c + w*14 + 1]; + cs[3].u = ud[c/2 + uv_stride*7]; + cs[3].v = vd[c/2 + uv_stride*7]; + cs[3].yt = cs[3].ut = cs[3].vt = 20; + cs[4].y = yd[c + w*14 + 
14]; + cs[4].u = ud[c/2 + uv_stride*7+7]; + cs[4].v = vd[c/2 + uv_stride*7+7]; + cs[4].yt = cs[4].ut = cs[4].vt = 20; + + for(j=0;j<5;j++) + { + int e; + + e = fast_masked_motion_search( + ys+c, us+c/2, vs+c/2, y_stride, uv_stride, + yd+c, ud+c/2, vd+c/2, y_stride, uv_stride, + &cs[j], 1, &mi,&mj,&ui,&uj,&wm); + + if(e<beste) + { + bmi=mi;bmj=mj;bui=ui;buj=uj,bwm=wm; + bestj=j; + beste=e; + } + } + best = cs[bestj]; + //best = segmentation[0]; + last = best; + } + predict_all(yd+c, ud+c/2, vd+c/2, w, uv_stride, + yp+c, up+c/2, vp+c/2, w, uv_stride, + &best, 1, bmi,bmj,bui,buj,bwm); + + } + } + fwrite(prd,w*h*3/2,1,g); + t=f0; + f0=f1; + f1=t; + + } + fclose(f); + fclose(g); + return; +} diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 11a830ff6..f84bd6e4a 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -46,6 +46,9 @@ typedef struct frame_contexts vp8_prob uv_mode_prob [VP8_UV_MODES-1]; vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_T8X8 + vp8_prob coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif MV_CONTEXT mvc[2]; MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ } FRAME_CONTEXT; diff --git a/vp8/common/x86/mask_sse3.asm b/vp8/common/x86/mask_sse3.asm new file mode 100644 index 000000000..0d90cfa86 --- /dev/null +++ b/vp8/common/x86/mask_sse3.asm @@ -0,0 +1,484 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" + +;void int vp8_makemask_sse3( +; unsigned char *y, +; unsigned char *u, +; unsigned char *v, +; unsigned char *ym, +; unsigned char *uvm, +; int yp, +; int uvp, +; int ys, +; int us, +; int vs, +; int yt, +; int ut, +; int vt) +global sym(vp8_makemask_sse3) +sym(vp8_makemask_sse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 14 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;y + mov rdi, arg(1) ;u + mov rcx, arg(2) ;v + mov rax, arg(3) ;ym + movsxd rbx, dword arg(4) ;yp + movsxd rdx, dword arg(5) ;uvp + + pxor xmm0,xmm0 + + ;make 16 copies of the center y value + movd xmm1, arg(6) + pshufb xmm1, xmm0 + + ; make 16 copies of the center u value + movd xmm2, arg(7) + pshufb xmm2, xmm0 + + ; make 16 copies of the center v value + movd xmm3, arg(8) + pshufb xmm3, xmm0 + unpcklpd xmm2, xmm3 + + ;make 16 copies of the y tolerance + movd xmm3, arg(9) + pshufb xmm3, xmm0 + + ;make 16 copies of the u tolerance + movd xmm4, arg(10) + pshufb xmm4, xmm0 + + ;make 16 copies of the v tolerance + movd xmm5, arg(11) + pshufb xmm5, xmm0 + unpckhpd xmm4, xmm5 + + mov r8,8 + +NextPairOfRows: + + ;grab the y source values + movdqu xmm0, [rsi] + + ;compute abs difference between source and y target + movdqa xmm6, xmm1 + movdqa xmm7, xmm0 + psubusb xmm0, xmm1 + psubusb xmm6, xmm7 + por xmm0, xmm6 + + ;compute abs difference between + movdqa xmm6, xmm3 + pcmpgtb xmm6, xmm0 + + ;grab the y source values + add rsi, rbx + movdqu xmm0, [rsi] + + ;compute abs difference between source and y target + movdqa xmm11, xmm1 + movdqa xmm7, xmm0 + psubusb xmm0, xmm1 + psubusb xmm11, xmm7 + por xmm0, xmm11 + + ;compute abs difference between + movdqa xmm11, xmm3 + pcmpgtb xmm11, xmm0 + + + ;grab the u and v source values + movdqu xmm7, [rdi] + movdqu xmm8, [rcx] + unpcklpd xmm7, xmm8 + + ;compute abs difference between source and uv targets + movdqa xmm9, xmm2 + movdqa xmm10, xmm7 + psubusb xmm7, xmm2 + psubusb xmm9, xmm10 + por xmm7, 
xmm9 + + ;check whether the number is < tolerance + movdqa xmm0, xmm4 + pcmpgtb xmm0, xmm7 + + ;double u and v masks + movdqa xmm8, xmm0 + punpckhbw xmm0, xmm0 + punpcklbw xmm8, xmm8 + + ;mask row 0 and output + pand xmm6, xmm8 + pand xmm6, xmm0 + movdqa [rax],xmm6 + + ;mask row 1 and output + pand xmm11, xmm8 + pand xmm11, xmm0 + movdqa [rax+16],xmm11 + + + ; to the next row or set of rows + add rsi, rbx + add rdi, rdx + add rcx, rdx + add rax,32 + dec r8 + jnz NextPairOfRows + + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;GROW_HORIZ (register for result, source register or mem local) +; takes source and shifts left and ors with source +; then shifts right and ors with source +%macro GROW_HORIZ 2 + movdqa %1, %2 + movdqa xmm14, %1 + movdqa xmm15, %1 + pslldq xmm14, 1 + psrldq xmm15, 1 + por %1,xmm14 + por %1,xmm15 +%endmacro +;GROW_VERT (result, center row, above row, below row) +%macro GROW_VERT 4 + movdqa %1,%2 + por %1,%3 + por %1,%4 +%endmacro + +;GROW_NEXTLINE (new line to grow, new source, line to write) +%macro GROW_NEXTLINE 3 + GROW_HORIZ %1, %2 + GROW_VERT xmm3, xmm0, xmm1, xmm2 + movdqa %3,xmm3 +%endmacro + + +;void int vp8_growmaskmb_sse3( +; unsigned char *om, +; unsigned char *nm, +global sym(vp8_growmaskmb_sse3) +sym(vp8_growmaskmb_sse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 2 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src + mov rdi, arg(1) ;rst + + GROW_HORIZ xmm0, [rsi] + GROW_HORIZ xmm1, [rsi+16] + GROW_HORIZ xmm2, [rsi+32] + + GROW_VERT xmm3, xmm0, xmm1, xmm2 + por xmm0,xmm1 + movdqa [rdi], xmm0 + movdqa [rdi+16],xmm3 + + GROW_NEXTLINE xmm0,[rsi+48],[rdi+32] + GROW_NEXTLINE xmm1,[rsi+64],[rdi+48] + GROW_NEXTLINE xmm2,[rsi+80],[rdi+64] + GROW_NEXTLINE xmm0,[rsi+96],[rdi+80] + GROW_NEXTLINE xmm1,[rsi+112],[rdi+96] + GROW_NEXTLINE xmm2,[rsi+128],[rdi+112] + GROW_NEXTLINE xmm0,[rsi+144],[rdi+128] + GROW_NEXTLINE xmm1,[rsi+160],[rdi+144] + GROW_NEXTLINE xmm2,[rsi+176],[rdi+160] + GROW_NEXTLINE 
xmm0,[rsi+192],[rdi+176] + GROW_NEXTLINE xmm1,[rsi+208],[rdi+192] + GROW_NEXTLINE xmm2,[rsi+224],[rdi+208] + GROW_NEXTLINE xmm0,[rsi+240],[rdi+224] + + por xmm0,xmm2 + movdqa [rdi+240], xmm0 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + + +;unsigned int vp8_sad16x16_masked_wmt( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *ref_ptr, +; int ref_stride, +; unsigned char *mask) +global sym(vp8_sad16x16_masked_wmt) +sym(vp8_sad16x16_masked_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + mov rbx, arg(4) ;mask + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + mov rcx, 16 + + pxor xmm3, xmm3 + +NextSadRow: + movdqu xmm0, [rsi] + movdqu xmm1, [rdi] + movdqu xmm2, [rbx] + pand xmm0, xmm2 + pand xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm3, xmm0 + + add rsi, rax + add rdi, rdx + add rbx, 16 + + dec rcx + jnz NextSadRow + + movdqa xmm4 , xmm3 + psrldq xmm4, 8 + paddw xmm3, xmm4 + movq rax, xmm3 + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;unsigned int vp8_sad16x16_unmasked_wmt( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *ref_ptr, +; int ref_stride, +; unsigned char *mask) +global sym(vp8_sad16x16_unmasked_wmt) +sym(vp8_sad16x16_unmasked_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + mov rbx, arg(4) ;mask + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + mov rcx, 16 + + pxor xmm3, xmm3 + +next_vp8_sad16x16_unmasked_wmt: + movdqu xmm0, [rsi] + movdqu xmm1, [rdi] + movdqu xmm2, [rbx] + por xmm0, xmm2 + por xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm3, xmm0 + + add rsi, rax + add rdi, rdx + add rbx, 16 + + dec rcx + jnz next_vp8_sad16x16_unmasked_wmt + + movdqa xmm4 , xmm3 + psrldq xmm4, 8 + paddw 
xmm3, xmm4 + movq rax, xmm3 + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;unsigned int vp8_masked_predictor_wmt( +; unsigned char *masked, +; unsigned char *unmasked, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; unsigned char *mask) +global sym(vp8_masked_predictor_wmt) +sym(vp8_masked_predictor_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;ref_ptr + + mov rbx, arg(5) ;mask + movsxd rax, dword ptr arg(2) ;src_stride + mov r11, arg(3) ; destination + movsxd rdx, dword ptr arg(4) ;dst_stride + + mov rcx, 16 + + pxor xmm3, xmm3 + +next_vp8_masked_predictor_wmt: + movdqu xmm0, [rsi] + movdqu xmm1, [rdi] + movdqu xmm2, [rbx] + + pand xmm0, xmm2 + pandn xmm2, xmm1 + por xmm0, xmm2 + movdqu [r11], xmm0 + + add r11, rdx + add rsi, rax + add rdi, rdx + add rbx, 16 + + dec rcx + jnz next_vp8_masked_predictor_wmt + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;unsigned int vp8_masked_predictor_uv_wmt( +; unsigned char *masked, +; unsigned char *unmasked, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; unsigned char *mask) +global sym(vp8_masked_predictor_uv_wmt) +sym(vp8_masked_predictor_uv_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;ref_ptr + + mov rbx, arg(5) ;mask + movsxd rax, dword ptr arg(2) ;src_stride + mov r11, arg(3) ; destination + movsxd rdx, dword ptr arg(4) ;dst_stride + + mov rcx, 8 + + pxor xmm3, xmm3 + +next_vp8_masked_predictor_uv_wmt: + movq xmm0, [rsi] + movq xmm1, [rdi] + movq xmm2, [rbx] + + pand xmm0, xmm2 + pandn xmm2, xmm1 + por xmm0, xmm2 + movq [r11], xmm0 + + add r11, rdx + add rsi, rax + add rdi, rax + add rbx, 8 + + dec rcx + jnz next_vp8_masked_predictor_uv_wmt + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;unsigned int 
vp8_uv_from_y_mask( +; unsigned char *ymask, +; unsigned char *uvmask) +global sym(vp8_uv_from_y_mask) +sym(vp8_uv_from_y_mask): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;dst_ptr + + + mov rcx, 8 + + pxor xmm3, xmm3 + +next_p8_uv_from_y_mask: + movdqu xmm0, [rsi] + pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)] + movq [rdi],xmm0 + add rdi, 8 + add rsi,32 + + dec rcx + jnz next_p8_uv_from_y_mask + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +shuf1b: + db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0 + diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 54547d95c..0997cfd96 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -208,11 +208,13 @@ static const unsigned char mbsplit_fill_offset[4][16] = { - static void mb_mode_mv_init(VP8D_COMP *pbi) { vp8_reader *const bc = & pbi->bc; MV_CONTEXT *const mvc = pbi->common.fc.mvc; +#if CONFIG_SEGMENTATION + MACROBLOCKD *const xd = & pbi->mb; +#endif #if CONFIG_ERROR_CONCEALMENT /* Default is that no macroblock is corrupt, therefore we initialize @@ -253,6 +255,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) } read_mvcontexts(bc, mvc); +#if CONFIG_SEGMENTATION + xd->temporal_update = vp8_read_bit(bc); +#endif } } @@ -263,7 +268,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, vp8_reader *const bc = & pbi->bc; MV_CONTEXT *const mvc = pbi->common.fc.mvc; const int mis = pbi->common.mode_info_stride; - +#if CONFIG_SEGMENTATION + MACROBLOCKD *const xd = & pbi->mb; + int sum; + int index = mb_row * pbi->common.mb_cols + mb_col; +#endif int_mv *const mv = & mbmi->mv; int mb_to_left_edge; int mb_to_right_edge; @@ -274,7 +283,6 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge; mb_to_top_edge -= LEFT_TOP_MARGIN; mb_to_bottom_edge += 
RIGHT_BOTTOM_MARGIN; - mbmi->need_to_clamp_mvs = 0; /* Distance of Mb to the various image edges. * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units @@ -289,7 +297,41 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, /* If required read in new segmentation data for this MB */ if (pbi->mb.update_mb_segmentation_map) - vp8_read_mb_features(bc, mbmi, &pbi->mb); + { +#if CONFIG_SEGMENTATION + if (xd->temporal_update) + { + sum = 0; + + if (mb_col != 0) + sum += (mi-1)->mbmi.segment_flag; + if (mb_row != 0) + sum += (mi-pbi->common.mb_cols)->mbmi.segment_flag; + + if (vp8_read(bc, xd->mb_segment_tree_probs[3+sum]) == 0) + { + mbmi->segment_id = pbi->segmentation_map[index]; + mbmi->segment_flag = 0; + } + else + { + vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb); + mbmi->segment_flag = 1; + pbi->segmentation_map[index] = mbmi->segment_id; + } + + } + else + { + vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb); + pbi->segmentation_map[index] = mbmi->segment_id; + } + index++; +#else + vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb); +#endif + } + /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */ if (pbi->common.mb_no_coeff_skip) @@ -530,4 +572,3 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) mi++; /* skip left predictor each row */ } } - diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index ddb09703b..69f2905d4 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -40,6 +40,10 @@ #include <assert.h> #include <stdio.h> +#ifdef DEC_DEBUG +int dec_debug = 0; +#endif + void vp8cx_init_de_quantizer(VP8D_COMP *pbi) { int i; @@ -68,20 +72,27 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) { int i; int QIndex; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; VP8_COMMON *const pc = & pbi->common; + int segment_id = xd->mode_info_context->mbmi.segment_id; - /* Decide whether to use the default or alternate baseline Q value. 
*/ - if (xd->segmentation_enabled) + + // Set the Q baseline allowing for any segment level adjustment +#if CONFIG_SEGFEATURES + if ( xd->segmentation_enabled && + ( xd->segment_feature_mask[segment_id] & (1 << SEG_LVL_ALT_Q) ) ) +#else + if ( xd->segmentation_enabled ) +#endif { /* Abs Value */ if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; + QIndex = xd->segment_feature_data[segment_id][SEG_LVL_ALT_Q]; /* Delta Value */ else { - QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; + QIndex = pc->base_qindex + + xd->segment_feature_data[segment_id][SEG_LVL_ALT_Q]; QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ } } @@ -125,6 +136,16 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); +#ifdef DEC_DEBUG + if (dec_debug) { + int i, j; + printf("Generating predictors\n"); + for (i=0;i<16;i++) { + for (j=0;j<16;j++) printf("%3d ", xd->dst.y_buffer[i*xd->dst.y_stride+j]); + printf("\n"); + } + } +#endif } } @@ -193,7 +214,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } else { - eobtotal = vp8_decode_mb_tokens(pbi, xd); + +#if CONFIG_T8X8 + for(i = 0; i < 25; i++) + { + xd->block[i].eob = 0; + xd->eobs[i] = 0; + } + if (xd->mode_info_context->mbmi.segment_id >= 2) + eobtotal = vp8_decode_mb_tokens_8x8(pbi, xd); + else +#endif + eobtotal = vp8_decode_mb_tokens(pbi, xd); +#ifdef DEC_DEBUG + if (dec_debug) { + printf("\nTokens (%d)\n", eobtotal); + for (i =0; i<400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + } +#endif } /* Perform temporary clamping of the MV to be used for prediction */ @@ -285,10 +327,22 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } else if (mode == SPLITMV) { - DEQUANT_INVOKE 
(&pbi->dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); +#if CONFIG_T8X8 + if(xd->mode_info_context->mbmi.segment_id >= 2) + { + DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block_8x8) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd); + } + else +#endif + { + DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + } } else { @@ -297,10 +351,23 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, DEQUANT_INVOKE(&pbi->dequant, block)(b); /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) +#if CONFIG_T8X8 + if(xd->mode_info_context->mbmi.segment_id >= 2) { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + DEQUANT_INVOKE(&pbi->dequant, block_8x8)(b); +#ifdef DEC_DEBUG + if (dec_debug) + { + int j; + printf("DQcoeff Haar\n"); + for (j=0;j<16;j++) { + printf("%d ", b->dqcoeff[j]); + } + printf("\n"); + } +#endif + IDCT_INVOKE(RTCD_VTABLE(idct), ihaar2)(&b->dqcoeff[0], b->diff, 8); + ((int *)b->qcoeff)[0] = 0;//2nd order block are set to 0 after inverse transform ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; ((int *)b->qcoeff)[3] = 0; @@ -308,23 +375,55 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[5] = 0; ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block_8x8) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + } + else - { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - } +#endif + if (xd->eobs[24] > 1) + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + ((int *)b->qcoeff)[1] = 0; + 
((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } + else + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } - DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd->block[24].diff); + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff); } +#if CONFIG_T8X8 + if(xd->mode_info_context->mbmi.segment_id >= 2) + { + DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block_8x8)// + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16, xd);// + + } + else +#endif DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) (xd->qcoeff+16*16, xd->block[16].dequant, xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); + } @@ -438,6 +537,9 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) vp8_build_uvmvs(xd, pc->full_pixel); +#ifdef DEC_DEBUG + dec_debug = (pc->current_video_frame==5 && mb_row==2 && mb_col==3); +#endif /* if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0) pbi->debugoutput =1; @@ -448,7 +550,6 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) /* check if the boolean decoder has suffered an error */ xd->corrupted |= vp8dx_bool_error(xd->current_bc); - recon_yoffset += 16; recon_uvoffset += 8; @@ -632,8 +733,15 @@ static void init_frame(VP8D_COMP *pbi) vp8_default_coef_probs(pc); vp8_kf_default_bmode_probs(pc->kf_bmode_prob); - /* reset the segment feature data to 0 with delta coding (Default state). 
*/ - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); + // Reset the segment feature data to the default stats: + // Features disabled, 0, with delta coding (Default state). +#if CONFIG_SEGFEATURES + vpx_memset(xd->segment_feature_mask, 0, + sizeof(xd->segment_feature_mask)); +#endif + vpx_memset(xd->segment_feature_data, 0, + sizeof(xd->segment_feature_data)); + xd->mb_segement_abs_delta = SEGMENT_DELTADATA; /* reset the mode ref deltasa for loop filter */ @@ -832,7 +940,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) /* Is segmentation enabled */ xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); - if (xd->segmentation_enabled) { /* Signal whether or not the segmentation map is being explicitly updated this frame. */ @@ -843,23 +950,47 @@ int vp8_decode_frame(VP8D_COMP *pbi) { xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); + // Clear down feature data structure + vpx_memset(xd->segment_feature_data, 0, + sizeof(xd->segment_feature_data)); + +#if CONFIG_SEGFEATURES + // Clear down feature enabled masks + vpx_memset(xd->segment_feature_mask, 0, + sizeof(xd->segment_feature_mask)); - /* For each segmentation feature (Quant and loop filter level) */ - for (i = 0; i < MB_LVL_MAX; i++) + // For each segmentation... + for (j = 0; j < MAX_MB_SEGMENTS; j++) { + // For each of the segments features... + for (i = 0; i < SEG_LVL_MAX; i++) + { + +#else + // For each segmentation feature... + for (i = 0; i < SEG_LVL_MAX; i++) + { + // For each segmentation... 
for (j = 0; j < MAX_MB_SEGMENTS; j++) { - /* Frame level data */ +#endif + // Is the feature enabled if (vp8_read_bit(bc)) { - xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); +#if CONFIG_SEGFEATURES + // Update the feature data and mask + xd->segment_feature_mask[j] |= (1 << i); +#endif + + xd->segment_feature_data[j][i] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); if (vp8_read_bit(bc)) - xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j]; + xd->segment_feature_data[j][i] = -xd->segment_feature_data[j][i]; } else - xd->segment_feature_data[i][j] = 0; + { + xd->segment_feature_data[j][i] = 0; + } } } } @@ -868,9 +999,12 @@ int vp8_decode_frame(VP8D_COMP *pbi) { /* Which macro block level features are enabled */ vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); - +#if CONFIG_SEGMENTATION /* Read the probs used to decode the segment id for each macro block. */ + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) +#else for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) +#endif { /* If not explicitly set value is defaulted to 255 by memset above */ if (vp8_read_bit(bc)) @@ -1041,10 +1175,36 @@ int vp8_decode_frame(VP8D_COMP *pbi) } } +#if CONFIG_T8X8 + { + // read coef probability tree + + for (i = 0; i < BLOCK_TYPES; i++) + for (j = 0; j < COEF_BANDS; j++) + for (k = 0; k < PREV_COEF_CONTEXTS; k++) + for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + { + + vp8_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l; + + if (vp8_read(bc, vp8_coef_update_probs_8x8 [i][j][k][l])) + { + *p = (vp8_prob)vp8_read_literal(bc, 8); + + } + } + } +#endif vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); +#if CONFIG_SEGMENTATION + // Create the encoder segmentation map and set all entries to 0 + if (!pbi->segmentation_map) + CHECK_MEM_ERROR(pbi->segmentation_map, vpx_calloc((pc->mb_rows * 
pc->mb_cols), 1)); +#endif + /* set up frame new frame for intra coded blocks */ #if CONFIG_MULTITHREAD if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) @@ -1153,6 +1313,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) fclose(f); } #endif + //printf("Frame %d Done\n", frame_count++); return 0; } diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index dd0c13b7d..956acba8f 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -13,13 +13,22 @@ #include "dequantize.h" #include "vp8/common/idct.h" #include "vpx_mem/vpx_mem.h" +#include "onyxd_int.h" extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) ; extern void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch); +#if CONFIG_T8X8 +extern void vp8_short_idct8x8_c(short *input, short *output, int pitch); +extern void vp8_short_idct8x8_1_c(short *input, short *output, int pitch); +#endif +#ifdef DEC_DEBUG +extern int dec_debug; +#endif void vp8_dequantize_b_c(BLOCKD *d) { + int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; @@ -111,3 +120,211 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, pred += pitch; } } + +#if CONFIG_T8X8 +void vp8_dequantize_b_8x8_c(BLOCKD *d)//just for 2x2 haar transform +{ + int i; + short *DQ = d->dqcoeff; + short *Q = d->qcoeff; + short *DQC = d->dequant; + + for (i = 0; i < 16; i++) + { + DQ[i] = (short)(Q[i] * DQC[i]); + } +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Dequantize 2x2\n"); + for (j=0;j<16;j++) printf("%d ", Q[j]); printf("\n"); + for (j=0;j<16;j++) printf("%d ", DQ[j]); printf("\n"); + } +#endif +} + +void vp8_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride)//, MACROBLOCKD *xd, short blk_idx +{ + short output[64]; + short *diff_ptr = output; + int r, c, b; + int i; + unsigned char *origdest = dest; + unsigned char *origpred = pred; + +#ifdef DEC_DEBUG + if 
(dec_debug) { + int j; + printf("Input 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + // recover quantizer for 4 4x4 blocks + for (i = 0; i < 64; i++) + { + input[i]=input[i] * dq[i!=0]; + } +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input DQ 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + + // the idct halves ( >> 1) the pitch + vp8_short_idct8x8_c(input, output, 16); +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Output 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", output[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + + vpx_memset(input, 0, 128);// test what should i put here + + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 8; + pred += pitch; + } + diff_ptr = output + (b+1) / 2 * 4 * 8 + (b+1) % 2 * 4; + dest = origdest + (b+1) / 2 * 4 * stride + (b+1) % 2 * 4; + pred = origpred + (b+1) / 2 * 4 * pitch + (b+1) % 2 * 4; + } +#ifdef DEC_DEBUG + if (dec_debug) { + int k,j; + printf("Final 8x8\n"); + for (j=0;j<8;j++) { + for (k=0;k<8;k++) { + printf("%d ", origdest[k]); + } + printf("\n"); + origdest+=stride; + } + } +#endif +} + +void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride, + int Dc)// Dc for 1st order T in some rear case +{ + short output[64]; + short *diff_ptr = output; + int r, c, b; + int i; + unsigned char *origdest = dest; + unsigned char *origpred = pred; + + input[0] = (short)Dc;//Dc is the reconstructed value, do not need dequantization + //dc value is recovered after dequantization, since dc need not quantization +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); 
+ if (j%8 == 7) printf("\n"); + } + } +#endif + for (i = 1; i < 64; i++) + { + input[i] = input[i] * dq[i!=0]; + } + +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input DQ 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + + // the idct halves ( >> 1) the pitch + vp8_short_idct8x8_c(input, output,16); +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Output 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", output[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + vpx_memset(input, 0, 128); + + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 8; + pred += pitch; + } + diff_ptr = output + (b+1) / 2 * 4 * 8 + (b+1) % 2 * 4; + dest = origdest + (b+1) / 2 * 4 * stride + (b+1) % 2 * 4; + pred = origpred + (b+1) / 2 * 4 * pitch + (b+1) % 2 * 4; + } +#ifdef DEC_DEBUG + if (dec_debug) { + int k,j; + printf("Final 8x8\n"); + for (j=0;j<8;j++) { + for (k=0;k<8;k++) { + printf("%d ", origdest[k]); + } + printf("\n"); + origdest+=stride; + } + } +#endif +} + +#endif diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h index 2e662a593..5ae6f9619 100644 --- a/vp8/decoder/dequantize.h +++ b/vp8/decoder/dequantize.h @@ -42,6 +42,25 @@ unsigned char *pre, unsigned char *dst_u, \ unsigned char *dst_v, int stride, char *eobs) +#if CONFIG_T8X8 +#define prototype_dequant_dc_idct_add_y_block_8x8(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst, \ + int stride, char *eobs, short *dc, MACROBLOCKD *xd) + +#define prototype_dequant_idct_add_y_block_8x8(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst, \ + int stride, char *eobs, MACROBLOCKD *xd) + +#define prototype_dequant_idct_add_uv_block_8x8(sym) \ + void sym(short *q, short *dq, \ + unsigned char 
*pre, unsigned char *dst_u, \ + unsigned char *dst_v, int stride, char *eobs, \ + MACROBLOCKD *xd) + +#endif + #if ARCH_X86 || ARCH_X86_64 #include "x86/dequantize_x86.h" #endif @@ -80,6 +99,38 @@ extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block); #endif extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block); +#if CONFIG_T8X8 +#ifndef vp8_dequant_block_8x8 +#define vp8_dequant_block_8x8 vp8_dequantize_b_8x8_c +#endif +extern prototype_dequant_block(vp8_dequant_block_8x8); + +#ifndef vp8_dequant_idct_add_8x8 +#define vp8_dequant_idct_add_8x8 vp8_dequant_idct_add_8x8_c +#endif +extern prototype_dequant_idct_add(vp8_dequant_idct_add_8x8); + +#ifndef vp8_dequant_dc_idct_add_8x8 +#define vp8_dequant_dc_idct_add_8x8 vp8_dequant_dc_idct_add_8x8_c +#endif +extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_8x8); + +#ifndef vp8_dequant_dc_idct_add_y_block_8x8 +#define vp8_dequant_dc_idct_add_y_block_8x8 vp8_dequant_dc_idct_add_y_block_8x8_c +#endif +extern prototype_dequant_dc_idct_add_y_block_8x8(vp8_dequant_dc_idct_add_y_block_8x8); + +#ifndef vp8_dequant_idct_add_y_block_8x8 +#define vp8_dequant_idct_add_y_block_8x8 vp8_dequant_idct_add_y_block_8x8_c +#endif +extern prototype_dequant_idct_add_y_block_8x8(vp8_dequant_idct_add_y_block_8x8); + +#ifndef vp8_dequant_idct_add_uv_block_8x8 +#define vp8_dequant_idct_add_uv_block_8x8 vp8_dequant_idct_add_uv_block_8x8_c +#endif +extern prototype_dequant_idct_add_uv_block_8x8(vp8_dequant_idct_add_uv_block_8x8); + +#endif typedef prototype_dequant_block((*vp8_dequant_block_fn_t)); @@ -93,6 +144,13 @@ typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t)) typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t)); +#if CONFIG_T8X8 +typedef prototype_dequant_dc_idct_add_y_block_8x8((*vp8_dequant_dc_idct_add_y_block_fn_t_8x8)); + +typedef prototype_dequant_idct_add_y_block_8x8((*vp8_dequant_idct_add_y_block_fn_t_8x8)); + +typedef 
prototype_dequant_idct_add_uv_block_8x8((*vp8_dequant_idct_add_uv_block_fn_t_8x8)); +#endif typedef struct { vp8_dequant_block_fn_t block; @@ -101,6 +159,14 @@ typedef struct vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; vp8_dequant_idct_add_y_block_fn_t idct_add_y_block; vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block; +#if CONFIG_T8X8 + vp8_dequant_block_fn_t block_8x8; + vp8_dequant_idct_add_fn_t idct_add_8x8; + vp8_dequant_dc_idct_add_fn_t dc_idct_add_8x8; + vp8_dequant_dc_idct_add_y_block_fn_t_8x8 dc_idct_add_y_block_8x8; + vp8_dequant_idct_add_y_block_fn_t_8x8 idct_add_y_block_8x8; + vp8_dequant_idct_add_uv_block_fn_t_8x8 idct_add_uv_block_8x8; +#endif } vp8_dequant_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 98be68558..a55b884c1 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -26,6 +26,18 @@ DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) = 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X }; +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, static const unsigned char, coef_bands_x_8x8[64]) = { + 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X, + 5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, +}; +#endif #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 #define ONE_CONTEXT_NODE 2 @@ -44,7 
+56,6 @@ DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) = #define CAT4_MIN_VAL 19 #define CAT5_MIN_VAL 35 #define CAT6_MIN_VAL 67 - #define CAT1_PROB0 159 #define CAT2_PROB0 145 #define CAT2_PROB1 165 @@ -157,7 +168,48 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); range = range - split; \ NORMALIZE \ } - +#if CONFIG_T8X8 +#define DECODE_AND_LOOP_IF_ZERO_8x8_2(probability,branch) \ + { \ + split = 1 + ((( probability*(range-1) ) ) >> 8); \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ + { \ + range = split; \ + NORMALIZE \ + Prob = coef_probs; \ + if(c<3) {\ + ++c; \ + Prob += coef_bands_x[c]; \ + goto branch; \ + } goto BLOCK_FINISHED_8x8; /*for malformed input */\ + } \ + value -= bigsplit; \ + range = range - split; \ + NORMALIZE \ + } +#define DECODE_AND_LOOP_IF_ZERO_8X8(probability,branch) \ + { \ + split = 1 + ((( probability*(range-1) ) ) >> 8); \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ + { \ + range = split; \ + NORMALIZE \ + Prob = coef_probs; \ + if(c<63) {\ + ++c; \ + Prob += coef_bands_x_8x8[c]; \ + goto branch; \ + } goto BLOCK_FINISHED_8x8; /*for malformed input */\ + } \ + value -= bigsplit; \ + range = range - split; \ + NORMALIZE \ + } +#endif #define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \ DECODE_AND_APPLYSIGN(val) \ Prob = coef_probs + (ENTROPY_NODES*2); \ @@ -168,6 +220,26 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); qcoeff_ptr [ 15 ] = (INT16) v; \ goto BLOCK_FINISHED; +#if CONFIG_T8X8 +#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val) \ + DECODE_AND_APPLYSIGN(val) \ + Prob = coef_probs + (ENTROPY_NODES*2); \ + if(c < 3){\ + qcoeff_ptr [ scan[c] ] = (INT16) v; \ + ++c; \ + goto DO_WHILE_8x8; }\ + qcoeff_ptr [ scan[3] ] = (INT16) v; \ + goto BLOCK_FINISHED_8x8; +#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val) \ + DECODE_AND_APPLYSIGN(val) \ + Prob = 
coef_probs + (ENTROPY_NODES*2); \ + if(c < 63){\ + qcoeff_ptr [ scan[c] ] = (INT16) v; \ + ++c; \ + goto DO_WHILE_8x8; }\ + qcoeff_ptr [ scan[63] ] = (INT16) v; \ + goto BLOCK_FINISHED_8x8; +#endif #define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\ split = 1 + (((range-1) * prob) >> 8); \ @@ -185,6 +257,354 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); }\ NORMALIZE +#if CONFIG_T8X8 +int vp8_decode_mb_tokens_8x8(VP8D_COMP *dx, MACROBLOCKD *x) +{ + ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context; + const VP8_COMMON *const oc = & dx->common; + + BOOL_DECODER *bc = x->current_bc; + + char *eobs = x->eobs; + + ENTROPY_CONTEXT *a, *a1; + ENTROPY_CONTEXT *l, *l1; + int i; + + int eobtotal = 0; + + register int count; + + const BOOL_DATA *bufptr; + const BOOL_DATA *bufend; + register unsigned int range; + VP8_BD_VALUE value; + const int *scan;// + register unsigned int shift; + UINT32 split; + VP8_BD_VALUE bigsplit; + INT16 *qcoeff_ptr; + + const vp8_prob *coef_probs;// + int type; + int stop; + INT16 val, bits_count; + INT16 c; + INT16 v; + const vp8_prob *Prob;// + + type = 3; + i = 0; + stop = 16; + + scan = vp8_default_zig_zag1d_8x8; + qcoeff_ptr = &x->qcoeff[0]; + + if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + { + i = 24; + stop = 24; + type = 1; + qcoeff_ptr += 24*16; + eobtotal -= 4; + scan = vp8_default_zig_zag1d; + } + + bufend = bc->user_buffer_end; + bufptr = bc->user_buffer; + value = bc->value; + count = bc->count; + range = bc->range; + + coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0]; + +BLOCK_LOOP_8x8: + a = A + vp8_block2above[i]; + l = L + vp8_block2left[i]; + + if(i < 16) + { + a1 = A + vp8_block2above[i+1]; + l1 = L + vp8_block2left[i+4]; + } + else if(i<24) + { + a1 = A + vp8_block2above[i+1]; + l1 = L + vp8_block2left[i+2]; + + } + c = (INT16)(!type); + +// Dest = ((A)!=0) + ((B)!=0); + if(i==24) + { + 
VP8_COMBINEENTROPYCONTEXTS(v, *a, *l); + } + else + { + VP8_COMBINEENTROPYCONTEXTS_8x8(v, *a, *l, *a1, *l1); + } + + Prob = coef_probs; + Prob += v * ENTROPY_NODES; + +DO_WHILE_8x8: + if(i==24) + Prob += coef_bands_x[c]; + else + Prob += coef_bands_x_8x8[c]; + DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED_8x8); + +CHECK_0_8x8_: + if (i==24) + { + DECODE_AND_LOOP_IF_ZERO_8x8_2(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_); + } + else + { + DECODE_AND_LOOP_IF_ZERO_8X8(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_); + } + DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_8x8_); + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val; + bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length; + + do + { + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count); + bits_count -- ; + } + while (bits_count >= 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_FIVE_CONTEXT_NODE_0_8x8_: + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_THREEFOUR_CONTEXT_NODE_0_8x8_: + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_8x8_); + val = 
vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_THREE_CONTEXT_NODE_0_8x8_: + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +HIGH_LOW_CONTEXT_NODE_0_8x8_: + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_8x8_); + + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_ONE_CONTEXT_NODE_0_8x8_: + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +LOW_VAL_CONTEXT_NODE_0_8x8_: + DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_8x8_); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(4); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(4); + } + + +THREE_CONTEXT_NODE_0_8x8_: + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(3); + } + else + { + 
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(3); + } + + +TWO_CONTEXT_NODE_0_8x8_: + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(2); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(2); + } + + +ONE_CONTEXT_NODE_0_8x8_: + DECODE_AND_APPLYSIGN(1); + Prob = coef_probs + ENTROPY_NODES; + + if (i==24) + { + if (c < 3)//15 + { + qcoeff_ptr [ scan[c] ] = (INT16) v; + ++c; + goto DO_WHILE_8x8; + } + } + else + { + if (c < 63) + { + qcoeff_ptr [ scan[c] ] = (INT16) v; + ++c; + goto DO_WHILE_8x8; + } + } + + if(i==24) + qcoeff_ptr [ scan[3] ] = (INT16) v;//15 + else + qcoeff_ptr [ scan[63] ] = (INT16) v; + + +BLOCK_FINISHED_8x8: + *a = *l = ((eobs[i] = c) != !type); // any nonzero data? + /*if (i!=24) { + *(A + vp8_block2above[i+1]) = *(A + vp8_block2above[i+2]) = *(A + vp8_block2above[i+3]) = *a; + *(L + vp8_block2left[i+1]) = *(L + vp8_block2left[i+2]) = *(L + vp8_block2left[i+3]) = *l; + }*/ + + if (i!=24) + { + if(i==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *a; + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *l; + } + else if(i==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *a; + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *l; + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(i==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *a; + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *l; + + } + else if(i==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *a; + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *l; + *(A + 
vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + + } + else + { + *(A + vp8_block2above[i+1]) = *(A + vp8_block2above[i+2]) = *(A + vp8_block2above[i+3]) = *a; + *(L + vp8_block2left[i+1]) = *(L + vp8_block2left[i+2]) = *(L + vp8_block2left[i+3]) = *l; + + } + } + + eobtotal += c; + qcoeff_ptr += (i==24 ? 16 : 64); + + i+=4; + + if (i < stop) + goto BLOCK_LOOP_8x8; + + if (i > 24) + { + type = 0; + i = 0; + stop = 16; + coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0]; + qcoeff_ptr -= (24*16 + 16); + scan = vp8_default_zig_zag1d_8x8; + goto BLOCK_LOOP_8x8; + } + + if (i == 16) + { + type = 2; + coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0]; + stop = 24; + goto BLOCK_LOOP_8x8; + } + + FILL + bc->user_buffer = bufptr; + bc->value = value; + bc->count = count; + bc->range = range; + + return eobtotal; + +} +#endif int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) { ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; @@ -382,6 +802,7 @@ BLOCK_FINISHED: bc->value = value; bc->count = count; bc->range = range; + return eobtotal; } diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h index 8640bda4c..c5305bb67 100644 --- a/vp8/decoder/detokenize.h +++ b/vp8/decoder/detokenize.h @@ -16,5 +16,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x); int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); +#if CONFIG_T8X8 +int vp8_decode_mb_tokens_8x8(VP8D_COMP *, MACROBLOCKD *); +#endif #endif /* DETOKENIZE_H */ diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index f76653231..fc5fdb39b 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -21,6 +21,17 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) /* Pure C: */ #if CONFIG_RUNTIME_CPU_DETECT pbi->mb.rtcd = &pbi->common.rtcd; + +#if CONFIG_T8X8 + + pbi->dequant.block_8x8 = vp8_dequantize_b_8x8_c; + pbi->dequant.idct_add_8x8 = 
vp8_dequant_idct_add_8x8_c; + pbi->dequant.dc_idct_add_8x8 = vp8_dequant_dc_idct_add_8x8_c; + pbi->dequant.dc_idct_add_y_block_8x8 = vp8_dequant_dc_idct_add_y_block_8x8_c; + pbi->dequant.idct_add_y_block_8x8 = vp8_dequant_idct_add_y_block_8x8_c; + pbi->dequant.idct_add_uv_block_8x8 = vp8_dequant_idct_add_uv_block_8x8_c; + +#endif pbi->dequant.block = vp8_dequantize_b_c; pbi->dequant.idct_add = vp8_dequant_idct_add_c; pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_c; diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c index df0192354..2015d5255 100644 --- a/vp8/decoder/idct_blk.c +++ b/vp8/decoder/idct_blk.c @@ -122,3 +122,45 @@ void vp8_dequant_idct_add_uv_block_c dstv += 4*stride - 8; } } + +#if CONFIG_T8X8 +void vp8_dequant_dc_idct_add_y_block_8x8_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc, MACROBLOCKD *xd) +{ + + vp8_dequant_dc_idct_add_8x8_c (q, dq, pre, dst, 16, stride, dc[0]); + vp8_dequant_dc_idct_add_8x8_c (&q[64], dq, pre+8, dst+8, 16, stride, dc[1]); + vp8_dequant_dc_idct_add_8x8_c (&q[128], dq, pre+8*16, dst+8*stride, 16, stride, dc[4]); + vp8_dequant_dc_idct_add_8x8_c (&q[192], dq, pre+8*16+8, dst+8*stride+8, 16, stride, dc[8]); + +} + +void vp8_dequant_idct_add_y_block_8x8_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, MACROBLOCKD *xd) +{ + + + unsigned char *origdest = dst; + unsigned char *origpred = pre; + + vp8_dequant_idct_add_8x8_c (q, dq, pre, dst, 16, stride); + vp8_dequant_idct_add_8x8_c (&q[64], dq, origpred+8, origdest+8, 16, stride); + vp8_dequant_idct_add_8x8_c (&q[128], dq, origpred+8*16, origdest+8*stride, 16, stride); + vp8_dequant_idct_add_8x8_c (&q[192], dq, origpred+8*16+8, origdest+8*stride+8, 16, stride); + +} + +void vp8_dequant_idct_add_uv_block_8x8_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, MACROBLOCKD *xd) +{ + vp8_dequant_idct_add_8x8_c (q, 
dq, pre, dstu, 8, stride); + + q += 64; + pre += 64; + + vp8_dequant_idct_add_8x8_c (q, dq, pre, dstv, 8, stride); +} +#endif diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index db6528c80..d901faf11 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -43,6 +43,43 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); static int get_free_fb (VP8_COMMON *cm); static void ref_cnt_fb (int *buf, int *idx, int new_idx); +#if CONFIG_DEBUG +void vp8_recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s) +{ + FILE *yuv_file = fopen((char *)name, "ab"); + unsigned char *src = s->y_buffer; + int h = s->y_height; + + do + { + fwrite(src, s->y_width, 1, yuv_file); + src += s->y_stride; + } + while (--h); + + src = s->u_buffer; + h = s->uv_height; + + do + { + fwrite(src, s->uv_width, 1, yuv_file); + src += s->uv_stride; + } + while (--h); + + src = s->v_buffer; + h = s->uv_height; + + do + { + fwrite(src, s->uv_width, 1, yuv_file); + src += s->uv_stride; + } + while (--h); + + fclose(yuv_file); +} +#endif void vp8dx_initialize() { @@ -56,7 +93,6 @@ void vp8dx_initialize() } } - VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) { VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP)); @@ -119,13 +155,17 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) return (VP8D_PTR) pbi; } - void vp8dx_remove_decompressor(VP8D_PTR ptr) { VP8D_COMP *pbi = (VP8D_COMP *) ptr; if (!pbi) return; +#if CONFIG_SEGMENTATION + // Delete sementation map + if (pbi->segmentation_map != 0) + vpx_free(pbi->segmentation_map); +#endif #if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd) @@ -300,6 +340,22 @@ static int swap_frame_buffers (VP8_COMMON *cm) return err; } +/* +static void vp8_print_yuv_rec_mb(VP8_COMMON *cm, int mb_row, int mb_col) +{ + YV12_BUFFER_CONFIG *s = cm->frame_to_show; + unsigned char *src = s->y_buffer; + int i, j; + + printf("After loop filter\n"); + for (i=0;i<16;i++) { + for (j=0;j<16;j++) + printf("%3d ", src[(mb_row*16+i)*s->y_stride + 
mb_col*16+j]); + printf("\n"); + } +} +*/ + int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, int64_t time_stamp) { #if HAVE_ARMV7 @@ -475,6 +531,9 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); } +#if CONFIG_DEBUG + vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show); +#endif vp8_clear_system_state(); diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 4ece4312a..1e4f3977a 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -21,6 +21,8 @@ #include "ec_types.h" #endif +//#define DEC_DEBUG + typedef struct { int ithread; @@ -45,10 +47,16 @@ typedef struct typedef struct { int const *scan; +#if CONFIG_T8X8 + int const *scan_8x8; +#endif UINT8 const *ptr_block2leftabove; vp8_tree_index const *vp8_coef_tree_ptr; unsigned char *norm_ptr; UINT8 *ptr_coef_bands_x; +#if CONFIG_T8X8 + UINT8 *ptr_coef_bands_x_8x8; +#endif ENTROPY_CONTEXT_PLANES *A; ENTROPY_CONTEXT_PLANES *L; @@ -57,6 +65,9 @@ typedef struct BOOL_DECODER *current_bc; vp8_prob const *coef_probs[4]; +#if CONFIG_T8X8 + vp8_prob const *coef_probs_8x8[4]; +#endif UINT8 eob[25]; @@ -78,7 +89,9 @@ typedef struct VP8Decompressor const unsigned char *partitions[MAX_PARTITIONS]; unsigned int partition_sizes[MAX_PARTITIONS]; unsigned int num_partitions; - +#if CONFIG_SEGMENTATION + unsigned char *segmentation_map; +#endif #if CONFIG_MULTITHREAD /* variable for threading */ @@ -87,7 +100,6 @@ typedef struct VP8Decompressor int current_mb_col_main; int decoding_thread_count; int allocated_decoding_thread_count; - int mt_baseline_filter_level[MAX_MB_SEGMENTS]; int sync_range; int *mt_current_mb_col; /* Each row remembers its already decoded column. 
*/ diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index fdde04a88..b60b7e8bf 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -66,7 +66,15 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D vp8_build_block_doffsets(mbd); mbd->segmentation_enabled = xd->segmentation_enabled; mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); + + vpx_memcpy(mbd->segment_feature_data, + xd->segment_feature_data, + sizeof(xd->segment_feature_data)); +#if CONFIG_SEGFEATURES + vpx_memcpy(mbd->segment_feature_mask, + xd->segment_feature_mask, + sizeof(xd->segment_feature_mask)); +#endif /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index cea8e1232..e872a433d 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -24,7 +24,9 @@ #include "bitstream.h" #include "defaultcoefcounts.h" - +#if CONFIG_SEGMENTATION +static int segment_cost = 0; +#endif const int vp8cx_base_skip_false_prob[128] = { 255, 255, 255, 255, 255, 255, 255, 255, @@ -52,11 +54,19 @@ unsigned __int64 Sectionbits[500]; #ifdef ENTROPY_STATS int intra_mode_stats[10][10][10]; static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#if CONFIG_T8X8 +static unsigned int tree_update_hist_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#endif + extern unsigned int active_section; #endif #ifdef MODE_STATS int count_mb_seg[4] = { 0, 0, 0, 0 }; +#if CONFIG_SEGMENTATION +int segment_modes_intra[MAX_MB_SEGMENTS] = { 0, 0, 0, 0 }; +int segment_modes_inter[MAX_MB_SEGMENTS] = { 0, 0, 0, 0 }; +#endif #endif @@ -813,24 +823,39 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO case 0: vp8_write(w, 0, 
x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[1]); +#endif break; case 1: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 1, x->mb_segment_tree_probs[1]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_one(x->mb_segment_tree_probs[1]); +#endif break; case 2: vp8_write(w, 1, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[2]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_one(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[2]); +#endif break; case 3: vp8_write(w, 1, x->mb_segment_tree_probs[0]); vp8_write(w, 1, x->mb_segment_tree_probs[2]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_one(x->mb_segment_tree_probs[0]) + vp8_cost_one(x->mb_segment_tree_probs[2]); +#endif break; // TRAP.. This should not happen default: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); +#if CONFIG_SEGMENTATION + segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[1]); +#endif break; } } @@ -842,7 +867,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) VP8_COMMON *const pc = & cpi->common; vp8_writer *const w = & cpi->bc; const MV_CONTEXT *mvc = pc->fc.mvc; - + MACROBLOCKD *xd = &cpi->mb.e_mbd; +#if CONFIG_SEGMENTATION + int left_id, above_id; + int i; + int sum; + int index = 0; +#endif const int *const rfct = cpi->count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; @@ -899,7 +930,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) update_mbintra_mode_probs(cpi); vp8_write_mvprobs(cpi); - +#if CONFIG_SEGMENTATION + vp8_write_bit(w, (xd->temporal_update) ? 
1:0); +#endif while (++mb_row < pc->mb_rows) { int mb_col = -1; @@ -910,7 +943,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) const MV_REFERENCE_FRAME rf = mi->ref_frame; const MB_PREDICTION_MODE mode = mi->mode; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + //MACROBLOCKD *xd = &cpi->mb.e_mbd; // Distance of Mb to the various image edges. // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units @@ -918,13 +951,53 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; xd->mb_to_top_edge = -((mb_row * 16)) << 3; xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - +#if CONFIG_SEGMENTATION + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); +#endif #ifdef ENTROPY_STATS active_section = 9; #endif +#ifdef MODE_STATS +#if CONFIG_SEGMENTATION + segment_modes_inter[mi->segment_id]++; +#endif +#endif if (cpi->mb.e_mbd.update_mb_segmentation_map) + { +#if CONFIG_SEGMENTATION + if (xd->temporal_update) + { + sum = 0; + if (mb_col != 0) + sum += (m-1)->mbmi.segment_flag; + if (mb_row != 0) + sum += (m-pc->mb_cols)->mbmi.segment_flag; + + if (m->mbmi.segment_flag == 0) + { + vp8_write(w,0,xd->mb_segment_tree_probs[3+sum]); + segment_cost += vp8_cost_zero(xd->mb_segment_tree_probs[3+sum]); + } + else + { + vp8_write(w,1,xd->mb_segment_tree_probs[3+sum]); + segment_cost += vp8_cost_one(xd->mb_segment_tree_probs[3+sum]); + write_mb_features(w, mi, &cpi->mb.e_mbd); + cpi->segmentation_map[index] = mi->segment_id; + } + } + else + { + write_mb_features(w, mi, &cpi->mb.e_mbd); + cpi->segmentation_map[index] = mi->segment_id; + } + index++; +#else write_mb_features(w, mi, &cpi->mb.e_mbd); +#endif + } if (pc->mb_no_coeff_skip) vp8_encode_bool(w, m->mbmi.mb_skip_coeff, prob_skip_false); @@ -1059,7 +1132,11 @@ static void write_kfmodes(VP8_COMP *cpi) const VP8_COMMON *const c = & cpi->common; /* const */ MODE_INFO *m = c->mi; - +#if 
CONFIG_SEGMENTATION + int left_id, above_id; + int i; + int index = 0; +#endif int mb_row = -1; int prob_skip_false = 0; @@ -1084,9 +1161,28 @@ static void write_kfmodes(VP8_COMP *cpi) while (++mb_col < c->mb_cols) { const int ym = m->mbmi.mode; +#if CONFIG_SEGMENTATION + MACROBLOCKD *xd = &cpi->mb.e_mbd; + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); +#endif +#ifdef MODE_STATS +#if CONFIG_SEGMENTATION + segment_modes_intra[m->mbmi.segment_id]++; +#endif +#endif if (cpi->mb.e_mbd.update_mb_segmentation_map) + { +#if CONFIG_SEGMENTATION + write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd); + cpi->segmentation_map[index] = m->mbmi.segment_id; + index++; +#else + write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd); +#endif + } if (c->mb_no_coeff_skip) vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); @@ -1315,6 +1411,7 @@ static int default_coef_context_savings(VP8_COMP *cpi) int vp8_estimate_entropy_savings(VP8_COMP *cpi) { int savings = 0; + int i=0; const int *const rfct = cpi->count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; @@ -1379,6 +1476,65 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) savings += default_coef_context_savings(cpi); +#if CONFIG_T8X8 + i = 0; + do + { + int j = 0; + + do + { + int k = 0; + + do + { + /* at every context */ + + /* calc probs and branch cts for this frame only */ + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; + + int t = 0; /* token/prob index */ + + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + cpi->frame_coef_probs_8x8 [i][j][k], cpi->frame_branch_ct_8x8 [i][j][k], cpi->coef_counts_8x8 [i][j][k], + 256, 1 + ); + + do + { + const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; + const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; + + const vp8_prob old = cpi->common.fc.coef_probs_8x8 [i][j][k][t]; + const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t]; + + const int 
old_b = vp8_cost_branch(ct, old); + const int new_b = vp8_cost_branch(ct, newp); + + const int update_b = 8 + + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8); + + const int s = old_b - new_b - update_b; + + if (s > 0) + savings += s; + + + } + while (++t < MAX_ENTROPY_TOKENS - 1); + + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +#endif + + return savings; } @@ -1505,6 +1661,92 @@ static void update_coef_probs(VP8_COMP *cpi) } while (++i < BLOCK_TYPES); +#if CONFIG_T8X8 + i = 0; + do + { + int j = 0; + + do + { + int k = 0; + + do + { + //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. + /* at every context */ + + /* calc probs and branch cts for this frame only */ + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; + + int t = 0; /* token/prob index */ + + //vp8_tree_probs_from_distribution( + // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], + // 256, 1 + // ); + + do + { + const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; + const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; + + vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t; + const vp8_prob old = *Pold; + const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t]; + + const int old_b = vp8_cost_branch(ct, old); + const int new_b = vp8_cost_branch(ct, newp); + + const int update_b = 8 + + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8); + + const int s = old_b - new_b - update_b; + const int u = s > 0 ? 
1 : 0; + + vp8_write(w, u, upd); + + +#ifdef ENTROPY_STATS + ++ tree_update_hist_8x8 [i][j][k][t] [u]; +#endif + + if (u) + { + /* send/use new probability */ + + *Pold = newp; + vp8_write_literal(w, newp, 8); + + savings += s; + + } + + } + while (++t < MAX_ENTROPY_TOKENS - 1); + + /* Accum token counts for generation of default statistics */ +#ifdef ENTROPY_STATS + t = 0; + + do + { + context_counters_8x8 [i][j][k][t] += cpi->coef_counts_8x8 [i][j][k][t]; + } + while (++t < MAX_ENTROPY_TOKENS); + +#endif + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +#endif + } #ifdef PACKET_TESTING FILE *vpxlogc = 0; @@ -1585,8 +1827,9 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) } else vp8_start_encode(bc, cx_data); - - +#if CONFIG_SEGMENTATION + xd->update_mb_segmentation_map = 1; +#endif // Signal whether or not Segmentation is enabled vp8_write_bit(bc, (xd->segmentation_enabled) ? 1 : 0); @@ -1603,28 +1846,45 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) vp8_write_bit(bc, (xd->mb_segement_abs_delta) ? 1 : 0); - // For each segmentation feature (Quant and loop filter level) - for (i = 0; i < MB_LVL_MAX; i++) +#if CONFIG_SEGFEATURES + // For each segments id... + for (j = 0; j < MAX_MB_SEGMENTS; j++) + { + // For each segmentation codable feature... + for (i = 0; i < SEG_LVL_MAX; i++) +#else + // For each segmentation codable feature... + for (i = 0; i < SEG_LVL_MAX; i++) { - // For each of the segments + // For each of the segments id... for (j = 0; j < MAX_MB_SEGMENTS; j++) +#endif { - Data = xd->segment_feature_data[i][j]; + Data = xd->segment_feature_data[j][i]; - // Frame level data +#if CONFIG_SEGFEATURES + // If the feature is enabled... 
+ if ( xd->segment_feature_mask[j] & (0x01 << i)) +#else + // If the feature is enabled...Indicated by non zero + // value in VP8 if (Data) +#endif { vp8_write_bit(bc, 1); + // Encode the relevant feature data if (Data < 0) { Data = - Data; - vp8_write_literal(bc, Data, mb_feature_data_bits[i]); + vp8_write_literal(bc, Data, + mb_feature_data_bits[i]); vp8_write_bit(bc, 1); } else { - vp8_write_literal(bc, Data, mb_feature_data_bits[i]); + vp8_write_literal(bc, Data, + mb_feature_data_bits[i]); vp8_write_bit(bc, 0); } } @@ -1636,8 +1896,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) if (xd->update_mb_segmentation_map) { + #if CONFIG_SEGMENTATION // Write the probs used to decode the segment id for each macro block. + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) +#else for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) +#endif { int Data = xd->mb_segment_tree_probs[i]; @@ -1909,6 +2173,46 @@ void print_tree_update_probs() } fprintf(f, "};\n"); + +#if CONFIG_T8X8 + fprintf(f, "const vp8_prob tree_update_probs_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {\n"); + + for (i = 0; i < BLOCK_TYPES; i++) + { + fprintf(f, " { \n"); + + for (j = 0; j < COEF_BANDS; j++) + { + fprintf(f, " {\n"); + + for (k = 0; k < PREV_COEF_CONTEXTS; k++) + { + fprintf(f, " {"); + + for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + { + Sum = tree_update_hist_8x8[i][j][k][l][0] + tree_update_hist_8x8[i][j][k][l][1]; + + if (Sum > 0) + { + if (((tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum) > 0) + fprintf(f, "%3ld, ", (tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum); + else + fprintf(f, "%3ld, ", 1); + } + else + fprintf(f, "%3ld, ", 128); + } + + fprintf(f, "},\n"); + } + + fprintf(f, " },\n"); + } + + fprintf(f, " },\n"); + } +#endif fclose(f); } #endif diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 0d14b545c..8a95db798 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -46,7 +46,7 @@ typedef struct int src; 
int src_stride; -// MV enc_mv; + // MV enc_mv; int force_empty; } BLOCK; @@ -126,6 +126,12 @@ typedef struct void (*short_walsh4x4)(short *input, short *output, int pitch); void (*quantize_b)(BLOCK *b, BLOCKD *d); void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); + #if CONFIG_T8X8 + void (*vp8_short_fdct8x8)(short *input, short *output, int pitch); + void (*short_fhaar2x2)(short *input, short *output, int pitch); + void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); + void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); +#endif } MACROBLOCK; diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index 69a882c89..fd4c62cd4 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -11,6 +11,122 @@ #include <math.h> #include "vpx_ports/config.h" + + + + + +#if CONFIG_T8X8 +void vp8_short_fdct8x8_c(short *block, short *coefs, int pitch) +{ + int j1, i, j, k; + float b[8]; + float b1[8]; + float d[8][8]; + float f0 = (float) .7071068; + float f1 = (float) .4903926; + float f2 = (float) .4619398; + float f3 = (float) .4157348; + float f4 = (float) .3535534; + float f5 = (float) .2777851; + float f6 = (float) .1913417; + float f7 = (float) .0975452; + pitch = pitch / 2; + for (i = 0, k = 0; i < 8; i++, k += pitch) + { + for (j = 0; j < 8; j++) + { + b[j] = (float)( block[k + j]<<1); + } + /* Horizontal transform */ + for (j = 0; j < 4; j++) + { + j1 = 7 - j; + b1[j] = b[j] + b[j1]; + b1[j1] = b[j] - b[j1]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[i][0] = (b[0] + b[1]) * f4; + d[i][4] = (b[0] - b[1]) * f4; + d[i][2] = b[2] * f6 + b[3] * f2; + d[i][6] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[i][1] = b1[4] * f7 + b1[7] * f1; + d[i][5] = b1[5] * f3 + b1[6] * f5; + d[i][7] = b1[7] * f7 - b1[4] * f1; + d[i][3] = b1[6] * f3 - b1[5] * f5; + } + 
/* Vertical transform */ + for (i = 0; i < 8; i++) + { + for (j = 0; j < 4; j++) + { + j1 = 7 - j; + b1[j] = d[j][i] + d[j1][i]; + b1[j1] = d[j][i] - d[j1][i]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[0][i] = (b[0] + b[1]) * f4; + d[4][i] = (b[0] - b[1]) * f4; + d[2][i] = b[2] * f6 + b[3] * f2; + d[6][i] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[1][i] = b1[4] * f7 + b1[7] * f1; + d[5][i] = b1[5] * f3 + b1[6] * f5; + d[7][i] = b1[7] * f7 - b1[4] * f1; + d[3][i] = b1[6] * f3 - b1[5] * f5; + } + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + *(coefs + j + i * 8) = (short) floor(d[i][j] +0.5); + } + } + return; +} + + + +void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) //pitch = 8 +{ + /* [1 1 ; 1 -1] orthogonal transform */ + /* use position: 0,1, 4, 8 */ + int i; + short *ip1 = input; + short *op1 = output; + for (i = 0; i < 16; i++) + { + op1[i] = 0; + } + + op1[0]=ip1[0] + ip1[1] + ip1[4] + ip1[8]; + op1[1]=ip1[0] - ip1[1] + ip1[4] - ip1[8]; + op1[4]=ip1[0] + ip1[1] - ip1[4] - ip1[8]; + op1[8]=ip1[0] - ip1[1] - ip1[4] + ip1[8]; + +} +#endif void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index fec3b4c37..c37d47aca 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -22,6 +22,20 @@ #include "arm/dct_arm.h" #endif +#if CONFIG_T8X8 + +#ifndef vp8_fdct_short8x8 +#define vp8_fdct_short8x8 vp8_short_fdct8x8_c +#endif +extern prototype_fdct(vp8_fdct_short8x8); + +#ifndef vp8_fhaar_short2x2 +#define vp8_fhaar_short2x2 vp8_short_fhaar2x2_c +#endif +extern prototype_fdct(vp8_fhaar_short2x2); + +#endif + #ifndef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_c #endif @@ -49,6 +63,10 @@ extern 
prototype_fdct(vp8_fdct_walsh_short4x4); typedef prototype_fdct(*vp8_fdct_fn_t); typedef struct { +#if CONFIG_T8X8 + vp8_fdct_fn_t short8x8; + vp8_fdct_fn_t haar_short2x2; +#endif vp8_fdct_fn_t short4x4; vp8_fdct_fn_t short8x4; vp8_fdct_fn_t fast4x4; diff --git a/vp8/encoder/defaultcoefcounts.h b/vp8/encoder/defaultcoefcounts.h index 2c0f3ddf3..3b54c823c 100644 --- a/vp8/encoder/defaultcoefcounts.h +++ b/vp8/encoder/defaultcoefcounts.h @@ -221,3 +221,182 @@ static const unsigned int default_coef_counts[BLOCK_TYPES] }, }, }; + + +#if CONFIG_T8X8 +const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS] = +{ + + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 1 */ + { 21041, 13314, 3420, 592, 117, 0, 0, 0, 0, 0, 0, 11783}, + { 48236, 6918, 586, 153, 0, 0, 0, 0, 0, 0, 0, 23137}, + { 676112, 106685, 24701, 6003, 1426, 429, 165, 0, 0, 0, 0, 28910} + }, + { /* Coeff Band 2 */ + { 660107, 75227, 8451, 1345, 259, 0, 0, 0, 0, 0, 0, 0}, + { 79164, 36835, 6865, 1185, 246, 47, 0, 0, 0, 0, 0, 2575}, + { 19469, 14330, 3070, 579, 94, 6, 0, 0, 0, 0, 0, 44} + }, + { /* Coeff Band 3 */ + { 1978004, 235343, 28485, 3242, 271, 0, 0, 0, 0, 0, 0, 0}, + { 228684, 106736, 21431, 2842, 272, 46, 0, 0, 0, 0, 0, 9266}, + { 32470, 27496, 6852, 1386, 45, 93, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 4 */ + { 1911212, 224613, 49653, 13748, 2541, 568, 48, 0, 0, 0, 0, 0}, + { 196670, 103472, 44473, 11490, 2432, 977, 72, 0, 0, 0, 0, 9447}, + { 37876, 40417, 19142, 6069, 1799, 727, 51, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 3813399, 437714, 64387, 11312, 695, 219, 0, 0, 0, 0, 0, 0}, + { 438288, 215917, 61905, 10194, 674, 107, 0, 0, 0, 0, 0, 17808}, + { 99139, 93643, 30054, 5758, 802, 171, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 12259383, 1625505, 234927, 46306, 8417, 1456, 151, 0, 
0, 0, 0, 0}, + { 1518161, 734287, 204240, 44228, 9462, 2240, 65, 0, 0, 0, 0, 107630}, + { 292470, 258894, 94925, 25864, 6662, 2055, 170, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 7 */ + { 9791308, 2118949, 169439, 16735, 1122, 0, 0, 0, 0, 0, 0, 0}, + { 1500281, 752410, 123259, 13065, 1168, 47, 0, 0, 0, 0, 0, 707182}, + { 193067, 142638, 31018, 4719, 516, 138, 0, 0, 0, 0, 0, 12439} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 16925, 10553, 852, 16, 63, 87, 47, 0, 0, 0, 0, 31232}, + { 39777, 26839, 6822, 1908, 678, 456, 227, 168, 35, 0, 0, 46825}, + { 17300, 16666, 4168, 1209, 492, 154, 118, 207, 0, 0, 0, 19608} + }, + { /* Coeff Band 1 */ + { 35882, 31722, 4625, 1270, 266, 237, 0, 0, 0, 0, 0, 0}, + { 15426, 13894, 4482, 1305, 281, 43, 0, 0, 0, 0, 0, 18627}, + { 3900, 6552, 3472, 1723, 746, 366, 115, 35, 0, 0, 0, 798} + }, + { /* Coeff Band 2 */ + { 21998, 29132, 3353, 679, 46, 0, 0, 0, 0, 0, 0, 0}, + { 9098, 15767, 3794, 792, 268, 47, 0, 0, 0, 0, 0, 22402}, + { 4007, 8472, 2844, 687, 217, 0, 0, 0, 0, 0, 0, 2739} + }, + { /* Coeff Band 3 */ + { 0, 31414, 2911, 682, 96, 0, 0, 0, 0, 0, 0, 0}, + { 0, 16515, 4425, 938, 124, 0, 0, 0, 0, 0, 0, 31369}, + { 0, 4833, 2787, 1213, 150, 0, 0, 0, 0, 0, 0, 3744} + }, + { /* Coeff Band 4 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52762}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13326} + }, + { /* Coeff Band 7 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 4444, 1614, 120, 48, 0, 48, 0, 0, 0, 0, 0, 278}, + { 192436, 103730, 24494, 9845, 4122, 1193, 102, 0, 0, 
0, 0, 2577}, + { 3473446, 2308716, 815510, 370374, 167797, 92152, 12073, 86, 0, 0, 0, 6801} + }, + { /* Coeff Band 1 */ + { 2150616, 1136388, 250011, 86888, 31434, 13746, 1243, 0, 0, 0, 0, 0}, + { 1179945, 799802, 266012, 106787, 40809, 16486, 1546, 0, 0, 0, 0, 2673}, + { 465128, 504130, 286989, 146259, 62380, 30192, 2866, 20, 0, 0, 0, 0} + }, + { /* Coeff Band 2 */ + { 2157762, 1177519, 282665, 108499, 43389, 23224, 2597, 34, 0, 0, 0, 0}, + { 1135685, 813705, 278079, 123255, 53935, 29492, 3152, 39, 0, 0, 0, 2978}, + { 391894, 428037, 264216, 144306, 69326, 40281, 5541, 29, 0, 0, 0, 38} + }, + { /* Coeff Band 3 */ + { 6669109, 3468471, 782161, 288484, 115500, 51083, 4943, 41, 0, 0, 0, 0}, + { 3454493, 2361636, 809524, 337663, 141343, 65036, 6361, 0, 0, 0, 0, 8730}, + { 1231825, 1359522, 824686, 420784, 185517, 98731, 10973, 72, 0, 0, 0, 20} + }, + { /* Coeff Band 4 */ + { 7606203, 3452846, 659856, 191703, 49335, 14336, 450, 0, 0, 0, 0, 0}, + { 3806506, 2379332, 691697, 224938, 61966, 18324, 766, 0, 0, 0, 0, 8193}, + { 1270110, 1283728, 628775, 243378, 72617, 24897, 1087, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 15314169, 7436809, 1579928, 515790, 167453, 58305, 3502, 19, 0, 0, 0, 0}, + { 7021286, 4667922, 1545706, 574463, 191793, 68748, 4048, 1, 0, 0, 0, 17222}, + { 2011989, 2145878, 1185336, 534879, 195719, 79103, 5343, 4, 0, 0, 0, 37} + }, + { /* Coeff Band 6 */ + { 63458382, 25384462, 4208045, 1091050, 299011, 95242, 5238, 33, 0, 0, 0, 0}, + { 25638401, 14694085, 3945978, 1195420, 344813, 117355, 6703, 0, 0, 0, 0, 216811}, + { 5988177, 5824044, 2754413, 1077350, 370739, 139710, 9693, 38, 0, 0, 0, 1835} + }, + { /* Coeff Band 7 */ + { 74998348, 29342158, 2955001, 452912, 69631, 9516, 37, 0, 0, 0, 0, 0}, + { 24762356, 13281085, 2409883, 436787, 68948, 10658, 36, 0, 0, 0, 0, 6614989}, + { 3882867, 3224489, 1052289, 252890, 46967, 8548, 154, 0, 0, 0, 0, 194354} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 10583, 12059, 3155, 1041, 248, 175, 24, 
2, 0, 0, 0, 5717}, + { 42461, 41782, 13553, 4966, 1352, 855, 89, 0, 0, 0, 0, 15000}, + { 4691125, 5045589, 2673566, 1089317, 378161, 160268, 18252, 813, 69, 13, 0, 49} + }, + { /* Coeff Band 1 */ + { 1535203, 1685686, 924565, 390329, 141709, 60523, 5983, 171, 0, 0, 0, 0}, + { 1594021, 1793276, 1016078, 441332, 164159, 70843, 8098, 311, 0, 0, 0, 11312}, + { 1225223, 1430184, 888492, 460713, 203286, 115149, 22061, 804, 7, 0, 0, 0} + }, + { /* Coeff Band 2 */ + { 1522386, 1590366, 799910, 303691, 96625, 37608, 3637, 180, 33, 11, 0, 0}, + { 1682184, 1793869, 913649, 353520, 113674, 46309, 4736, 221, 18, 3, 0, 963}, + { 1574580, 1740474, 954392, 417994, 151400, 67091, 8000, 536, 73, 10, 0, 63} + }, + { /* Coeff Band 3 */ + { 4963672, 5197790, 2585383, 982161, 313333, 118498, 16014, 536, 62, 0, 0, 0}, + { 5223913, 5569803, 2845858, 1107384, 364949, 147841, 18296, 658, 11, 11, 0, 1866}, + { 4042207, 4548894, 2608767, 1154993, 446290, 221295, 41054, 2438, 124, 20, 0, 0} + }, + { /* Coeff Band 4 */ + { 3857216, 4431325, 2670447, 1330169, 553301, 286825, 46763, 1917, 0, 0, 0, 0}, + { 4226215, 4963701, 3046198, 1523923, 644670, 355519, 58792, 2525, 0, 0, 0, 1298}, + { 3831873, 4580350, 3018580, 1660048, 797298, 502983, 123906, 7172, 16, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 8524543, 9285149, 4979435, 2039330, 683458, 266032, 22628, 270, 0, 0, 0, 0}, + { 9432163, 10428088, 5715661, 2385738, 838389, 326264, 29981, 361, 0, 0, 0, 884}, + { 9039066, 10368964, 6136765, 2862030, 1098269, 511668, 63105, 945, 14, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 33222872, 34748297, 17701695, 7214933, 2602336, 1191859, 187873, 12667, 390, 3, 0, 0}, + { 34765051, 37140719, 19525578, 8268934, 3085012, 1473864, 246743, 15258, 736, 3, 0, 8403}, + { 28591289, 32252393, 19037068, 9213729, 4020653, 2372354, 586420, 67428, 3920, 92, 7, 3} + }, + { /* Coeff Band 7 */ + { 68604786, 60777665, 19712887, 5656955, 1520443, 507166, 51829, 2466, 10, 0, 0, 0}, + { 55447403, 51682540, 19008774, 5928582, 
1706884, 595531, 65998, 3661, 101, 0, 0, 8468343}, + { 28321970, 29149398, 13565882, 5258675, 1868588, 898041, 192023, 21497, 672, 17, 0, 1884921} + } + } + }; +#endif diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 82c88c440..a37727510 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -26,10 +26,12 @@ #include "vp8/common/findnearmv.h" #include "vp8/common/reconintra.h" #include <stdio.h> +#include <math.h> #include <limits.h> #include "vp8/common/subpixel.h" #include "vpx_ports/vpx_timer.h" + #if CONFIG_RUNTIME_CPU_DETECT #define RTCD(x) &cpi->common.rtcd.x #define IF_RTCD(x) (x) @@ -37,6 +39,18 @@ #define RTCD(x) NULL #define IF_RTCD(x) NULL #endif + +#if CONFIG_SEGMENTATION +#define SEEK_SEGID 12 +#define SEEK_SAMEID 4 +#define SEEK_DIFFID 7 +#endif + +#ifdef ENC_DEBUG +int enc_debug=0; +int mb_row_debug, mb_col_debug; +#endif + extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex); @@ -52,6 +66,8 @@ int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); + + #ifdef MODE_STATS unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; unsigned int inter_uv_modes[4] = {0, 0, 0, 0}; @@ -80,6 +96,186 @@ static const unsigned char VP8_VAR_OFFS[16]= }; + +#if CONFIG_T8X8 + +//INTRA mode transform size +//When all three criteria are off the default is 4x4 +//#define INTRA_VARIANCE_ENTROPY_CRITERIA +#define INTRA_WTD_SSE_ENTROPY_CRITERIA +//#define INTRA_TEST_8X8_ONLY +// +//INTER mode transform size +//When all three criteria are off the default is 4x4 +//#define INTER_VARIANCE_ENTROPY_CRITERIA +#define INTER_WTD_SSE_ENTROPY_CRITERIA +//#define INTER_TEST_8X8_ONLY + +double variance_Block(short *b1, int pitch, int dimension) +{ + short ip[8][8]={{0}}; + short *b = 
b1; + int i, j = 0; + double mean = 0.0, variance = 0.0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + ip[i][j] = b[j]; + mean += ip[i][j]; + } + b += pitch; + } + mean /= (dimension*dimension); + + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + variance += (ip[i][j]-mean)*(ip[i][j]-mean); + } + } + variance /= (dimension*dimension); + return variance; +} + +double mean_Block(short *b, int pitch, int dimension) +{ + short ip[8][8]={{0}}; + int i, j = 0; + double mean = 0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + ip[i][j] = b[j]; + mean += ip[i][j]; + } + b += pitch; + } + mean /= (dimension*dimension); + + return mean; +} + +int SSE_Block(short *b, int pitch, int dimension) +{ + int i, j, sse_block = 0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + sse_block += b[j]*b[j]; + } + b += pitch; + } + return sse_block; +} + +double Compute_Variance_Entropy(MACROBLOCK *x) +{ + double variance_8[4] = {0.0, 0.0, 0.0, 0.0}, sum_var = 0.0, all_entropy = 0.0; + variance_8[0] = variance_Block(x->block[0].src_diff, 16, 8); + variance_8[1] = variance_Block(x->block[2].src_diff, 16, 8); + variance_8[2] = variance_Block(x->block[8].src_diff, 16, 8); + variance_8[3] = variance_Block(x->block[10].src_diff, 16, 8); + sum_var = variance_8[0] + variance_8[1] + variance_8[2] + variance_8[3]; + if(sum_var) + { + int i; + for(i = 0; i <4; i++) + { + if(variance_8[i]) + { + variance_8[i] /= sum_var; + all_entropy -= variance_8[i]*log(variance_8[i]); + } + } + } + return (all_entropy /log(2)); +} + +double Compute_Wtd_SSE_SubEntropy(MACROBLOCK *x) +{ + double variance_8[4] = {0.0, 0.0, 0.0, 0.0}; + double entropy_8[4] = {0.0, 0.0, 0.0, 0.0}; + double sse_1, sse_2, sse_3, sse_4, sse_0; + int i; + for (i=0;i<3;i+=2) + { + sse_0 = SSE_Block(x->block[i].src_diff, 16, 8); + if(sse_0) + { + sse_1 = SSE_Block(x->block[i].src_diff, 16, 4)/sse_0; + sse_2 = 
SSE_Block(x->block[i+1].src_diff, 16, 4)/sse_0; + sse_3 = SSE_Block(x->block[i+4].src_diff, 16, 4)/sse_0; + sse_4 = SSE_Block(x->block[i+5].src_diff, 16, 4)/sse_0; + variance_8[i]= variance_Block(x->block[i].src_diff, 16, 8); + if(sse_1 && sse_2 && sse_3 && sse_4) + entropy_8[i]= (-sse_1*log(sse_1) + -sse_2*log(sse_2) + -sse_3*log(sse_3) + -sse_4*log(sse_4))/log(2); + } + } + for (i=8;i<11;i+=2) + { + if(sse_0) + { + sse_0 = SSE_Block(x->block[i].src_diff, 16, 8); + sse_1 = SSE_Block(x->block[i].src_diff, 16, 4)/sse_0; + sse_2 = SSE_Block(x->block[i+1].src_diff, 16, 4)/sse_0; + sse_3 = SSE_Block(x->block[i+4].src_diff, 16, 4)/sse_0; + sse_4 = SSE_Block(x->block[i+5].src_diff, 16, 4)/sse_0; + variance_8[i-7]= variance_Block(x->block[i].src_diff, 16, 8); + if(sse_1 && sse_2 && sse_3 && sse_4) + entropy_8[i-7]= (-sse_1*log(sse_1) + -sse_2*log(sse_2) + -sse_3*log(sse_3) + -sse_4*log(sse_4))/log(2); + } + } + if(variance_8[0]+variance_8[1]+variance_8[2]+variance_8[3]) + return (entropy_8[0]*variance_8[0]+ + entropy_8[1]*variance_8[1]+ + entropy_8[2]*variance_8[2]+ + entropy_8[3]*variance_8[3])/ + (variance_8[0]+ + variance_8[1]+ + variance_8[2]+ + variance_8[3]); + else + return 0; +} + +int vp8_8x8_selection_intra(MACROBLOCK *x) +{ +#ifdef INTRA_VARIANCE_ENTROPY_CRITERIA + return (Compute_Variance_Entropy(x) > 1.2); +#elif defined(INTRA_WTD_SSE_ENTROPY_CRITERIA) + return (Compute_Wtd_SSE_SubEntropy(x) > 1.2); +#elif defined(INTRA_TEST_8X8_ONLY) + return 1; +#else + return 0; //when all criteria are off use the default 4x4 only +#endif +} + +int vp8_8x8_selection_inter(MACROBLOCK *x) +{ +#ifdef INTER_VARIANCE_ENTROPY_CRITERIA + return (Compute_Variance_Entropy(x) > 1.5); +#elif defined(INTER_WTD_SSE_ENTROPY_CRITERIA) + return (Compute_Wtd_SSE_SubEntropy(x) > 1.5); +#elif defined(INTER_TEST_8X8_ONLY) + return 1; +#else + return 0; //when all criteria are off use the default 4x4 only +#endif +} + +#endif + // Original activity measure from Tim T's code. 
static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) { @@ -376,7 +572,10 @@ void encode_mb_row(VP8_COMP *cpi, int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cpi->common.mb_cols); - +#if CONFIG_SEGMENTATION + int left_id, above_id; + int sum; +#endif #if CONFIG_MULTITHREAD const int nsync = cpi->mt_sync_range; const int rightmost_col = cm->mb_cols - 1; @@ -415,6 +614,12 @@ void encode_mb_row(VP8_COMP *cpi, // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { +#ifdef ENC_DEBUG + //enc_debug = (cpi->count==29 && mb_row==5 && mb_col==0); + enc_debug = (cpi->count==4 && mb_row==17 && mb_col==13); + mb_col_debug=mb_col; + mb_row_debug=mb_row; +#endif // Distance of Mb to the left & right edges, specified in // 1/8th pel units as they are always compared to values // that are in 1/8th pel units @@ -461,8 +666,14 @@ void encode_mb_row(VP8_COMP *cpi, if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) +#if CONFIG_T8X8 + // Reset segment_id to 0 or 1 so that the default transform mode is 4x4 + if (cpi->segmentation_map[map_index+mb_col] <= 3) + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]&1; +#else if (cpi->segmentation_map[map_index+mb_col] <= 3) xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; +#endif else xd->mode_info_context->mbmi.segment_id = 0; @@ -476,24 +687,27 @@ void encode_mb_row(VP8_COMP *cpi, if (cm->frame_type == KEY_FRAME) { *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp); + //Note the encoder may have changed the segment_id + #ifdef MODE_STATS - y_modes[xd->mbmi.mode] ++; + y_modes[xd->mode_info_context->mbmi.mode] ++; #endif } else { *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset); + //Note the encoder may have changed the segment_id 
#ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int b; - for (b = 0; b < xd->mbmi.partition_count; b++) + for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition->bmi[b].mode] ++; + inter_b_modes[x->partition_info->bmi[b].mode] ++; } } @@ -534,6 +748,12 @@ void encode_mb_row(VP8_COMP *cpi, // Increment the activity mask pointers. x->mb_activity_ptr++; +#if CONFIG_SEGMENTATION + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) + xd->mode_info_context->mbmi.segment_id = 0; + else + xd->mode_info_context->mbmi.segment_id = 1; +#endif /* save the block info */ for (i = 0; i < 16; i++) xd->mode_info_context->bmi[i] = xd->block[i].bmi; @@ -546,9 +766,42 @@ void encode_mb_row(VP8_COMP *cpi, recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment useage - segment_counts[xd->mode_info_context->mbmi.segment_id] ++; +#if CONFIG_SEGMENTATION + //cpi->segmentation_map[mb_row * cm->mb_cols + mb_col] = xd->mbmi.segment_id; + if (cm->frame_type == KEY_FRAME) + { + segment_counts[xd->mode_info_context->mbmi.segment_id]++; + } + else + { + sum = 0; + if (mb_col != 0) + sum += (xd->mode_info_context-1)->mbmi.segment_flag; + if (mb_row != 0) + sum += (xd->mode_info_context-cm->mb_cols)->mbmi.segment_flag; + + if (xd->mode_info_context->mbmi.segment_id == cpi->segmentation_map[(mb_row*cm->mb_cols) + mb_col]) + xd->mode_info_context->mbmi.segment_flag = 0; + else + xd->mode_info_context->mbmi.segment_flag = 1; + if (xd->mode_info_context->mbmi.segment_flag == 0) + { + segment_counts[SEEK_SAMEID + sum]++; + segment_counts[10]++; + } + else + { + segment_counts[SEEK_DIFFID + sum]++; + segment_counts[11]++; + //calculate individual segment ids + segment_counts[xd->mode_info_context->mbmi.segment_id] ++; + } + } + segment_counts[SEEK_SEGID + 
xd->mode_info_context->mbmi.segment_id] ++; +#else + segment_counts[xd->mode_info_context->mbmi.segment_id] ++; +#endif // skip to next mb xd->mode_info_context++; x->partition_info++; @@ -675,7 +928,13 @@ void vp8_encode_frame(VP8_COMP *cpi) MACROBLOCKD *const xd = & x->e_mbd; TOKENEXTRA *tp = cpi->tok; +#if CONFIG_SEGMENTATION + int segment_counts[MAX_MB_SEGMENTS + SEEK_SEGID]; + int prob[3]; + int new_cost, original_cost; +#else int segment_counts[MAX_MB_SEGMENTS]; +#endif int totalrate; vpx_memset(segment_counts, 0, sizeof(segment_counts)); @@ -845,41 +1104,126 @@ void vp8_encode_frame(VP8_COMP *cpi) } - // Work out the segment probabilites if segmentation is enabled if (xd->segmentation_enabled) { int tot_count; int i; + int count1,count2,count3,count4; // Set to defaults vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); +#if CONFIG_SEGMENTATION + + tot_count = segment_counts[12] + segment_counts[13] + segment_counts[14] + segment_counts[15]; + count1 = segment_counts[12] + segment_counts[13]; + count2 = segment_counts[14] + segment_counts[15]; + + if (tot_count) + prob[0] = (count1 * 255) / tot_count; + + if (count1 > 0) + prob[1] = (segment_counts[12] * 255) /count1; + + if (count2 > 0) + prob[2] = (segment_counts[14] * 255) /count2; + + if (cm->frame_type != KEY_FRAME) + { + tot_count = segment_counts[4] + segment_counts[7]; + if (tot_count) + xd->mb_segment_tree_probs[3] = (segment_counts[4] * 255)/tot_count; + + tot_count = segment_counts[5] + segment_counts[8]; + if (tot_count) + xd->mb_segment_tree_probs[4] = (segment_counts[5] * 255)/tot_count; + + tot_count = segment_counts[6] + segment_counts[9]; + if (tot_count) + xd->mb_segment_tree_probs[5] = (segment_counts[6] * 255)/tot_count; + } tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; + count3 = segment_counts[0] + segment_counts[1]; + count4 = segment_counts[2] + segment_counts[3]; if (tot_count) + xd->mb_segment_tree_probs[0] = 
(count3 * 255) / tot_count; + + if (count3 > 0) + xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) /count3; + + if (count4 > 0) + xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) /count4; + + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) { - xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count; + if (xd->mb_segment_tree_probs[i] == 0) + xd->mb_segment_tree_probs[i] = 1; + } - tot_count = segment_counts[0] + segment_counts[1]; + original_cost = count1 * vp8_cost_zero(prob[0]) + count2 * vp8_cost_one(prob[0]); - if (tot_count > 0) - { - xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count; - } + if (count1 > 0) + original_cost += segment_counts[12] * vp8_cost_zero(prob[1]) + segment_counts[13] * vp8_cost_one(prob[1]); + + if (count2 > 0) + original_cost += segment_counts[14] * vp8_cost_zero(prob[2]) + segment_counts[15] * vp8_cost_one(prob[2]) ; + + new_cost = 0; + + if (cm->frame_type != KEY_FRAME) + { + new_cost = segment_counts[4] * vp8_cost_zero(xd->mb_segment_tree_probs[3]) + segment_counts[7] * vp8_cost_one(xd->mb_segment_tree_probs[3]); - tot_count = segment_counts[2] + segment_counts[3]; + new_cost += segment_counts[5] * vp8_cost_zero(xd->mb_segment_tree_probs[4]) + segment_counts[8] * vp8_cost_one(xd->mb_segment_tree_probs[4]); - if (tot_count > 0) - xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; + new_cost += segment_counts[6] * vp8_cost_zero(xd->mb_segment_tree_probs[5]) + segment_counts[9] * vp8_cost_one (xd->mb_segment_tree_probs[5]); + } + + if (tot_count > 0) + new_cost += count3 * vp8_cost_zero(xd->mb_segment_tree_probs[0]) + count4 * vp8_cost_one(xd->mb_segment_tree_probs[0]); + + if (count3 > 0) + new_cost += segment_counts[0] * vp8_cost_zero(xd->mb_segment_tree_probs[1]) + segment_counts[1] * vp8_cost_one(xd->mb_segment_tree_probs[1]); - // Zero probabilities not allowed - for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++) + if (count4 > 0) + new_cost += 
segment_counts[2] * vp8_cost_zero(xd->mb_segment_tree_probs[2]) + segment_counts[3] * vp8_cost_one(xd->mb_segment_tree_probs[2]) ; + + if (new_cost < original_cost) + xd->temporal_update = 1; + else + { + xd->temporal_update = 0; + xd->mb_segment_tree_probs[0] = prob[0]; + xd->mb_segment_tree_probs[1] = prob[1]; + xd->mb_segment_tree_probs[2] = prob[2]; + } +#else + tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; + count1 = segment_counts[0] + segment_counts[1]; + count2 = segment_counts[2] + segment_counts[3]; + + if (tot_count) + xd->mb_segment_tree_probs[0] = (count1 * 255) / tot_count; + + if (count1 > 0) + xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) /count1; + + if (count2 > 0) + xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) /count2; + +#endif + // Zero probabilities not allowed +#if CONFIG_SEGMENTATION + for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) +#else + for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) +#endif { if (xd->mb_segment_tree_probs[i] == 0) xd->mb_segment_tree_probs[i] = 1; } - } } // 256 rate units to the bit @@ -1081,7 +1425,7 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) do { - ++ bct[xd->block[b].bmi.mode]; + ++ bct[xd->block[b].bmi.as_mode]; } while (++b < 16); } @@ -1119,6 +1463,10 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { int rate; +#if CONFIG_T8X8 + if (x->e_mbd.segmentation_enabled) + x->e_mbd.update_mb_segmentation_map = 1; +#endif if (cpi->sf.RD && cpi->compressor_speed != 2) vp8_rd_pick_intra_mode(cpi, x, &rate); else @@ -1133,12 +1481,22 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); else + { +#if CONFIG_T8X8 + if (x->e_mbd.segmentation_enabled) + x->e_mbd.mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_intra(x) << 1); +#endif 
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - + } vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); sum_intra_stats(cpi, x); vp8_tokenize_mb(cpi, &x->e_mbd, t); - +#if CONFIG_T8X8 + if( x->e_mbd.mode_info_context->mbmi.segment_id >=2) + cpi->t8x8_count++; + else + cpi->t4x4_count++; +#endif return rate; } #ifdef SPEEDSTATS @@ -1264,16 +1622,25 @@ int vp8cx_encode_inter_macroblock cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + x->e_mbd.update_mb_segmentation_map = 1; +#endif + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - if (xd->mode_info_context->mbmi.mode == B_PRED) { + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); } else { +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_intra(x) << 1); +#endif + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); } @@ -1282,6 +1649,10 @@ int vp8cx_encode_inter_macroblock else { int ref_fb_idx; +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_inter(x) << 1); +#endif vp8_build_uvmvs(xd, cpi->common.full_pixel); @@ -1311,9 +1682,40 @@ int vp8cx_encode_inter_macroblock xd->dst.y_stride, xd->dst.uv_stride); } +#if CONFIG_T8X8 + if (x->e_mbd.mode_info_context->mbmi.segment_id >=2) + cpi->t8x8_count++; + else + cpi->t4x4_count++; +#endif if (!x->skip) + { +#ifdef ENC_DEBUG + if (enc_debug) + { + int i; + printf("Segment=%d [%d, %d]: %d %d:\n", x->e_mbd.mode_info_context->mbmi.segment_id, mb_col_debug, mb_row_debug, xd->mb_to_left_edge, xd->mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + printf("eobs = "); + for (i=0;i<25;i++) + printf("%d:%d ", i, xd->block[i].eob); + 
printf("\n"); + fflush(stdout); + } +#endif vp8_tokenize_mb(cpi, xd, t); +#ifdef ENC_DEBUG + if (enc_debug) { + printf("Tokenized\n"); + fflush(stdout); + } +#endif + } else { if (cpi->common.mb_no_coeff_skip) diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 59db0253b..10afed3ec 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -22,6 +22,10 @@ #include "encodeintra.h" +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) #else @@ -96,15 +100,67 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_intra_mby_8x8(x); + else +#endif vp8_transform_intra_mby(x); - vp8_quantize_mby(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mby_8x8(x); + else +#endif + vp8_quantize_mby(x); if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_optimize_mby_8x8(x, rtcd); + else +#endif vp8_optimize_mby(x, rtcd); + } - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + +#ifdef ENC_DEBUG + if (enc_debug) { + int i; + printf("Intra qcoeff:\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra diff:\n"); 
+ for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("eobs:\n"); + for (i=0;i<25;i++) + printf("%d ", x->e_mbd.block[i].eob); + printf("\n"); + } +#endif RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); @@ -116,14 +172,66 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mbuv_8x8(x); + else +#endif + vp8_transform_mbuv(x); - vp8_transform_mbuv(x); - - vp8_quantize_mbuv(x); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mbuv_8x8(x); + else +#endif + vp8_quantize_mbuv(x); + +#ifdef ENC_DEBUG + if (enc_debug) { + int i; + printf("vp8_encode_intra16x16mbuv\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + printf("qcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("eobs:\n"); + for (i=0;i<25;i++) + printf("%d ", x->e_mbd.block[i].eob); + printf("\n"); + } +#endif if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_optimize_mbuv_8x8(x, rtcd); + else +#endif 
vp8_optimize_mbuv(x, rtcd); + } +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index eb89bba0a..408a5956e 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -26,6 +26,11 @@ #else #define IF_RTCD(x) NULL #endif + +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *src_ptr = (*(be->base_src) + be->src); @@ -117,7 +122,21 @@ static void build_dcblock(MACROBLOCK *x) src_diff_ptr[i] = x->coeff[i * 16]; } } - +#if CONFIG_T8X8 +void vp8_build_dcblock_8x8(MACROBLOCK *x) +{ + short *src_diff_ptr = &x->src_diff[384]; + int i; + for (i = 0; i < 16; i++) + { + src_diff_ptr[i] = 0; + } + src_diff_ptr[0] = x->coeff[0 * 16]; + src_diff_ptr[1] = x->coeff[4 * 16]; + src_diff_ptr[4] = x->coeff[8 * 16]; + src_diff_ptr[8] = x->coeff[12 * 16]; +} +#endif void vp8_transform_mbuv(MACROBLOCK *x) { int i; @@ -197,10 +216,104 @@ static void transform_mby(MACROBLOCK *x) } } +#if CONFIG_T8X8 +void vp8_transform_mbuv_8x8(MACROBLOCK *x) +{ + int i; -#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) + for (i = 16; i < 24; i += 4) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } +} + + +void vp8_transform_intra_mby_8x8(MACROBLOCK *x)//changed +{ + int i; + + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + vp8_build_dcblock_8x8(x); + //vp8_build_dcblock(x); + + // do 2nd order transform on the dc block + 
x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); + +} + + +void vp8_transform_mb_8x8(MACROBLOCK *x) +{ + int i; + + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + vp8_build_dcblock_8x8(x); + //vp8_build_dcblock(x); + + for (i = 16; i < 24; i += 4) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } + + // do 2nd order transform on the dc block + if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); +} + +void vp8_transform_mby_8x8(MACROBLOCK *x) +{ + int i; + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + { + //vp8_build_dcblock(x); + vp8_build_dcblock_8x8(x); + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); + } +} + +#endif + +#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) +#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp8_token_state vp8_token_state; struct vp8_token_state{ @@ -581,27 +694,554 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) } } +#if CONFIG_T8X8 +void optimize_b_8x8(MACROBLOCK *mb, int i, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, ENTROPY_CONTEXT *l1, + const VP8_ENCODER_RTCD *rtcd) +{ + BLOCK *b; + BLOCKD *d; + vp8_token_state tokens[65][2]; + 
unsigned best_mask[2]; + const short *dequant_ptr; + const short *coeff_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + int eob; + int i0; + int rc; + int x; + int sz = 0; + int next; + int rdmult; + int rddiv; + int final_eob; + int rd_cost0; + int rd_cost1; + int rate0; + int rate1; + int error0; + int error1; + int t0; + int t1; + int best; + int band; + int pt; + + b = &mb->block[i]; + d = &mb->e_mbd.block[i]; + + /* Enable this to test the effect of RDO as a replacement for the dynamic + * zero bin instead of an augmentation of it. + */ +#if 0 + vp8_strict_quantize_b(b, d); +#endif + + dequant_ptr = d->dequant; + coeff_ptr = b->coeff; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + i0 = !type; + eob = d->eob; + + /* Now set up a Viterbi trellis to evaluate alternative roundings. */ + /* TODO: These should vary with the block type, since the quantizer does. */ + rdmult = mb->rdmult << 2; + rddiv = mb->rddiv; + best_mask[0] = best_mask[1] = 0; + /* Initialize the sentinel node of the trellis. */ + tokens[eob][0].rate = 0; + tokens[eob][0].error = 0; + tokens[eob][0].next = 64; + tokens[eob][0].token = DCT_EOB_TOKEN; + tokens[eob][0].qc = 0; + *(tokens[eob] + 1) = *(tokens[eob] + 0); + next = eob; + for (i = eob; i-- > i0;) + { + int base_bits; + int d2; + int dx; + + rc = vp8_default_zig_zag1d_8x8[i]; + x = qcoeff_ptr[rc]; + /* Only add a trellis state for non-zero coefficients. */ + if (x) + { + int shortcut=0; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + /* Evaluate the first possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + t0 = (vp8_dct_value_tokens_ptr + x)->Token; + /* Consider both possible successor states. 
*/ + if (next < 64) + { + band = vp8_coef_bands_8x8[i + 1]; + pt = vp8_prev_token_class[t0]; + rate0 += + mb->token_costs[type][band][pt][tokens[next][0].token]; + rate1 += + mb->token_costs[type][band][pt][tokens[next][1].token]; + } + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + d2 = dx*dx; + tokens[i][0].rate = base_bits + (best ? rate1 : rate0); + tokens[i][0].error = d2 + (best ? error1 : error0); + tokens[i][0].next = next; + tokens[i][0].token = t0; + tokens[i][0].qc = x; + best_mask[0] |= best << i; + /* Evaluate the second possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + + if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) && + (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0])) + shortcut = 1; + else + shortcut = 0; + + if(shortcut) + { + sz = -(x < 0); + x -= 2*sz + 1; + } + + /* Consider both possible successor states. */ + if (!x) + { + /* If we reduced this coefficient to zero, check to see if + * we need to move the EOB back here. + */ + t0 = tokens[next][0].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + t1 = tokens[next][1].token == DCT_EOB_TOKEN ? 
+ DCT_EOB_TOKEN : ZERO_TOKEN; + } + else + { + t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; + } + if (next < 64) + { + band = vp8_coef_bands_8x8[i + 1]; + if(t0!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t0]; + rate0 += mb->token_costs[type][band][pt][ + tokens[next][0].token]; + } + if(t1!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t1]; + rate1 += mb->token_costs[type][band][pt][ + tokens[next][1].token]; + } + } + + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + + if(shortcut) + { + dx -= (dequant_ptr[rc!=0] + sz) ^ sz; + d2 = dx*dx; + } + tokens[i][1].rate = base_bits + (best ? rate1 : rate0); + tokens[i][1].error = d2 + (best ? error1 : error0); + tokens[i][1].next = next; + tokens[i][1].token =best?t1:t0; + tokens[i][1].qc = x; + best_mask[1] |= best << i; + /* Finally, make this the new head of the trellis. */ + next = i; + } + /* There's no choice to make for a zero coefficient, so we don't + * add a new trellis node, but we do need to update the costs. + */ + else + { + band = vp8_coef_bands_8x8[i + 1]; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + /* Update the cost of each path if we're past the EOB token. */ + if (t0 != DCT_EOB_TOKEN) + { + tokens[next][0].rate += mb->token_costs[type][band][0][t0]; + tokens[next][0].token = ZERO_TOKEN; + } + if (t1 != DCT_EOB_TOKEN) + { + tokens[next][1].rate += mb->token_costs[type][band][0][t1]; + tokens[next][1].token = ZERO_TOKEN; + } + /* Don't update next, because we didn't add a new node. */ + } + } + + /* Now pick the best path through the whole trellis. 
*/ + band = vp8_coef_bands_8x8[i + 1]; + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + rate0 += mb->token_costs[type][band][pt][t0]; + rate1 += mb->token_costs[type][band][pt][t1]; + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + best = rd_cost1 < rd_cost0; + final_eob = i0 - 1; + for (i = next; i < eob; i = next) + { + x = tokens[i][best].qc; + if (x) + final_eob = i; + rc = vp8_default_zig_zag1d_8x8[i]; + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; + next = tokens[i][best].next; + best = (best_mask[best] >> i) & 1; + } + final_eob++; + + d->eob = final_eob; + *a = *l = (d->eob != !type); + +} + +void optimize_mb_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + int type; + int has_2nd_order; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 
0 : 3; + + for (b = 0; b < 16; b+=4) + { + optimize_b_8x8(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+4], + rtcd); + + if(b==0) + { + *(ta + vp8_block2above[1]) = *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[5]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[1]) = *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[5]) = *(tl + vp8_block2left[b]); + } + else if(b==4) + { + *(ta + vp8_block2above[2]) = *(ta + vp8_block2above[3]) = *(ta + vp8_block2above[6]) = *(ta + vp8_block2above[7]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[2]) = *(tl + vp8_block2left[3]) = *(tl + vp8_block2left[6]) = *(tl + vp8_block2left[7]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[1]); + *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[1]); + } + else if(b==8) + { + *(ta + vp8_block2above[9]) = *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[13]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[9]) = *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[13]) = *(tl + vp8_block2left[b]); + + } + else if(b==12) + { + *(ta + vp8_block2above[10]) = *(ta + vp8_block2above[11]) = *(ta + vp8_block2above[14]) = *(ta + vp8_block2above[15]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[10]) = *(tl + vp8_block2left[11]) = *(tl + vp8_block2left[14]) = *(tl + vp8_block2left[15]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[8]); + *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[8]); + + } + + + + } + + for (b = 16; b < 20; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = 
+ *(tl + vp8_block2left[b]); + + } + + for (b = 20; b < 24; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + + /* + if (has_2nd_order) + { + vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT], + x->e_mbd.left_context[Y2CONTEXT], 1); + optimize_b(x, 24, 1, t.a, t.l, rtcd); + } + */ +} + +void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + int type; + int has_2nd_order; + + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) + return; + + if (!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 
0 : 3; + + for (b = 0; b < 16; b+=4) + { + optimize_b_8x8(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+4], + rtcd); + if(b==0) + { + *(ta + vp8_block2above[1]) = *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[5]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[1]) = *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[5]) = *(tl + vp8_block2left[b]); + } + else if(b==4) + { + *(ta + vp8_block2above[2]) = *(ta + vp8_block2above[3]) = *(ta + vp8_block2above[6]) = *(ta + vp8_block2above[7]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[2]) = *(tl + vp8_block2left[3]) = *(tl + vp8_block2left[6]) = *(tl + vp8_block2left[7]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[1]); + *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[1]); + } + else if(b==8) + { + *(ta + vp8_block2above[9]) = *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[13]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[9]) = *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[13]) = *(tl + vp8_block2left[b]); + + } + else if(b==12) + { + *(ta + vp8_block2above[10]) = *(ta + vp8_block2above[11]) = *(ta + vp8_block2above[14]) = *(ta + vp8_block2above[15]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[10]) = *(tl + vp8_block2left[11]) = *(tl + vp8_block2left[14]) = *(tl + vp8_block2left[15]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[8]); + *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[8]); + + } + + + } + + /* + if (has_2nd_order) + { + vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT], + x->e_mbd.left_context[Y2CONTEXT], 1); + optimize_b(x, 24, 1, t.a, t.l, rtcd); + } + */ +} + +void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) + return; + + if 
(!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + for (b = 16; b < 20; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + for (b = 20; b < 24; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + +} +#endif + void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_inter_predictors_mb(&x->e_mbd); vp8_subtract_mb(rtcd, x); - transform_mb(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mb_8x8(x); + else +#endif + transform_mb(x); - vp8_quantize_mb(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mb_8x8(x); + else +#endif + vp8_quantize_mb(x); if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + optimize_mb_8x8(x, rtcd); + else +#endif optimize_mb(x, rtcd); + } - vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + 
vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) { +#ifdef ENC_DEBUG + if (enc_debug) + { + int i; + printf("qcoeff:\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + } +#endif + } RECON_INVOKE(&rtcd->common->recon, recon_mb) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); +#ifdef ENC_DEBUG + if (enc_debug) { + int i, j, k; + printf("Final Reconstruction\n"); + for (i =0; i<16; i+=4) { + BLOCKD *b = &x->e_mbd.block[i]; + unsigned char *d = *(b->base_dst) + b->dst; + for (k=0; k<4; k++) { + for (j=0; j<16; j++) + printf("%3d ", d[j]); + printf("\n"); + d+=b->dst_stride; + } + } + } +#endif } -/* this funciton is used by first pass only */ +/* this function is used by first pass only */ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { BLOCK *b = &x->block[0]; @@ -610,22 +1250,34 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); - transform_mby(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mby_8x8(x); + else +#endif + transform_mby(x); vp8_quantize_mby(x); - - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + 
vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); } - void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_inter_predictors_mbuv(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mbuv_8x8(x); + else +#endif vp8_transform_mbuv(x); vp8_quantize_mbuv(x); diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h index 47fc72dad..73f1ad223 100644 --- a/vp8/encoder/encodemb.h +++ b/vp8/encoder/encodemb.h @@ -103,4 +103,16 @@ void vp8_encode_inter16x16uvrd(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK * void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_encode_inter16x16y(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x); + +#if CONFIG_T8X8 +void vp8_transform_mb_8x8(MACROBLOCK *mb); +void vp8_transform_mbuv_8x8(MACROBLOCK *x); +void vp8_transform_intra_mby_8x8(MACROBLOCK *x); +void vp8_build_dcblock_8x8(MACROBLOCK *b); +void vp8_optimize_mby_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +#endif + + + #endif diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 1a37f03b9..4820729ea 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -24,6 +24,14 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); +#ifdef MODE_STATS +extern unsigned int inter_y_modes[10]; +extern unsigned int inter_uv_modes[4]; +extern unsigned 
int inter_b_modes[15]; +extern unsigned int y_modes[5]; +extern unsigned int uv_modes[4]; +extern unsigned int b_modes[14]; +#endif extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); static THREAD_FUNCTION loopfilter_thread(void *p_data) @@ -175,7 +183,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) { *totalrate += vp8cx_encode_intra_macro_block(cpi, x, &tp); #ifdef MODE_STATS - y_modes[xd->mbmi.mode] ++; + y_modes[xd->mode_info_context->mbmi.mode] ++; #endif } else @@ -183,15 +191,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) *totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset); #ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int b; - for (b = 0; b < xd->mbmi.partition_count; b++) + for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition->bmi[b].mode] ++; + inter_b_modes[x->partition_info->bmi[b].mode] ++; } } @@ -387,7 +395,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->subpixel_predict16x16 = xd->subpixel_predict16x16; zd->segmentation_enabled = xd->segmentation_enabled; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); + + vpx_memcpy(zd->segment_feature_data, + xd->segment_feature_data, + sizeof(xd->segment_feature_data)); + +#if CONFIG_SEGFEATURES + vpx_memcpy(zd->segment_feature_mask, + xd->segment_feature_mask, + sizeof(xd->segment_feature_mask)); +#endif for (i = 0; i < 25; i++) { diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 1ca0f962f..fb22be05b 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -71,6 +71,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.get4x4sse_cs = 
vp8_get4x4sse_cs_c; +#if CONFIG_T8X8 + cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c; + cpi->rtcd.fdct.haar_short2x2 = vp8_short_fhaar2x2_c; +#endif cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; @@ -88,6 +92,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.quantize.quantb_pair = vp8_regular_quantize_b_pair; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_c; +#if CONFIG_T8X8 + cpi->rtcd.quantize.quantb_8x8 = vp8_regular_quantize_b_8x8; + cpi->rtcd.quantize.fastquantb_8x8 = vp8_fast_quantize_b_8x8_c; + cpi->rtcd.quantize.quantb_2x2 = vp8_regular_quantize_b_2x2; + cpi->rtcd.quantize.fastquantb_2x2 = vp8_fast_quantize_b_2x2_c; +#endif cpi->rtcd.search.full_search = vp8_full_search_sad; cpi->rtcd.search.refining_search = vp8_refining_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index faa36d66a..c97e10825 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -105,6 +105,9 @@ extern double vp8_calc_ssimg #ifdef OUTPUT_YUV_SRC FILE *yuv_file; #endif +#ifdef OUTPUT_YUV_REC +FILE *yuv_rec_file; +#endif #if 0 FILE *framepsnr; @@ -130,15 +133,21 @@ unsigned int tot_ef = 0; unsigned int cnt_ef = 0; #endif +#if defined(SECTIONBITS_OUTPUT) +extern unsigned __int64 Sectionbits[500]; +#endif #ifdef MODE_STATS extern unsigned __int64 Sectionbits[50]; extern int y_modes[5] ; extern int uv_modes[4] ; extern int b_modes[10] ; - extern int inter_y_modes[10] ; extern int inter_uv_modes[4] ; extern unsigned int inter_b_modes[15]; +#if CONFIG_SEGMENTATION +extern int segment_modes_intra[MAX_MB_SEGMENTS]; +extern int segment_modes_inter[MAX_MB_SEGMENTS]; +#endif #endif extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch); @@ -308,19 +317,29 @@ extern FILE *vpxlogc; static void setup_features(VP8_COMP 
*cpi) { + MACROBLOCKD *xd = &cpi->mb.e_mbd; + // Set up default state for MB feature flags - cpi->mb.e_mbd.segmentation_enabled = 0; - cpi->mb.e_mbd.update_mb_segmentation_map = 0; - cpi->mb.e_mbd.update_mb_segmentation_data = 0; - vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); - vpx_memset(cpi->mb.e_mbd.segment_feature_data, 0, sizeof(cpi->mb.e_mbd.segment_feature_data)); +#if CONFIG_SEGMENTATION + xd->segmentation_enabled = 1; +#else + xd->segmentation_enabled = 0; +#endif + xd->update_mb_segmentation_map = 0; + xd->update_mb_segmentation_data = 0; + vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); + vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; - cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; - vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); +#if CONFIG_SEGFEATURES + vpx_memset(xd->segment_feature_mask, 0, sizeof(xd->segment_feature_mask)); +#endif + + xd->mode_ref_lf_delta_enabled = 0; + xd->mode_ref_lf_delta_update = 0; + vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); + vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); + vpx_memset(xd->last_ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); + vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); set_default_lf_deltas(cpi); @@ -408,7 +427,6 @@ static void set_segmentation_map(VP8_PTR ptr, unsigned char *segmentation_map) // Copy in the new segmentation map vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); - // Signal that the map should be updated. 
cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; @@ -416,8 +434,8 @@ static void set_segmentation_map(VP8_PTR ptr, unsigned char *segmentation_map) // The values given for each segment can be either deltas (from the default value chosen for the frame) or absolute values. // -// Valid range for abs values is (0-127 for MB_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF) -// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF) +// Valid range for abs values is (0-127 for SEG_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF) +// Valid range for delta values are (+/-127 for SEG_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF) // // abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given). // @@ -427,19 +445,24 @@ static void set_segment_data(VP8_PTR ptr, signed char *feature_data, unsigned ch VP8_COMP *cpi = (VP8_COMP *)(ptr); cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; - vpx_memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data)); + vpx_memcpy(cpi->segment_feature_data, feature_data, + sizeof(cpi->segment_feature_data)); + +#if CONFIG_SEGFEATURES + // TBD ?? Set the feature mask + // vpx_memset(xd->segment_feature_mask, 0, sizeof(xd->segment_feature_mask)); +#endif + } static void segmentation_test_function(VP8_PTR ptr) { VP8_COMP *cpi = (VP8_COMP *)(ptr); - unsigned char *seg_map; - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; - + signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS]; + CHECK_MEM_ERROR(seg_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); // Create a temporary map for segmentation data. 
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); // MB loop to set local segmentation map /*for ( i = 0; i < cpi->common.mb_rows; i++ ) @@ -468,15 +491,15 @@ static void segmentation_test_function(VP8_PTR ptr) enable_segmentation(ptr); // Set up the quant segment data - feature_data[MB_LVL_ALT_Q][0] = 0; - feature_data[MB_LVL_ALT_Q][1] = 4; - feature_data[MB_LVL_ALT_Q][2] = 0; - feature_data[MB_LVL_ALT_Q][3] = 0; + feature_data[SEG_LVL_ALT_Q][0] = 0; + feature_data[SEG_LVL_ALT_Q][1] = 4; + feature_data[SEG_LVL_ALT_Q][2] = 0; + feature_data[SEG_LVL_ALT_Q][3] = 0; // Set up the loop segment data - feature_data[MB_LVL_ALT_LF][0] = 0; - feature_data[MB_LVL_ALT_LF][1] = 0; - feature_data[MB_LVL_ALT_LF][2] = 0; - feature_data[MB_LVL_ALT_LF][3] = 0; + feature_data[SEG_LVL_ALT_LF][0] = 0; + feature_data[SEG_LVL_ALT_LF][1] = 0; + feature_data[SEG_LVL_ALT_LF][2] = 0; + feature_data[SEG_LVL_ALT_LF][3] = 0; // Initialise the feature data structure // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 @@ -493,13 +516,13 @@ static void segmentation_test_function(VP8_PTR ptr) static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) { unsigned char *seg_map; - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; + signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS]; int i; int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; // Create a temporary map for segmentation data. 
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); + CHECK_MEM_ERROR(seg_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); cpi->cyclic_refresh_q = Q; @@ -560,16 +583,16 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) enable_segmentation((VP8_PTR)cpi); // Set up the quant segment data - feature_data[MB_LVL_ALT_Q][0] = 0; - feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); - feature_data[MB_LVL_ALT_Q][2] = 0; - feature_data[MB_LVL_ALT_Q][3] = 0; + feature_data[SEG_LVL_ALT_Q][0] = 0; + feature_data[SEG_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); + feature_data[SEG_LVL_ALT_Q][2] = 0; + feature_data[SEG_LVL_ALT_Q][3] = 0; // Set up the loop segment data - feature_data[MB_LVL_ALT_LF][0] = 0; - feature_data[MB_LVL_ALT_LF][1] = lf_adjustment; - feature_data[MB_LVL_ALT_LF][2] = 0; - feature_data[MB_LVL_ALT_LF][3] = 0; + feature_data[SEG_LVL_ALT_LF][0] = 0; + feature_data[SEG_LVL_ALT_LF][1] = lf_adjustment; + feature_data[SEG_LVL_ALT_LF][2] = 0; + feature_data[SEG_LVL_ALT_LF][3] = 0; // Initialise the feature data structure // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 @@ -1238,16 +1261,25 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (cpi->sf.improved_dct) { +#if CONFIG_T8X8 + cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); +#endif cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); } else { +#if CONFIG_T8X8 + cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); +#endif cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4); } cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4); +#if CONFIG_T8X8 + cpi->mb.short_fhaar2x2 = FDCT_INVOKE(&cpi->rtcd.fdct, haar_short2x2); +#endif if (cpi->sf.improved_quant) { @@ -1255,6 +1287,10 @@ void vp8_set_speed_features(VP8_COMP *cpi) 
quantb); cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_pair); +#if CONFIG_T8X8 + cpi->mb.quantize_b_8x8 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_8x8); + cpi->mb.quantize_b_2x2 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_2x2); +#endif } else { @@ -1262,6 +1298,10 @@ void vp8_set_speed_features(VP8_COMP *cpi) fastquantb); cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_pair); +#if CONFIG_T8X8 + cpi->mb.quantize_b_8x8 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_8x8); + cpi->mb.quantize_b_2x2 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_2x2); +#endif } if (cpi->sf.improved_quant != last_improved_quant) vp8cx_init_quantizer(cpi); @@ -1912,7 +1952,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); + CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); cpi->active_map_enabled = 0; @@ -1948,13 +1988,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->cyclic_refresh_q = 32; if (cpi->cyclic_refresh_mode_enabled) - { CHECK_MEM_ERROR(cpi->cyclic_refresh_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); - } else cpi->cyclic_refresh_map = (signed char *) NULL; // Test function for segmentation + //segmentation_test_function((VP8_PTR) cpi); #ifdef ENTROPY_STATS @@ -2045,6 +2084,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); #endif +#ifdef OUTPUT_YUV_REC + yuv_rec_file = fopen("rec.yuv", "wb"); +#endif #if 0 framepsnr = fopen("framepsnr.stt", 
"a"); @@ -2263,8 +2305,8 @@ void vp8_remove_compressor(VP8_PTR *ptr) #ifdef MODE_STATS { extern int count_mb_seg[4]; - FILE *f = fopen("modes.stt", "a"); - double dr = (double)cpi->oxcf.frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ; + FILE *f = fopen("modes.stt", "w"); + double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000 ; fprintf(f, "intra_mode in Intra Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]); fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]); @@ -2278,6 +2320,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) fprintf(f, "\n"); } +#if CONFIG_SEGMENTATION + fprintf(f, "Segments:%8d, %8d, %8d, %8d\n", segment_modes_intra[0], segment_modes_intra[1], segment_modes_intra[2], segment_modes_intra[3]); +#endif fprintf(f, "Modes in Inter Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d\n", @@ -2297,8 +2342,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]); fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]); - - +#if CONFIG_SEGMENTATION + fprintf(f, "Segments:%8d, %8d, %8d, %8d\n", segment_modes_inter[0], segment_modes_inter[1], segment_modes_inter[2], segment_modes_inter[3]); +#endif fclose(f); } #endif @@ -2386,6 +2432,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) #ifdef OUTPUT_YUV_SRC fclose(yuv_file); #endif +#ifdef OUTPUT_YUV_REC + fclose(yuv_rec_file); +#endif #if 0 @@ -2596,10 +2645,9 @@ int vp8_update_entropy(VP8_PTR comp, int update) } -#if OUTPUT_YUV_SRC -void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) +#ifdef OUTPUT_YUV_SRC +void vp8_write_yuv_frame(YV12_BUFFER_CONFIG *s) { - FILE *yuv_file = fopen(name, "ab"); unsigned char *src = s->y_buffer; 
int h = s->y_height; @@ -2629,8 +2677,42 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) src += s->uv_stride; } while (--h); +} +#endif - fclose(yuv_file); +#ifdef OUTPUT_YUV_REC +void vp8_write_yuv_rec_frame(VP8_COMMON *cm) +{ + YV12_BUFFER_CONFIG *s = cm->frame_to_show; + unsigned char *src = s->y_buffer; + int h = cm->Height; + + do + { + fwrite(src, s->y_width, 1, yuv_rec_file); + src += s->y_stride; + } + while (--h); + + src = s->u_buffer; + h = (cm->Height+1)/2; + + do + { + fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } + while (--h); + + src = s->v_buffer; + h = (cm->Height+1)/2; + + do + { + fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } + while (--h); } #endif @@ -3311,6 +3393,10 @@ static void encode_frame_to_data_rate // Test code for segmentation of gf/arf (0,0) //segmentation_test_function((VP8_PTR) cpi); +#if CONFIG_SEGMENTATION + cpi->mb.e_mbd.segmentation_enabled = 1; + cpi->mb.e_mbd.update_mb_segmentation_map = 1; +#endif if (cpi->compressor_speed == 2) { @@ -4536,14 +4622,8 @@ static void encode_frame_to_data_rate fclose(recon_file); } #endif -#if 0 - // DEBUG - if(cm->current_video_frame>173 && cm->current_video_frame<178) - { - char filename[512]; - sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); - vp8_write_yuv_frame(filename, cm->frame_to_show); - } +#ifdef OUTPUT_YUV_REC + vp8_write_yuv_rec_frame(cm); #endif } @@ -4898,7 +4978,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon } else #endif - encode_frame_to_data_rate(cpi, size, dest, frame_flags); + encode_frame_to_data_rate(cpi, size, dest, frame_flags); if (cpi->compressor_speed == 2) { @@ -5135,7 +5215,7 @@ int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflag int vp8_set_roimap(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]) { VP8_COMP *cpi = (VP8_COMP *) 
comp; - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; + signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS]; if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) return -1; @@ -5153,16 +5233,16 @@ int vp8_set_roimap(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned enable_segmentation((VP8_PTR)cpi); // Set up the quant segment data - feature_data[MB_LVL_ALT_Q][0] = delta_q[0]; - feature_data[MB_LVL_ALT_Q][1] = delta_q[1]; - feature_data[MB_LVL_ALT_Q][2] = delta_q[2]; - feature_data[MB_LVL_ALT_Q][3] = delta_q[3]; + feature_data[SEG_LVL_ALT_Q][0] = delta_q[0]; + feature_data[SEG_LVL_ALT_Q][1] = delta_q[1]; + feature_data[SEG_LVL_ALT_Q][2] = delta_q[2]; + feature_data[SEG_LVL_ALT_Q][3] = delta_q[3]; // Set up the loop segment data s - feature_data[MB_LVL_ALT_LF][0] = delta_lf[0]; - feature_data[MB_LVL_ALT_LF][1] = delta_lf[1]; - feature_data[MB_LVL_ALT_LF][2] = delta_lf[2]; - feature_data[MB_LVL_ALT_LF][3] = delta_lf[3]; + feature_data[SEG_LVL_ALT_LF][0] = delta_lf[0]; + feature_data[SEG_LVL_ALT_LF][1] = delta_lf[1]; + feature_data[SEG_LVL_ALT_LF][2] = delta_lf[2]; + feature_data[SEG_LVL_ALT_LF][3] = delta_lf[3]; cpi->segment_encode_breakout[0] = threshold[0]; cpi->segment_encode_breakout[1] = threshold[1]; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index aead2fbb7..00e7788f9 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -193,7 +193,11 @@ typedef struct typedef struct { MACROBLOCK mb; +#if CONFIG_SEGMENTATION + int segment_counts[MAX_MB_SEGMENTS + 8]; +#else int segment_counts[MAX_MB_SEGMENTS]; +#endif int totalrate; } MB_ROW_COMP; @@ -403,6 +407,11 @@ typedef struct VP8_COMP //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#if CONFIG_T8X8 + unsigned int 
coef_counts_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#endif int gfu_boost; int kf_boost; @@ -461,10 +470,21 @@ typedef struct VP8_COMP int gf_update_recommended; int skip_true_count; int skip_false_count; +#if CONFIG_T8X8 + int t4x4_count; + int t8x8_count; +#endif unsigned char *segmentation_map; - signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) - int segment_encode_breakout[MAX_MB_SEGMENTS]; // segment threashold for encode breakout + + // Segment data (can be deltas or absolute values) + signed char segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX]; +#if CONFIG_SEGFEATURES +unsigned int segment_feature_mask[MAX_MB_SEGMENTS]; +#endif + + // segment threashold for encode breakout + int segment_encode_breakout[MAX_MB_SEGMENTS]; unsigned char *active_map; unsigned int active_map_enabled; diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index beefe8d8e..c6c5ec056 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -257,12 +257,19 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val) { MACROBLOCKD *mbd = &cpi->mb.e_mbd; - (void) filt_val; + int i; - mbd->segment_feature_data[MB_LVL_ALT_LF][0] = cpi->segment_feature_data[MB_LVL_ALT_LF][0]; - mbd->segment_feature_data[MB_LVL_ALT_LF][1] = cpi->segment_feature_data[MB_LVL_ALT_LF][1]; - mbd->segment_feature_data[MB_LVL_ALT_LF][2] = cpi->segment_feature_data[MB_LVL_ALT_LF][2]; - mbd->segment_feature_data[MB_LVL_ALT_LF][3] = cpi->segment_feature_data[MB_LVL_ALT_LF][3]; + for ( i = 0; i < MAX_MB_SEGMENTS; i++ ) + { + mbd->segment_feature_data[i][SEG_LVL_ALT_LF] = + 
cpi->segment_feature_data[i][SEG_LVL_ALT_LF]; + +#if CONFIG_SEGFEATURES + mbd->segment_feature_mask[i] &= ~(1 << SEG_LVL_ALT_LF); + mbd->segment_feature_mask[i] |= + cpi->segment_feature_mask[i] & (1 << SEG_LVL_ALT_LF); +#endif + } } void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 503d24123..156989fc3 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -16,6 +16,10 @@ #include "quantize.h" #include "vp8/common/quant_common.h" +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + #define EXACT_QUANT #ifdef EXACT_FASTQUANT @@ -77,7 +81,11 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; +#if CONFIG_T8X8 + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); +#endif eob = -1; for (i = 0; i < 16; i++) { @@ -267,7 +275,8 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) d->eob = eob + 1; } -#endif +#endif //EXACT_QUANT + void vp8_quantize_mby_c(MACROBLOCK *x) { @@ -301,6 +310,592 @@ void vp8_quantize_mbuv_c(MACROBLOCK *x) x->quantize_b(&x->block[i], &x->e_mbd.block[i]); } +#if CONFIG_T8X8 + +#ifdef EXACT_FASTQUANT +void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q2nd = 4; + + + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc]/q2nd ; + zbin = zbin_ptr[rc] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc]/q2nd); + x += 
(round_ptr[rc]); + //y = ((int)((int)(x * quant_ptr[rc] * q2nd) >> 16) + x) + // >> quant_shift_ptr[rc]; // quantize (x) + y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d)// only ac and dc difference, no difference among ac +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc!=0]/q1st ; + zbin = zbin_ptr[rc!=0] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += round_ptr[rc]/q1st; + //y = ((int)(((int)((x * quant_ptr[rc!=0] * q1st)) >> 16) + x)) + // >> quant_shift_ptr[rc!=0]; // quantize (x) + x += round_ptr[rc]; + y = ((int)(((int)((x * quant_ptr[rc!=0])) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +#else + +void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = 
b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc]/q2nd; + zbin = zbin_ptr[rc]; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((int)((x + round_ptr[rc]/q2nd) * quant_ptr[rc] * q2nd)) >> 16; // quantize (x) + y = ((int)((x + round_ptr[rc]) * quant_ptr[rc])) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc] / q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; + //if (d->eob > 4) printf("Flag Fast 2 (%d)\n", d->eob); +} + +void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc!=0]/q1st ; + zbin = zbin_ptr[rc!=0] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((int)((x + round_ptr[rc!=0] / q1st) * quant_ptr[rc!=0] * q1st)) >> 16; + y = ((int)((x + round_ptr[rc!=0]) * quant_ptr[rc!=0])) >> 16; + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * 
dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +#endif //EXACT_FASTQUANT + +#ifdef EXACT_QUANT +void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + + //zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value)/q2nd; + zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc]/q2nd); + x += (round_ptr[rc]); + y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc]/q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short 
*qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q1st = 2; + + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + + //zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value)/q1st; + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc!=0]/q1st); + //y = ((int)(((int)(x * quant_ptr[rc!=0] * q1st) >> 16) + x)) + // >> quant_shift_ptr[rc!=0]; // quantize (x) + x += (round_ptr[rc!=0]); + y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_strict_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + unsigned char *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + //double q2nd = 4; + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + for (i = 0; i < 4; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //z = z * q2nd; + //dq = 
dequant_ptr[rc]/q2nd; + dq = dequant_ptr[rc]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x */ + y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save * the * coefficient and its dequantized value. */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. */ + if (y) + eob = i; + } + } + + d->eob = eob + 1; +} + +void vp8_strict_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + unsigned char *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + //double q1st = 2; + printf("call strict quantizer\n"); + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + for (i = 0; i < 64; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //z = z * q1st; + //dq = dequant_ptr[rc!=0]/q1st; + dq = dequant_ptr[rc!=0]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x. */ + y = ((int)(((int)((x * quant_ptr[rc!=0])) >> 16) + x)) >> quant_shift_ptr[rc!=0]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save the coefficient and its dequantized value. * */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. 
*/ + if (y) + eob = i; + } + } + d->eob = eob + 1; +} + +#else + +void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value)/q2nd; + zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = (((x + round_ptr[rc]/q2nd) * quant_ptr[rc]*q2nd)) >> 16; // quantize (x) + y = (((x + round_ptr[rc]) * quant_ptr[rc])) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc]/q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + for (i = 0; i < 64; i++) 
+ { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value)/q1st; + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((x + round_ptr[rc!=0]/q1st) * quant_ptr[rc!=0] * q1st) >> 16; + y = ((x + round_ptr[rc!=0]) * quant_ptr[rc!=0]) >> 16; + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]/q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + d->eob = eob + 1; +} + +#endif //EXACT_QUANT + +void vp8_quantize_mby_8x8(MACROBLOCK *x) +{ + int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + for(i = 0; i < 16; i ++) + { + x->e_mbd.block[i].eob = 0; + } + x->e_mbd.block[24].eob = 0; + for (i = 0; i < 16; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + + if (has_2nd_order) + x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); + +} + +void vp8_quantize_mb_8x8(MACROBLOCK *x) +{ + int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + for(i = 0; i < 25; i ++) + { + x->e_mbd.block[i].eob = 0; + } + for (i = 0; i < 24; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + + if (has_2nd_order) + x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); +} + +void vp8_quantize_mbuv_8x8(MACROBLOCK *x) +{ + int i; + + for(i = 16; i < 24; i ++) + { + x->e_mbd.block[i].eob = 0; + } + for (i = 16; i < 24; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); +} + +#endif //CONFIG_T8X8 + /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of 
* these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair @@ -572,19 +1167,28 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) int QIndex; MACROBLOCKD *xd = &x->e_mbd; int zbin_extra; + int segment_id = xd->mode_info_context->mbmi.segment_id; - // Select the baseline MB Q index. - if (xd->segmentation_enabled) + // Select the baseline MB Q index allowing for any segment level change. +#if CONFIG_SEGFEATURES + if ( xd->segmentation_enabled && + ( xd->segment_feature_mask[segment_id] & (0x01 << SEG_LVL_ALT_Q) ) ) +#else + if ( xd->segmentation_enabled ) +#endif { // Abs Value if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) + QIndex = xd->segment_feature_data[segment_id][SEG_LVL_ALT_Q]; - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; // Delta Value else { - QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range + QIndex = cpi->common.base_qindex + + xd->segment_feature_data[segment_id][SEG_LVL_ALT_Q]; + + // Clamp to valid range + QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? 
QIndex : MAXQ) : 0; } } else @@ -699,6 +1303,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) MACROBLOCKD *mbd = &cpi->mb.e_mbd; int update = 0; int new_delta_q; + int i; cm->base_qindex = Q; /* if any of the delta_q values are changing update flag has to be set */ @@ -720,11 +1325,18 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) cm->y2dc_delta_q = new_delta_q; - // Set Segment specific quatizers - mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0]; - mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1]; - mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2]; - mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3]; + // Set Segment specific quatizers if enabled + for ( i = 0; i < MAX_MB_SEGMENTS; i++ ) + { + mbd->segment_feature_data[i][SEG_LVL_ALT_Q] = + cpi->segment_feature_data[i][SEG_LVL_ALT_Q]; + +#if CONFIG_SEGFEATURES + mbd->segment_feature_mask[i] &= ~(1 << SEG_LVL_ALT_Q); + mbd->segment_feature_mask[i] |= + cpi->segment_feature_mask[i] & (1 << SEG_LVL_ALT_Q); +#endif + } /* quantizer has to be reinitialized for any delta_q changes */ if(update) diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h index f1f0156d8..1a2bad667 100644 --- a/vp8/encoder/quantize.h +++ b/vp8/encoder/quantize.h @@ -45,6 +45,27 @@ extern prototype_quantize_block_pair(vp8_quantize_quantb_pair); #define vp8_quantize_fastquantb vp8_fast_quantize_b_c #endif extern prototype_quantize_block(vp8_quantize_fastquantb); +#if CONFIG_T8X8 +#ifndef vp8_quantize_quantb_8x8 +#define vp8_quantize_quantb_8x8 vp8_regular_quantize_b_8x8 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_8x8); + +#ifndef vp8_quantize_fastquantb_8x8 +#define vp8_quantize_fastquantb_8x8 vp8_fast_quantize_b_8x8_c +#endif +extern prototype_quantize_block(vp8_quantize_fastquantb_8x8); + +#ifndef vp8_quantize_quantb_2x2 +#define 
vp8_quantize_quantb_2x2 vp8_regular_quantize_b_2x2 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_2x2); + +#ifndef vp8_quantize_fastquantb_2x2 +#define vp8_quantize_fastquantb_2x2 vp8_fast_quantize_b_2x2_c +#endif +extern prototype_quantize_block(vp8_quantize_fastquantb_2x2); +#endif #ifndef vp8_quantize_fastquantb_pair #define vp8_quantize_fastquantb_pair vp8_fast_quantize_b_pair_c @@ -56,6 +77,12 @@ typedef struct prototype_quantize_block(*quantb); prototype_quantize_block_pair(*quantb_pair); prototype_quantize_block(*fastquantb); +#if CONFIG_T8X8 + prototype_quantize_block(*quantb_8x8); + prototype_quantize_block(*fastquantb_8x8); + prototype_quantize_block(*quantb_2x2); + prototype_quantize_block(*fastquantb_2x2); +#endif prototype_quantize_block_pair(*fastquantb_pair); } vp8_quantize_rtcd_vtable_t; @@ -81,7 +108,10 @@ extern prototype_quantize_mb(vp8_quantize_mby); #endif extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d); - +#if CONFIG_T8X8 +extern void vp8_strict_quantize_b_8x8(BLOCK *b,BLOCKD *d); +extern void vp8_strict_quantize_b_2x2(BLOCK *b,BLOCKD *d); +#endif struct VP8_COMP; extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q); extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index f1a3fb380..952977094 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -197,7 +197,6 @@ static int rdmult_lut[QINDEX_RANGE]= 61347,64827,69312,73947,78732,83667,89787,97200, }; #endif - /* values are now correlated to quantizer */ static int sad_per_bit16lut[QINDEX_RANGE] = { @@ -252,8 +251,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) { int q; int i; - int *thresh; - int threshmult; vp8_clear_system_state(); //__asm emms; @@ -268,7 +265,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) if (cpi->zbin_over_quant > 0) { double oq_factor; - double modq; // Experimental code using the same basic equation as used for Q above // The units of 
cpi->zbin_over_quant are 1/128 of Q bin size @@ -1055,7 +1051,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels } - static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0}; diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index 95134cb81..ea04cbf25 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -13,6 +13,7 @@ #define __INC_RDOPT_H #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) +#define RDCOST_8x8(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 15e7336b1..e1e124844 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -22,18 +22,27 @@ #ifdef ENTROPY_STATS _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_T8X8 +_int64 context_counters_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +#if CONFIG_T8X8 +void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +#endif void vp8_fix_contexts(MACROBLOCKD *x); static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE*2]; const TOKENVALUE *vp8_dct_value_tokens_ptr; static int dct_value_cost[DCT_MAX_VALUE*2]; const int *vp8_dct_value_cost_ptr; -#if 0 -int skip_true_count = 0; -int skip_false_count = 0; + +#ifdef ENC_DEBUG +extern int mb_row_debug; +extern int mb_col_debug; +extern int enc_debug; #endif + static void fill_value_tokens() { @@ -93,6 +102,69 @@ static void fill_value_tokens() vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } +#if CONFIG_T8X8 +static void tokenize2nd_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which 
plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + int c = 0; /* start at DC */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + assert(eob<=4); + + do + { + const int band = vp8_coef_bands[c]; + + if (c < eob) + { + int rc = vp8_default_zig_zag1d[c]; + const int v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + else + x = DCT_EOB_TOKEN; + + t->Token = x; + //printf("Token : %d\n", x); + t->context_tree = cpi->common.fc.coef_probs_8x8 [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + +#ifdef ENC_DEBUG + if (t->skip_eob_node && vp8_coef_encodings[x].Len==1) + printf("Trouble 2 x=%d Len=%d skip=%d eob=%d c=%d band=%d type=%d: [%d %d %d]\n", + x, vp8_coef_encodings[x].Len, t->skip_eob_node, eob, c, band, type, + cpi->count, mb_row_debug, mb_col_debug); +#endif + + ++cpi->coef_counts_8x8 [type] [band] [pt] [x]; + } + while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 4); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; + +} +#endif + static void tokenize2nd_order_b ( MACROBLOCKD *x, @@ -153,6 +225,66 @@ static void tokenize2nd_order_b *a = *l = pt; } +#if CONFIG_T8X8 +static void tokenize1st_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + int c = type ? 
0 : 1; /* start at DC unless type 0 */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + + do + { + const int band = vp8_coef_bands_8x8[c]; + + x = DCT_EOB_TOKEN; + + if (c < eob) + { + int rc = vp8_default_zig_zag1d_8x8[c]; + const int v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + + t->Token = x; + t->context_tree = cpi->common.fc.coef_probs_8x8 [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + +#ifdef ENC_DEBUG + if (t->skip_eob_node && vp8_coef_encodings[x].Len==1) + printf("Trouble 1 x=%d Len=%d skip=%d eob=%d c=%d band=%d type=%d: [%d %d %d]\n", x, vp8_coef_encodings[x].Len, t->skip_eob_node, eob, c, band, type, cpi->count, mb_row_debug, mb_col_debug); +#endif + + ++cpi->coef_counts_8x8 [type] [band] [pt] [x]; + } + while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 64); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; +} + +#endif + static void tokenize1st_order_b ( @@ -293,22 +425,59 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) return skip; } +#if CONFIG_T8X8 +static int mb_is_skippable_8x8(MACROBLOCKD *x) +{ + int has_y2_block; + int skip = 1; + int i = 0; + + has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED + && x->mode_info_context->mbmi.mode != SPLITMV); + if (has_y2_block) + { + for (i = 0; i < 16; i+=4) + skip &= (x->block[i].eob < 2); + } + + for (; i < 24 + has_y2_block; i+=4) + skip &= (!x->block[i].eob); + + return skip; +} +#endif void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { int plane_type; int has_y2_block; + int b; has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED && 
x->mode_info_context->mbmi.mode != SPLITMV); - x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); + x->mode_info_context->mbmi.mb_skip_coeff = +#if CONFIG_T8X8 + (x->mode_info_context->mbmi.segment_id >= 2 ? + mb_is_skippable_8x8(x) : + mb_is_skippable(x, has_y2_block)); +#else + mb_is_skippable(x, has_y2_block); +#endif + if (x->mode_info_context->mbmi.mb_skip_coeff) { cpi->skip_true_count++; if (!cpi->common.mb_no_coeff_skip) - vp8_stuff_mb(cpi, x, t) ; + { +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + vp8_stuff_mb_8x8(cpi, x, t) ; + else +#endif + vp8_stuff_mb(cpi, x, t) ; + } else { vp8_fix_contexts(x); @@ -322,13 +491,82 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) plane_type = 3; if(has_y2_block) { - tokenize2nd_order_b(x, t, cpi); - plane_type = 0; +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + tokenize2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type, + A + vp8_block2above[24], L + vp8_block2left[24], cpi); + } + else +#endif + tokenize2nd_order_b(x, t, cpi); + + plane_type = 0; } +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + for (b = 0; b < 16; b+=4) + { + tokenize1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+4], + cpi); + /* *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]);*/ + // build coeff context for 8x8 transform + if(b==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + 
vp8_block2above[5]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *(L + vp8_block2left[b]); + } + else if(b==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(b==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *(L + vp8_block2left[b]); + } + else if(b==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + } - tokenize1st_order_b(x, t, plane_type, cpi); + } + for (b = 16; b < 24; b+=4) { + tokenize1st_order_b_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+2], + cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + } + else +#endif + tokenize1st_order_b(x, t, plane_type, cpi); } @@ -337,6 +575,9 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) void init_context_counters(void) { vpx_memset(context_counters, 0, sizeof(context_counters)); 
+#if CONFIG_T8X8 + vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); +#endif } void print_context_counters() @@ -381,6 +622,55 @@ void print_context_counters() const _int64 x = context_counters [type] [band] [pt] [t]; const int y = (int) x; + assert(x == (INT64) y); /* no overflow handling yet */ + fprintf(f, "%s %d", Comma(t), y); + + } + while (++t < MAX_ENTROPY_TOKENS); + + fprintf(f, "}"); + } + while (++pt < PREV_COEF_CONTEXTS); + + fprintf(f, "\n }"); + + } + while (++band < COEF_BANDS); + + fprintf(f, "\n }"); + } + while (++type < BLOCK_TYPES); + +#if CONFIG_T8X8 + fprintf(f, "int Contexts_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];\n\n"); + + fprintf(f, "const int default_contexts_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); + + type = 0; + + do + { + fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); + + band = 0; + + do + { + fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); + + pt = 0; + + do + { + fprintf(f, "%s\n {", Comma(pt)); + + t = 0; + + do + { + const _int64 x = context_counters [type] [band] [pt] [t]; + const int y = (int) x; + assert(x == (_int64) y); /* no overflow handling yet */ fprintf(f, "%s %d", Comma(t), y); @@ -399,6 +689,7 @@ void print_context_counters() fprintf(f, "\n }"); } while (++type < BLOCK_TYPES); +#endif fprintf(f, "\n};\n"); fclose(f); @@ -411,6 +702,188 @@ void vp8_tokenize_initialize() fill_value_tokens(); } +#if CONFIG_T8X8 +static __inline void stuff2nd_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = 
cpi->common.fc.coef_probs_8x8 [1] [0] [pt]; + //t->section = 11; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8 [1] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + + *tp = t; + pt = 0; + *a = *l = pt; + +} + +static __inline void stuff1st_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [0] [1] [pt]; + //t->section = 8; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8 [0] [1] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; + + +} + +static __inline +void stuff1st_order_buv_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [2] [0] [pt]; + //t->section = 13; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8[2] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; + +} + +void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) +{ + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + int 
plane_type; + int b; + + stuff2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type, + A + vp8_block2above[24], L + vp8_block2left[24], cpi); + plane_type = 0; + + for (b = 0; b < 16; b+=4) { + stuff1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+4], + cpi); + // build coeff context for 8x8 transform + if(b==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *(L + vp8_block2left[b]); + } + else if(b==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(b==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *(L + vp8_block2left[b]); + + } + else if(b==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + + } + + } + /* + for (b = 0; b < 16; b+=4) { + stuff1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) 
= + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + */ + + for (b = 16; b < 24; b+=4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+2], + cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + /* + for (b = 16; b < 24; b+=4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + */ +} +#endif static __inline void stuff2nd_order_b ( diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index 04a8879cf..cd122f19c 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -38,8 +38,10 @@ void init_context_counters(); void print_context_counters(); extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_T8X8 +extern _int64 context_counters_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif - extern const int *vp8_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 9ec24d566..b6e5a41c2 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -100,6 +100,11 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm endif # common (c) +ifeq ($(CONFIG_CSM),yes) 
+VP8_COMMON_SRCS-yes += common/maskingmv.c +VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/mask_sse3.asm +endif + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.h @@ -32,6 +32,7 @@ #include <fcntl.h> #include <unistd.h> #endif +#include "vpx_config.h" #include "vpx_version.h" #include "vpx/vp8cx.h" #include "vpx_ports/mem_ops.h" @@ -76,6 +77,9 @@ static const struct codec_item unsigned int fourcc; } codecs[] = { +#if CONFIG_EXPERIMENTAL && CONFIG_VP8_ENCODER + {"vp8x", &vpx_codec_vp8x_cx_algo, 0x78385056}, +#endif #if CONFIG_VP8_ENCODER {"vp8", &vpx_codec_vp8_cx_algo, 0x30385056}, #endif @@ -1692,7 +1696,11 @@ int main(int argc, const char **argv_) /* Handle codec specific options */ #if CONFIG_VP8_ENCODER - if (codec->iface == &vpx_codec_vp8_cx_algo) + if (codec->iface == &vpx_codec_vp8_cx_algo +#if CONFIG_EXPERIMENTAL + || codec->iface == &vpx_codec_vp8x_cx_algo +#endif + ) { ctrl_args = vp8_args; ctrl_args_map = vp8_arg_ctrl_map; |