diff options
Diffstat (limited to 'vp9')
29 files changed, 2005 insertions, 759 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index aebf4a1ae..426699e31 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -83,7 +83,9 @@ typedef enum { D27_PRED, /* Directional 22 deg prediction [anti-clockwise from 0 deg hor] */ D63_PRED, /* Directional 67 deg prediction [anti-clockwise from 0 deg hor] */ TM_PRED, /* Truemotion prediction */ +#if !CONFIG_SB8X8 I8X8_PRED, /* 8x8 based prediction, each 8x8 has its own mode */ +#endif I4X4_PRED, /* 4x4 based prediction, each 4x4 has its own mode */ NEARESTMV, NEARMV, @@ -126,7 +128,9 @@ typedef enum { #define VP9_YMODES (I4X4_PRED + 1) #define VP9_UV_MODES (TM_PRED + 1) +#if !CONFIG_SB8X8 #define VP9_I8X8_MODES (TM_PRED + 1) +#endif #define VP9_I32X32_MODES (TM_PRED + 1) #define VP9_MVREFS (1 + SPLITMV - NEARESTMV) @@ -169,6 +173,7 @@ typedef enum { #define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES) /* 10 */ #endif +#if !CONFIG_SB8X8 typedef enum { PARTITIONING_16X8 = 0, PARTITIONING_8X16, @@ -176,6 +181,7 @@ typedef enum { PARTITIONING_4X4, NB_PARTITIONINGS, } SPLITMV_PARTITIONING_TYPE; +#endif /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there @@ -271,7 +277,9 @@ typedef struct { int mb_mode_context[MAX_REF_FRAMES]; +#if !CONFIG_SB8X8 SPLITMV_PARTITIONING_TYPE partitioning; +#endif unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; unsigned char need_to_clamp_secondmv; @@ -293,7 +301,7 @@ typedef struct { typedef struct { MB_MODE_INFO mbmi; - union b_mode_info bmi[16]; + union b_mode_info bmi[16 >> (CONFIG_SB8X8 * 2)]; } MODE_INFO; struct scale_factors { @@ -368,7 +376,7 @@ typedef struct macroblockd { PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT *left_seg_context; - /* 0 indicates segmentation at MB level is not enabled. 
Otherwise the individual bits indicate which features are active. */ + /* 0 (disable) 1 (enable) segmentation */ unsigned char segmentation_enabled; /* 0 (do not update) 1 (update) the macroblock segmentation map. */ @@ -433,8 +441,11 @@ typedef struct macroblockd { int corrupted; - int sb_index; - int mb_index; // Index of the MB in the SB (0..3) + int sb_index; // index of 32x32 block inside the 64x64 block + int mb_index; // index of 16x16 block inside the 32x32 block +#if CONFIG_SB8X8 + int b_index; // index of 8x8 block inside the 16x16 block +#endif int q_index; } MACROBLOCKD; @@ -442,10 +453,10 @@ typedef struct macroblockd { static INLINE void update_partition_context(MACROBLOCKD *xd, BLOCK_SIZE_TYPE sb_type, BLOCK_SIZE_TYPE sb_size) { - int bsl = mi_width_log2(sb_size) - CONFIG_SB8X8, bs = 1 << bsl; - int bwl = mi_width_log2(sb_type) - CONFIG_SB8X8; - int bhl = mi_height_log2(sb_type) - CONFIG_SB8X8; - int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - CONFIG_SB8X8 - bsl; + int bsl = mi_width_log2(sb_size), bs = 1 << bsl; + int bwl = mi_width_log2(sb_type); + int bhl = mi_height_log2(sb_type); + int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; int i; // skip macroblock partition if (bsl == 0) @@ -481,9 +492,9 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, static INLINE int partition_plane_context(MACROBLOCKD *xd, BLOCK_SIZE_TYPE sb_type) { - int bsl = mi_width_log2(sb_type) - CONFIG_SB8X8, bs = 1 << bsl; + int bsl = mi_width_log2(sb_type), bs = 1 << bsl; int above = 0, left = 0, i; - int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl - CONFIG_SB8X8; + int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; assert(mi_width_log2(sb_type) == mi_height_log2(sb_type)); assert(bsl >= 0); @@ -581,6 +592,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { xd->mode_info_context->bmi[ib].as_mode.context : #endif xd->mode_info_context->bmi[ib].as_mode.first); +#if !CONFIG_SB8X8 } else if (xd->mode_info_context->mbmi.mode == 
I8X8_PRED && xd->q_index < ACTIVE_HT) { const int ic = (ib & 10); @@ -615,7 +627,8 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { // Use 2D DCT tx_type = DCT_DCT; #endif - } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED && +#endif // !CONFIG_SB8X8 + } else if (xd->mode_info_context->mbmi.mode <= TM_PRED && xd->q_index < ACTIVE_HT) { #if USE_ADST_FOR_I16X16_4X4 #if USE_ADST_PERIPHERY_ONLY @@ -659,14 +672,17 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) { #endif if (ib >= (1 << (wb + hb))) // no chroma adst return tx_type; +#if !CONFIG_SB8X8 if (xd->mode_info_context->mbmi.mode == I8X8_PRED && xd->q_index < ACTIVE_HT8) { // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged // or the relationship otherwise modified to address this type conversion. tx_type = txfm_map(pred_mode_conv( (MB_PREDICTION_MODE)xd->mode_info_context->bmi[ib].as_mode.first)); - } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED && - xd->q_index < ACTIVE_HT8) { + } else +#endif // CONFIG_SB8X8 + if (xd->mode_info_context->mbmi.mode <= TM_PRED && + xd->q_index < ACTIVE_HT8) { #if USE_ADST_FOR_I16X16_8X8 #if USE_ADST_PERIPHERY_ONLY const int hmax = 1 << wb; @@ -707,7 +723,7 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) { #endif if (ib >= (1 << (wb + hb))) return tx_type; - if (xd->mode_info_context->mbmi.mode < I8X8_PRED && + if (xd->mode_info_context->mbmi.mode <= TM_PRED && xd->q_index < ACTIVE_HT16) { tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); #if USE_ADST_PERIPHERY_ONLY @@ -738,7 +754,9 @@ void vp9_setup_block_dptrs(MACROBLOCKD *xd); static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) { MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; const TX_SIZE size = mbmi->txfm_size; +#if !CONFIG_SB8X8 const MB_PREDICTION_MODE mode = mbmi->mode; +#endif // !CONFIG_SB8X8 switch (mbmi->sb_type) { case BLOCK_SIZE_SB64X64: @@ -750,6 +768,17 @@ static TX_SIZE get_uv_tx_size(const 
MACROBLOCKD *xd) { return TX_16X16; else return size; +#if CONFIG_SB8X8 + case BLOCK_SIZE_SB32X16: + case BLOCK_SIZE_SB16X32: + case BLOCK_SIZE_MB16X16: + if (size == TX_16X16) + return TX_8X8; + else + return size; + default: + return TX_4X4; +#else // CONFIG_SB8X8 default: if (size == TX_16X16) return TX_8X8; @@ -757,6 +786,7 @@ static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) { return TX_4X4; else return size; +#endif // CONFIG_SB8X8 } return size; @@ -812,7 +842,10 @@ typedef void (*foreach_transformed_block_visitor)(int plane, int block, void *arg); static INLINE void foreach_transformed_block_in_plane( const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane, - int is_split, foreach_transformed_block_visitor visit, void *arg) { +#if !CONFIG_SB8X8 + int is_split, +#endif // !CONFIG_SB8X8 + foreach_transformed_block_visitor visit, void *arg) { const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") @@ -830,7 +863,10 @@ static INLINE void foreach_transformed_block_in_plane( // than the size of the subsampled data, or forced externally by the mb mode. const int ss_max = MAX(xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y); - const int ss_txfrm_size = txfrm_size_b > ss_block_size || is_split + const int ss_txfrm_size = txfrm_size_b > ss_block_size +#if !CONFIG_SB8X8 + || is_split +#endif // !CONFIG_SB8X8 ? 
txfrm_size_b - ss_max * 2 : txfrm_size_b; const int step = 1 << ss_txfrm_size; @@ -847,17 +883,24 @@ static INLINE void foreach_transformed_block_in_plane( static INLINE void foreach_transformed_block( const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, foreach_transformed_block_visitor visit, void *arg) { +#if !CONFIG_SB8X8 const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; const int is_split = xd->mode_info_context->mbmi.txfm_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV); +#endif // !CONFIG_SB8X8 int plane; for (plane = 0; plane < MAX_MB_PLANE; plane++) { +#if !CONFIG_SB8X8 const int is_split_chroma = is_split && xd->plane[plane].plane_type == PLANE_TYPE_UV; +#endif // !CONFIG_SB8X8 - foreach_transformed_block_in_plane(xd, bsize, plane, is_split_chroma, + foreach_transformed_block_in_plane(xd, bsize, plane, +#if !CONFIG_SB8X8 + is_split_chroma, +#endif // !CONFIG_SB8X8 visit, arg); } } @@ -865,14 +908,19 @@ static INLINE void foreach_transformed_block( static INLINE void foreach_transformed_block_uv( const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, foreach_transformed_block_visitor visit, void *arg) { +#if !CONFIG_SB8X8 const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; const int is_split = xd->mode_info_context->mbmi.txfm_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV); +#endif // !CONFIG_SB8X8 int plane; for (plane = 1; plane < MAX_MB_PLANE; plane++) { - foreach_transformed_block_in_plane(xd, bsize, plane, is_split, + foreach_transformed_block_in_plane(xd, bsize, plane, +#if !CONFIG_SB8X8 + is_split, +#endif // !CONFIG_SB8X8 visit, arg); } } @@ -900,11 +948,16 @@ static INLINE void foreach_predicted_block_in_plane( int pred_w, pred_h; if (mode == SPLITMV) { +#if CONFIG_SB8X8 + pred_w = 0; + pred_h = 0; +#else // 4x4 or 8x8 const int is_4x4 = (xd->mode_info_context->mbmi.partitioning == PARTITIONING_4X4); pred_w = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_x; pred_h = is_4x4 ? 
0 : 1 >> xd->plane[plane].subsampling_y; +#endif } else { pred_w = bw; pred_h = bh; @@ -961,6 +1014,74 @@ static uint8_t* raster_block_offset_uint8(MACROBLOCKD *xd, return base + raster_block_offset(xd, bsize, plane, block, stride); } +static int txfrm_block_to_raster_block(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + int ss_txfrm_size) { + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int txwl = ss_txfrm_size / 2; + const int tx_cols_lg2 = bwl - txwl; + const int tx_cols = 1 << tx_cols_lg2; + const int raster_mb = block >> ss_txfrm_size; + const int x = (raster_mb & (tx_cols - 1)) << (txwl); + const int y = raster_mb >> tx_cols_lg2 << (txwl); + return x + (y << bwl); +} + +static void txfrm_block_to_raster_xy(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + int ss_txfrm_size, + int *x, int *y) { + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int txwl = ss_txfrm_size / 2; + const int tx_cols_lg2 = bwl - txwl; + const int tx_cols = 1 << tx_cols_lg2; + const int raster_mb = block >> ss_txfrm_size; + *x = (raster_mb & (tx_cols - 1)) << (txwl); + *y = raster_mb >> tx_cols_lg2 << (txwl); +} + +static TX_SIZE tx_size_for_plane(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + int plane) { + // TODO(jkoleszar): This duplicates a ton of code, but we're going to be + // moving this to a per-plane lookup shortly, and this will go away then. 
+ if (!plane) { + return xd->mode_info_context->mbmi.txfm_size; + } else { + const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); +#if !CONFIG_SB8X8 + const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + const int is_split = + xd->mode_info_context->mbmi.txfm_size == TX_8X8 && + (mode == I8X8_PRED || mode == SPLITMV); +#endif + + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; + const int block_size_b = bw + bh; + const int txfrm_size_b = tx_size * 2; + + // subsampled size of the block + const int ss_sum = xd->plane[plane].subsampling_x + + xd->plane[plane].subsampling_y; + const int ss_block_size = block_size_b - ss_sum; + + // size of the transform to use. scale the transform down if it's larger + // than the size of the subsampled data, or forced externally by the mb mode + const int ss_max = MAX(xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); + const int ss_txfrm_size = txfrm_size_b > ss_block_size +#if !CONFIG_SB8X8 + || is_split +#endif // !CONFIG_SB8X8 + ? 
txfrm_size_b - ss_max * 2 + : txfrm_size_b; + return (TX_SIZE)(ss_txfrm_size / 2); + } +} + #if CONFIG_CODE_ZEROGROUP static int get_zpc_used(TX_SIZE tx_size) { return (tx_size >= TX_16X16); diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 8d5577f24..ed5441cc1 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -16,6 +16,17 @@ #include "vpx_mem/vpx_mem.h" static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = { +#if CONFIG_SB8X8 + /* DC V H D45 135 117 153 D27 D63 TM i4X4 */ + {12, 6, 5, 5, 5, 5, 5, 5, 5, 2, 200}, + {25, 13, 13, 7, 7, 7, 7, 7, 7, 6, 160}, + {31, 17, 18, 8, 8, 8, 8, 8, 8, 9, 139}, + {40, 22, 23, 8, 8, 8, 8, 8, 8, 12, 116}, + {53, 26, 28, 8, 8, 8, 8, 8, 8, 13, 94}, + {68, 33, 35, 8, 8, 8, 8, 8, 8, 17, 68}, + {78, 38, 38, 8, 8, 8, 8, 8, 8, 19, 52}, + {89, 42, 42, 8, 8, 8, 8, 8, 8, 21, 34}, +#else /* DC V H D45 135 117 153 D27 D63 TM i8x8 i4X4 */ {12, 6, 5, 5, 5, 5, 5, 5, 5, 2, 22, 200}, {25, 13, 13, 7, 7, 7, 7, 7, 7, 6, 27, 160}, @@ -25,11 +36,17 @@ static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = { {68, 33, 35, 8, 8, 8, 8, 8, 8, 17, 20, 68}, {78, 38, 38, 8, 8, 8, 8, 8, 8, 19, 16, 52}, {89, 42, 42, 8, 8, 8, 8, 8, 8, 21, 12, 34}, +#endif }; static const unsigned int y_mode_cts [VP9_YMODES] = { +#if CONFIG_SB8X8 + /* DC V H D45 135 117 153 D27 D63 TM i4X4 */ + 98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 70 +#else /* DC V H D45 135 117 153 D27 D63 TM i8x8 i4X4 */ 98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 16, 70 +#endif }; static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { @@ -44,14 +61,18 @@ static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { { 150, 15, 10, 10, 10, 10, 10, 75, 10, 6}, /* D27 */ { 150, 15, 10, 10, 10, 10, 10, 10, 75, 6}, /* D63 */ { 160, 30, 30, 10, 10, 10, 10, 10, 10, 16}, /* TM */ +#if !CONFIG_SB8X8 { 132, 46, 40, 10, 10, 10, 10, 10, 10, 18}, /* i8x8 - never used */ +#endif { 150, 35, 41, 10, 10, 10, 10, 10, 10, 10}, /* i4X4 */ }; +#if 
!CONFIG_SB8X8 static const unsigned int i8x8_mode_cts [VP9_I8X8_MODES] = { /* DC V H D45 135 117 153 D27 D63 TM */ 73, 49, 61, 30, 30, 30, 30, 30, 30, 13 }; +#endif static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { // DC V H D45 135 117 153 D27 D63 TM @@ -65,7 +86,9 @@ static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { { 102, 33, 20, 20, 20, 20, 20, 64, 20, 14}, /* D27 */ { 102, 33, 20, 20, 20, 20, 20, 20, 64, 14}, /* D63 */ { 132, 36, 30, 20, 20, 20, 20, 20, 20, 18}, /* TM */ +#if !CONFIG_SB8X8 { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* i8x8 - never used */ +#endif { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* I4X4 */ }; @@ -123,6 +146,7 @@ const vp9_prob vp9_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP9_SUBMVREFS - 1] = { { 208, 1, 1 } }; +#if !CONFIG_SB8X8 vp9_mbsplit vp9_mbsplits [VP9_NUMMBSPLITS] = { { 0, 0, 0, 0, @@ -150,9 +174,17 @@ vp9_mbsplit vp9_mbsplits [VP9_NUMMBSPLITS] = { const int vp9_mbsplit_count [VP9_NUMMBSPLITS] = { 2, 2, 4, 16}; const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150}; +#endif const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS] [PARTITION_TYPES - 1] = { +#if CONFIG_SB8X8 + // FIXME(jingning,rbultje) put real probabilities here + {202, 162, 107}, + {16, 2, 169}, + {3, 246, 19}, + {104, 90, 134}, +#endif {202, 162, 107}, {16, 2, 169}, {3, 246, 19}, @@ -228,8 +260,12 @@ const vp9_tree_index vp9_ymode_tree[VP9_YMODES * 2 - 2] = { -D27_PRED, -D63_PRED, 16, 18, -V_PRED, -H_PRED, +#if CONFIG_SB8X8 + -TM_PRED, -I4X4_PRED +#else -TM_PRED, 20, -I4X4_PRED, -I8X8_PRED +#endif }; const vp9_tree_index vp9_kf_ymode_tree[VP9_YMODES * 2 - 2] = { @@ -242,10 +278,15 @@ const vp9_tree_index vp9_kf_ymode_tree[VP9_YMODES * 2 - 2] = { -D27_PRED, -D63_PRED, 16, 18, -V_PRED, -H_PRED, +#if CONFIG_SB8X8 + -TM_PRED, -I4X4_PRED +#else -TM_PRED, 20, -I4X4_PRED, -I8X8_PRED +#endif }; +#if !CONFIG_SB8X8 const vp9_tree_index vp9_i8x8_mode_tree[VP9_I8X8_MODES * 2 - 2] = { 2, 14, -DC_PRED, 4, @@ 
-257,6 +298,7 @@ const vp9_tree_index vp9_i8x8_mode_tree[VP9_I8X8_MODES * 2 - 2] = { -V_PRED, 16, -H_PRED, -TM_PRED }; +#endif const vp9_tree_index vp9_uv_mode_tree[VP9_UV_MODES * 2 - 2] = { 2, 14, @@ -270,11 +312,13 @@ const vp9_tree_index vp9_uv_mode_tree[VP9_UV_MODES * 2 - 2] = { -H_PRED, -TM_PRED }; +#if !CONFIG_SB8X8 const vp9_tree_index vp9_mbsplit_tree[6] = { -PARTITIONING_4X4, 2, -PARTITIONING_8X8, 4, -PARTITIONING_16X8, -PARTITIONING_8X16, }; +#endif const vp9_tree_index vp9_mv_ref_tree[8] = { -ZEROMV, 2, @@ -308,8 +352,10 @@ struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES]; struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES]; struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES]; +#if !CONFIG_SB8X8 struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; +#endif struct vp9_token vp9_mv_ref_encoding_array[VP9_MVREFS]; struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; @@ -340,12 +386,16 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) { bct, uv_mode_cts[i], 0); } +#if !CONFIG_SB8X8 vp9_tree_probs_from_distribution(vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob, bct, i8x8_mode_cts, 0); +#endif vpx_memcpy(x->fc.sub_mv_ref_prob, vp9_sub_mv_ref_prob2, sizeof(vp9_sub_mv_ref_prob2)); +#if !CONFIG_SB8X8 vpx_memcpy(x->fc.mbsplit_prob, vp9_mbsplit_probs, sizeof(vp9_mbsplit_probs)); +#endif vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob, sizeof(vp9_switchable_interp_prob)); @@ -449,8 +499,10 @@ void vp9_entropy_mode_init() { vp9_tokens_from_tree(vp9_sb_ymode_encodings, vp9_sb_ymode_tree); vp9_tokens_from_tree(vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree); vp9_tokens_from_tree(vp9_uv_mode_encodings, vp9_uv_mode_tree); +#if !CONFIG_SB8X8 vp9_tokens_from_tree(vp9_i8x8_mode_encodings, vp9_i8x8_mode_tree); vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree); +#endif 
vp9_tokens_from_tree(vp9_switchable_interp_encodings, vp9_switchable_interp_tree); vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree); @@ -629,9 +681,11 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_tree, fc->bmode_counts, fc->pre_bmode_prob, fc->bmode_prob, 0); +#if !CONFIG_SB8X8 update_mode_probs(VP9_I8X8_MODES, vp9_i8x8_mode_tree, fc->i8x8_mode_counts, fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob, 0); +#endif for (i = 0; i < SUBMVREF_COUNT; ++i) update_mode_probs(VP9_SUBMVREFS, @@ -639,9 +693,11 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { fc->pre_sub_mv_ref_prob[i], fc->sub_mv_ref_prob[i], LEFT4X4); +#if !CONFIG_SB8X8 update_mode_probs(VP9_NUMMBSPLITS, vp9_mbsplit_tree, fc->mbsplit_counts, fc->pre_mbsplit_prob, fc->mbsplit_prob, 0); +#endif #if CONFIG_COMP_INTERINTRA_PRED if (cm->use_interintra) { int factor, interintra_prob, count; diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 665569578..24f988f25 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -15,7 +15,9 @@ #include "vp9/common/vp9_treecoder.h" #define SUBMVREF_COUNT 5 +#if !CONFIG_SB8X8 #define VP9_NUMMBSPLITS 4 +#endif #if CONFIG_COMP_INTERINTRA_PRED #define VP9_DEF_INTERINTRA_PROB 248 @@ -24,6 +26,7 @@ #define SEPARATE_INTERINTRA_UV 0 #endif +#if !CONFIG_SB8X8 typedef const int vp9_mbsplit[16]; extern vp9_mbsplit vp9_mbsplits[VP9_NUMMBSPLITS]; @@ -31,6 +34,7 @@ extern vp9_mbsplit vp9_mbsplits[VP9_NUMMBSPLITS]; extern const int vp9_mbsplit_count[VP9_NUMMBSPLITS]; /* # of subsets */ extern const vp9_prob vp9_mbsplit_probs[VP9_NUMMBSPLITS - 1]; +#endif extern int vp9_mv_cont(const int_mv *l, const int_mv *a); @@ -48,8 +52,10 @@ extern const vp9_tree_index vp9_kf_ymode_tree[]; extern const vp9_tree_index vp9_uv_mode_tree[]; #define vp9_sb_ymode_tree vp9_uv_mode_tree #define vp9_sb_kf_ymode_tree vp9_uv_mode_tree +#if !CONFIG_SB8X8 extern const vp9_tree_index vp9_i8x8_mode_tree[]; 
extern const vp9_tree_index vp9_mbsplit_tree[]; +#endif extern const vp9_tree_index vp9_mv_ref_tree[]; extern const vp9_tree_index vp9_sb_mv_ref_tree[]; extern const vp9_tree_index vp9_sub_mv_ref_tree[]; @@ -60,9 +66,11 @@ extern struct vp9_token vp9_ymode_encodings[VP9_YMODES]; extern struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES]; extern struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; extern struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES]; -extern struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; extern struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES]; +#if !CONFIG_SB8X8 +extern struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; extern struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; +#endif /* Inter mode values do not start at zero */ diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index b72b41e95..3f00ba496 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -47,6 +47,6 @@ typedef enum PARTITION_TYPE { } PARTITION_TYPE; #define PARTITION_PLOFFSET 4 // number of probability models per block size -#define NUM_PARTITION_CONTEXTS (2 * PARTITION_PLOFFSET) +#define NUM_PARTITION_CONTEXTS ((2 + CONFIG_SB8X8) * PARTITION_PLOFFSET) #endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index 085454512..df1ab73e8 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -74,11 +74,13 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, vp9_prob p[VP9_MVREFS - 1], const int context); +#if !CONFIG_SB8X8 extern const uint8_t vp9_mbsplit_offset[4][16]; +#endif static int left_block_mv(const MACROBLOCKD *xd, const MODE_INFO *cur_mb, int b) { - if (!(b & 3)) { + if (!(b & (3 >> CONFIG_SB8X8))) { if (!xd->left_available) return 0; @@ -88,7 +90,7 @@ static int left_block_mv(const MACROBLOCKD *xd, if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv[0].as_int; - b += 4; + b += 4 >> CONFIG_SB8X8; } return (cur_mb->bmi + b - 
1)->as_mv[0].as_int; @@ -96,7 +98,7 @@ static int left_block_mv(const MACROBLOCKD *xd, static int left_block_second_mv(const MACROBLOCKD *xd, const MODE_INFO *cur_mb, int b) { - if (!(b & 3)) { + if (!(b & (3 >> CONFIG_SB8X8))) { if (!xd->left_available) return 0; @@ -106,7 +108,7 @@ static int left_block_second_mv(const MACROBLOCKD *xd, if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.second_ref_frame > 0 ? cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int; - b += 4; + b += 4 >> CONFIG_SB8X8; } return cur_mb->mbmi.second_ref_frame > 0 ? @@ -115,72 +117,85 @@ static int left_block_second_mv(const MACROBLOCKD *xd, } static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { - if (!(b >> 2)) { + if (!(b >> (2 >> CONFIG_SB8X8))) { /* On top edge, get from MB above us */ cur_mb -= mi_stride; if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv[0].as_int; - b += 16; + b += 16 >> (2 * CONFIG_SB8X8); } - return (cur_mb->bmi + b - 4)->as_mv[0].as_int; + return (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mv[0].as_int; } static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { - if (!(b >> 2)) { + if (!(b >> (2 >> CONFIG_SB8X8))) { /* On top edge, get from MB above us */ cur_mb -= mi_stride; if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.second_ref_frame > 0 ? cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int; - b += 16; + b += 16 >> (2 * CONFIG_SB8X8); } return cur_mb->mbmi.second_ref_frame > 0 ? - (cur_mb->bmi + b - 4)->as_mv[1].as_int : - (cur_mb->bmi + b - 4)->as_mv[0].as_int; + (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mv[1].as_int : + (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mv[0].as_int; } static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { - if (!(b & 3)) { +#if CONFIG_SB8X8 + // FIXME(rbultje, jingning): temporary hack because jenkins doesn't + // understand this condition. This will go away soon. 
+ if (b == 0 || b == 2) { +#else + if (!(b & (3 >> CONFIG_SB8X8))) { +#endif /* On L edge, get from MB to left of us */ --cur_mb; - if (cur_mb->mbmi.mode < I8X8_PRED) { + if (cur_mb->mbmi.mode <= TM_PRED) { return pred_mode_conv(cur_mb->mbmi.mode); +#if !CONFIG_SB8X8 } else if (cur_mb->mbmi.mode == I8X8_PRED) { return pred_mode_conv( (MB_PREDICTION_MODE)(cur_mb->bmi + 3 + b)->as_mode.first); +#endif // !CONFIG_SB8X8 } else if (cur_mb->mbmi.mode == I4X4_PRED) { - return ((cur_mb->bmi + 3 + b)->as_mode.first); + return ((cur_mb->bmi + (3 >> CONFIG_SB8X8) + b)->as_mode.first); } else { return B_DC_PRED; } } +#if CONFIG_SB8X8 + assert(b == 1 || b == 3); +#endif return (cur_mb->bmi + b - 1)->as_mode.first; } static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride) { - if (!(b >> 2)) { + if (!(b >> (2 >> CONFIG_SB8X8))) { /* On top edge, get from MB above us */ cur_mb -= mi_stride; - if (cur_mb->mbmi.mode < I8X8_PRED) { + if (cur_mb->mbmi.mode <= TM_PRED) { return pred_mode_conv(cur_mb->mbmi.mode); +#if !CONFIG_SB8X8 } else if (cur_mb->mbmi.mode == I8X8_PRED) { return pred_mode_conv( (MB_PREDICTION_MODE)(cur_mb->bmi + 12 + b)->as_mode.first); +#endif } else if (cur_mb->mbmi.mode == I4X4_PRED) { - return ((cur_mb->bmi + 12 + b)->as_mode.first); + return ((cur_mb->bmi + (CONFIG_SB8X8 ? 
2 : 12) + b)->as_mode.first); } else { return B_DC_PRED; } } - return (cur_mb->bmi + b - 4)->as_mode.first; + return (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mode.first; } #endif // VP9_COMMON_VP9_FINDNEARMV_H_ diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index ed0c35463..edb0c540b 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -27,7 +27,9 @@ static void lf_init_lut(loop_filter_info_n *lfi) { lfi->mode_lf_lut[H_PRED] = 1; lfi->mode_lf_lut[TM_PRED] = 1; lfi->mode_lf_lut[I4X4_PRED] = 0; +#if !CONFIG_SB8X8 lfi->mode_lf_lut[I8X8_PRED] = 0; +#endif lfi->mode_lf_lut[ZEROMV] = 1; lfi->mode_lf_lut[NEARESTMV] = 2; lfi->mode_lf_lut[NEARMV] = 2; @@ -165,10 +167,14 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, // the MB uses a prediction size of 16x16 and either 16x16 transform // is used or there is no residue at all. static int mb_lf_skip(const MB_MODE_INFO *const mbmi) { - const MB_PREDICTION_MODE mode = mbmi->mode; const int skip_coef = mbmi->mb_skip_coeff; const int tx_size = mbmi->txfm_size; +#if CONFIG_SB8X8 + return mbmi->sb_type >= BLOCK_SIZE_MB16X16 && +#else + const MB_PREDICTION_MODE mode = mbmi->mode; return mode != I4X4_PRED && mode != I8X8_PRED && mode != SPLITMV && +#endif (tx_size >= TX_16X16 || skip_coef); } @@ -220,7 +226,13 @@ static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi, if (!skip_lf) { if (tx_size >= TX_8X8) { - if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) + if (tx_size == TX_8X8 && +#if CONFIG_SB8X8 + (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16) +#else + (mode == I8X8_PRED || mode == SPLITMV) +#endif + ) vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi); else @@ -244,7 +256,13 @@ static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi, if (!skip_lf) { if (tx_size >= TX_8X8) { - if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) + if (tx_size == TX_8X8 && +#if CONFIG_SB8X8 + (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16) +#else + (mode == 
I8X8_PRED || mode == SPLITMV) +#endif + ) vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, y_stride, uv_stride, &lfi); else diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index b6ccb8bd9..7a7ebe64f 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -13,6 +13,11 @@ #define MVREF_NEIGHBOURS 8 #if CONFIG_SB8X8 +static int b_mv_ref_search[MVREF_NEIGHBOURS][2] = { + {0, -1}, {-1, 0}, {-1, -1}, {0, -2}, + {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2} +}; + static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = { {0, -1}, {-1, 0}, {-1, -1}, {0, -3}, {-3, 0}, {-1, -3}, {-3, -1}, {-3, -3} @@ -185,8 +190,15 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, mv_ref_search = sb64_mv_ref_search; } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32) { mv_ref_search = sb_mv_ref_search; +#if CONFIG_SB8X8 + } else if (mbmi->sb_type >= BLOCK_SIZE_MB16X16) { + mv_ref_search = mb_mv_ref_search; + } else { + mv_ref_search = b_mv_ref_search; +#else } else { mv_ref_search = mb_mv_ref_search; +#endif } // We first scan for candidate vectors that match the current reference frame diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 211783e51..bb873c185 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -59,9 +59,13 @@ typedef struct frame_contexts { vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; +#if !CONFIG_SB8X8 vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; +#endif vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; +#if !CONFIG_SB8X8 vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; +#endif vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; @@ -81,17 +85,25 @@ typedef struct frame_contexts { vp9_prob pre_ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ vp9_prob 
pre_sb_ymode_prob[VP9_I32X32_MODES - 1]; vp9_prob pre_uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; +#if !CONFIG_SB8X8 vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1]; +#endif vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; +#if !CONFIG_SB8X8 vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1]; +#endif vp9_prob pre_partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; unsigned int bmode_counts[VP9_NKF_BINTRAMODES]; unsigned int ymode_counts[VP9_YMODES]; /* interframe intra mode probs */ unsigned int sb_ymode_counts[VP9_I32X32_MODES]; unsigned int uv_mode_counts[VP9_YMODES][VP9_UV_MODES]; +#if !CONFIG_SB8X8 unsigned int i8x8_mode_counts[VP9_I8X8_MODES]; /* interframe intra probs */ +#endif unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS]; +#if !CONFIG_SB8X8 unsigned int mbsplit_counts[VP9_NUMMBSPLITS]; +#endif unsigned int partition_counts[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES]; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 6efe2465e..042006354 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -265,19 +265,27 @@ static INLINE int round_mv_comp_q4(int value) { return (value < 0 ? 
value - 2 : value + 2) / 4; } +#if CONFIG_SB8X8 +#define IDX1 2 +#define IDX2 3 +#else +#define IDX1 4 +#define IDX2 5 +#endif + static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int off, int idx) { const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row + mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.row; + mb->mode_info_context->bmi[off + IDX1].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + IDX2].as_mv[idx].as_mv.row; return round_mv_comp_q4(temp); } static int mi_mv_pred_col_q4(MACROBLOCKD *mb, int off, int idx) { const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.col + mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.col; + mb->mode_info_context->bmi[off + IDX1].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + IDX2].as_mv[idx].as_mv.col; return round_mv_comp_q4(temp); } diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 4b62c1cce..a0155d9a9 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -15,25 +15,6 @@ #include "vp9/common/vp9_reconintra.h" #include "vpx_mem/vpx_mem.h" -// Using multiplication and shifting instead of division in diagonal prediction. -// iscale table is calculated from ((1 << 16) + (i + 2) / 2) / (i+2) and used as -// ((A + B) * iscale[i] + (1 << 15)) >> 16; -// where A and B are weighted pixel values. 
-static const unsigned int iscale[64] = { - 32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282, - 6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855, - 3641, 3449, 3277, 3121, 2979, 2849, 2731, 2621, - 2521, 2427, 2341, 2260, 2185, 2114, 2048, 1986, - 1928, 1872, 1820, 1771, 1725, 1680, 1638, 1598, - 1560, 1524, 1489, 1456, 1425, 1394, 1365, 1337, - 1311, 1285, 1260, 1237, 1214, 1192, 1170, 1150, - 1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008, -}; - -static INLINE int iscale_round(int value, int i) { - return ROUND_POWER_OF_TWO(value * iscale[i], 16); -} - static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int bw, int bh, uint8_t *yabove_row, uint8_t *yleft_col) { diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index c51d0b243..474250cf7 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -65,9 +65,11 @@ static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *r, const vp9_prob *p) { return (MB_PREDICTION_MODE)treed_read(r, vp9_kf_ymode_tree, p); } +#if !CONFIG_SB8X8 static int read_i8x8_mode(vp9_reader *r, const vp9_prob *p) { return treed_read(r, vp9_i8x8_mode_tree, p); } +#endif static MB_PREDICTION_MODE read_uv_mode(vp9_reader *r, const vp9_prob *p) { return (MB_PREDICTION_MODE)treed_read(r, vp9_uv_mode_tree, p); @@ -161,6 +163,7 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, } } +#if !CONFIG_SB8X8 if (m->mbmi.mode == I8X8_PRED) { int i; for (i = 0; i < 4; ++i) { @@ -175,14 +178,25 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, } // chroma mode - if (m->mbmi.mode != I8X8_PRED) { + if (m->mbmi.mode != I8X8_PRED) +#endif + { m->mbmi.uv_mode = read_uv_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]); } if (cm->txfm_mode == TX_MODE_SELECT && !m->mbmi.mb_skip_coeff && - m->mbmi.mode <= I8X8_PRED) { +#if CONFIG_SB8X8 + m->mbmi.mode != I4X4_PRED +#else + m->mbmi.mode <= I8X8_PRED +#endif + ) { +#if CONFIG_SB8X8 + const int allow_16x16 = m->mbmi.sb_type >= BLOCK_SIZE_MB16X16; +#else const int 
allow_16x16 = m->mbmi.mode != I8X8_PRED; +#endif const int allow_32x32 = m->mbmi.sb_type >= BLOCK_SIZE_SB32X32; m->mbmi.txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32); } else if (cm->txfm_mode >= ALLOW_32X32 && @@ -767,19 +781,29 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->uv_mode = DC_PRED; switch (mbmi->mode) { case SPLITMV: { +#if CONFIG_SB8X8 + const int num_p = 4; +#else const int s = treed_read(r, vp9_mbsplit_tree, cm->fc.mbsplit_prob); const int num_p = vp9_mbsplit_count[s]; +#endif int j = 0; +#if !CONFIG_SB8X8 cm->fc.mbsplit_counts[s]++; - mbmi->need_to_clamp_mvs = 0; mbmi->partitioning = s; +#endif + mbmi->need_to_clamp_mvs = 0; do { // for each subset j int_mv leftmv, abovemv, second_leftmv, second_abovemv; int_mv blockmv, secondmv; int mv_contz; int blockmode; +#if CONFIG_SB8X8 + int k = j; +#else int k = vp9_mbsplit_offset[s][j]; // first block in subset j +#endif leftmv.as_int = left_block_mv(xd, mi, k); abovemv.as_int = above_block_mv(mi, k, mis); @@ -851,6 +875,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } */ +#if !CONFIG_SB8X8 { /* Fill (uniform) modes, mvs of jth subset. 
Must do it here because ensuing subsets can @@ -866,12 +891,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, fill_offset++; } while (--fill_count); } - +#endif } while (++j < num_p); } - mv0->as_int = mi->bmi[15].as_mv[0].as_int; - mv1->as_int = mi->bmi[15].as_mv[1].as_int; + mv0->as_int = mi->bmi[15 >> (2 * CONFIG_SB8X8)].as_mv[0].as_int; + mv1->as_int = mi->bmi[15 >> (2 * CONFIG_SB8X8)].as_mv[1].as_int; break; /* done with SPLITMV */ @@ -957,6 +982,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } while (++j < 16); } +#if !CONFIG_SB8X8 if (mbmi->mode == I8X8_PRED) { int i; for (i = 0; i < 4; i++) { @@ -969,7 +995,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mi->bmi[ib + 5].as_mode.first = mode8x8; cm->fc.i8x8_mode_counts[mode8x8]++; } - } else { + } else +#endif + { mbmi->uv_mode = read_uv_mode(r, cm->fc.uv_mode_prob[mbmi->mode]); cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; } @@ -980,23 +1008,44 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, */ if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && - ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) || - (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4)))) { + ((mbmi->ref_frame == INTRA_FRAME && +#if CONFIG_SB8X8 + mbmi->mode != I4X4_PRED +#else + mbmi->mode <= I8X8_PRED +#endif + ) || + (mbmi->ref_frame != INTRA_FRAME && +#if CONFIG_SB8X8 + mbmi->mode != SPLITMV +#else + !(mbmi->mode == SPLITMV && mbmi->partitioning == PARTITIONING_4X4) +#endif + ))) { +#if CONFIG_SB8X8 + const int allow_16x16 = mbmi->sb_type >= BLOCK_SIZE_MB16X16; +#else const int allow_16x16 = mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV; +#endif const int allow_32x32 = mbmi->sb_type >= BLOCK_SIZE_SB32X32; mbmi->txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32); } else if (mbmi->sb_type >= 
BLOCK_SIZE_SB32X32 && cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; } else if (cm->txfm_mode >= ALLOW_16X16 && +#if CONFIG_SB8X8 + mbmi->sb_type >= BLOCK_SIZE_MB16X16 && +#endif ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) || (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode >= ALLOW_8X8 && (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == I4X4_PRED) && - !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4))) { + !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV +#if !CONFIG_SB8X8 + && mbmi->partitioning == PARTITIONING_4X4 +#endif + ))) { mbmi->txfm_size = TX_8X8; } else { mbmi->txfm_size = TX_4X4; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 01e9a2b89..5ef48d9e7 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -191,86 +191,57 @@ static void mb_init_dequantizer(VP9_COMMON *pc, MACROBLOCKD *xd) { xd->plane[i].dequant = pc->uv_dequant[xd->q_index]; } -static void decode_16x16(MACROBLOCKD *xd) { - const TX_TYPE tx_type = get_tx_type_16x16(xd, 0); - - vp9_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff, xd->plane[0].dst.buf, - xd->plane[0].dst.stride, xd->plane[0].eobs[0]); - - vp9_idct_add_8x8(xd->plane[1].qcoeff, xd->plane[1].dst.buf, - xd->plane[1].dst.stride, xd->plane[1].eobs[0]); - - vp9_idct_add_8x8(xd->plane[2].qcoeff, xd->plane[2].dst.buf, - xd->plane[1].dst.stride, xd->plane[2].eobs[0]); -} - +#if !CONFIG_SB8X8 static void decode_8x8(MACROBLOCKD *xd) { const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; // luma // if the first one is DCT_DCT assume all the rest are as well TX_TYPE tx_type = get_tx_type_8x8(xd, 0); - if (tx_type != DCT_DCT || mode == I8X8_PRED) { - int i; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - int idx = (ib & 0x02) ? 
(ib + 2) : ib; - int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16); - uint8_t* const dst = + int i; + assert(mode == I8X8_PRED); + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + int idx = (ib & 0x02) ? (ib + 2) : ib; + int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16); + uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, xd->plane[0].dst.buf, xd->plane[0].dst.stride); - int stride = xd->plane[0].dst.stride; - if (mode == I8X8_PRED) { - int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first; - vp9_intra8x8_predict(xd, ib, i8x8mode, dst, stride); - } - tx_type = get_tx_type_8x8(xd, ib); - vp9_iht_add_8x8_c(tx_type, q, dst, stride, xd->plane[0].eobs[idx]); + int stride = xd->plane[0].dst.stride; + if (mode == I8X8_PRED) { + int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first; + vp9_intra8x8_predict(xd, ib, i8x8mode, dst, stride); } - } else { - vp9_idct_add_y_block_8x8(xd->plane[0].qcoeff, xd->plane[0].dst.buf, - xd->plane[0].dst.stride, xd); + tx_type = get_tx_type_8x8(xd, ib); + vp9_iht_add_8x8_c(tx_type, q, dst, stride, xd->plane[0].eobs[idx]); } // chroma - if (mode == I8X8_PRED) { - int i; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first; - uint8_t* dst; - - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride); - vp9_intra_uv4x4_predict(xd, 16 + i, i8x8mode, - dst, xd->plane[1].dst.stride); - xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), - dst, xd->plane[1].dst.stride, - xd->plane[1].eobs[i]); - - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 2, i, - xd->plane[2].dst.buf, - xd->plane[1].dst.stride); - vp9_intra_uv4x4_predict(xd, 20 + i, i8x8mode, - dst, xd->plane[1].dst.stride); - xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), - dst, xd->plane[1].dst.stride, - xd->plane[2].eobs[i]); - } - } else if (mode == SPLITMV) { - 
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->plane[1].dst.buf, - xd->plane[1].dst.stride, xd->plane[1].eobs); - xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->plane[2].dst.buf, - xd->plane[1].dst.stride, xd->plane[2].eobs); - } else { - vp9_idct_add_8x8(xd->plane[1].qcoeff, xd->plane[1].dst.buf, - xd->plane[1].dst.stride, xd->plane[1].eobs[0]); + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first; + uint8_t* dst; - vp9_idct_add_8x8(xd->plane[2].qcoeff, xd->plane[2].dst.buf, - xd->plane[1].dst.stride, xd->plane[2].eobs[0]); + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i, + xd->plane[1].dst.buf, + xd->plane[1].dst.stride); + vp9_intra_uv4x4_predict(xd, 16 + i, i8x8mode, + dst, xd->plane[1].dst.stride); + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), + dst, xd->plane[1].dst.stride, + xd->plane[1].eobs[i]); + + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 2, i, + xd->plane[2].dst.buf, + xd->plane[1].dst.stride); + vp9_intra_uv4x4_predict(xd, 20 + i, i8x8mode, + dst, xd->plane[1].dst.stride); + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), + dst, xd->plane[1].dst.stride, + xd->plane[2].eobs[i]); } } +#endif static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) { struct macroblockd_plane *const y = &xd->plane[0]; @@ -286,76 +257,46 @@ static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) { } } +#if !CONFIG_SB8X8 static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) { TX_TYPE tx_type; int i = 0; const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; - if (mode == I8X8_PRED) { - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - const int iblock[4] = {0, 1, 4, 5}; - int j; - uint8_t* dst; - int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first; + assert(mode == I8X8_PRED); + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + const int iblock[4] = {0, 1, 4, 5}; + int j; + 
uint8_t* dst; + int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first; - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); - vp9_intra8x8_predict(xd, ib, i8x8mode, dst, xd->plane[0].dst.stride); - for (j = 0; j < 4; j++) { - tx_type = get_tx_type_4x4(xd, ib + iblock[j]); - dequant_add_y(xd, tx_type, ib + iblock[j]); - } - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride); - vp9_intra_uv4x4_predict(xd, 16 + i, i8x8mode, - dst, xd->plane[1].dst.stride); - xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), - dst, xd->plane[1].dst.stride, - xd->plane[1].eobs[i]); - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 2, i, - xd->plane[2].dst.buf, - xd->plane[2].dst.stride); - vp9_intra_uv4x4_predict(xd, 20 + i, i8x8mode, - dst, xd->plane[1].dst.stride); - xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), - dst, xd->plane[1].dst.stride, - xd->plane[2].eobs[i]); - } - } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) { - xd->itxm_add_y_block(xd->plane[0].qcoeff, xd->plane[0].dst.buf, - xd->plane[0].dst.stride, xd); - xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->plane[1].dst.buf, - xd->plane[1].dst.stride, xd->plane[1].eobs); - xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->plane[2].dst.buf, - xd->plane[1].dst.stride, xd->plane[2].eobs); - } else { - for (i = 0; i < 16; i++) { - tx_type = get_tx_type_4x4(xd, i); - dequant_add_y(xd, tx_type, i); + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + vp9_intra8x8_predict(xd, ib, i8x8mode, dst, xd->plane[0].dst.stride); + for (j = 0; j < 4; j++) { + tx_type = get_tx_type_4x4(xd, ib + iblock[j]); + dequant_add_y(xd, tx_type, ib + iblock[j]); } - xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->plane[1].dst.buf, - xd->plane[1].dst.stride, xd->plane[1].eobs); - xd->itxm_add_uv_block(xd->plane[2].qcoeff, 
xd->plane[2].dst.buf, - xd->plane[1].dst.stride, xd->plane[2].eobs); + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i, + xd->plane[1].dst.buf, + xd->plane[1].dst.stride); + vp9_intra_uv4x4_predict(xd, 16 + i, i8x8mode, + dst, xd->plane[1].dst.stride); + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), + dst, xd->plane[1].dst.stride, + xd->plane[1].eobs[i]); + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 2, i, + xd->plane[2].dst.buf, + xd->plane[2].dst.stride); + vp9_intra_uv4x4_predict(xd, 20 + i, i8x8mode, + dst, xd->plane[1].dst.stride); + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), + dst, xd->plane[1].dst.stride, + xd->plane[2].eobs[i]); } } - -static int txfrm_block_to_raster_block(MACROBLOCKD *xd, - BLOCK_SIZE_TYPE bsize, - int plane, int block, - int ss_txfrm_size) { - const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; - const int txwl = ss_txfrm_size / 2; - const int tx_cols_lg2 = bwl - txwl; - const int tx_cols = 1 << tx_cols_lg2; - const int raster_mb = block >> ss_txfrm_size; - const int x = (raster_mb & (tx_cols - 1)) << (txwl); - const int y = raster_mb >> tx_cols_lg2 << (txwl); - return x + (y << bwl); -} - +#endif static void decode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *arg) { @@ -428,6 +369,41 @@ static void decode_atom_intra(VP9D_COMP *pbi, MACROBLOCKD *xd, foreach_transformed_block_uv(xd, bsize, decode_block, xd); } +static void decode_atom(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + + if (pbi->common.frame_type != KEY_FRAME) + vp9_setup_interp_filters(xd, mbmi->interp_filter, &pbi->common); + + // prediction + if (mbmi->ref_frame == INTRA_FRAME) + vp9_build_intra_predictors_sbuv_s(xd, bsize); + else + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + + if (mbmi->mb_skip_coeff) { + vp9_reset_sb_tokens_context(xd, bsize); + } 
else { + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(&pbi->common, xd); + + if (!vp9_reader_has_error(r)) { +#if CONFIG_NEWBINTRAMODES + if (mbmi->mode != I4X4_PRED) +#endif + vp9_decode_tokens(pbi, xd, r, bsize); + } + } + + if (mbmi->ref_frame == INTRA_FRAME) + decode_atom_intra(pbi, xd, r, bsize); + else + foreach_transformed_block(xd, bsize, decode_block, xd); +} + static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE_TYPE bsize) { const int bwl = mi_width_log2(bsize), bhl = mi_height_log2(bsize); @@ -473,39 +449,17 @@ static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mi_row, int mi_col, } } -// TODO(jingning): Need to merge SB and MB decoding. The MB decoding currently -// couples special handles on I8x8, B_PRED, and splitmv modes. +#if !CONFIG_SB8X8 +// TODO(jingning): This only performs I8X8_PRED decoding process, which will be +// automatically covered by decode_sb, when SB8X8 is on. 
static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mi_row, int mi_col, vp9_reader *r) { - int eobtotal = 0; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - const MB_PREDICTION_MODE mode = mbmi->mode; const int tx_size = mbmi->txfm_size; assert(mbmi->sb_type == BLOCK_SIZE_MB16X16); - //mode = xd->mode_info_context->mbmi.mode; - if (pbi->common.frame_type != KEY_FRAME) - vp9_setup_interp_filters(xd, mbmi->interp_filter, &pbi->common); - - // do prediction - if (mbmi->ref_frame == INTRA_FRAME) { - if (mode != I8X8_PRED) { - vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16); - if (mode != I4X4_PRED) - vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16); - } - } else { -#if 0 // def DEC_DEBUG - if (dec_debug) - printf("Decoding mb: %d %d interp %d\n", - xd->mode_info_context->mbmi.mode, tx_size, - xd->mode_info_context->mbmi.interp_filter); -#endif - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); - } - if (mbmi->mb_skip_coeff) { vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); } else { @@ -513,73 +467,16 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, if (xd->segmentation_enabled) mb_init_dequantizer(&pbi->common, xd); - if (!vp9_reader_has_error(r)) { -#if CONFIG_NEWBINTRAMODES - if (mode != I4X4_PRED) -#endif - eobtotal = vp9_decode_tokens(pbi, xd, r, BLOCK_SIZE_MB16X16); - } - } - - if (eobtotal == 0 && - mode != I4X4_PRED && mode != I8X8_PRED && mode != SPLITMV && - !vp9_reader_has_error(r)) { - mbmi->mb_skip_coeff = 1; - } else { -#if 0 // def DEC_DEBUG - if (dec_debug) - printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size); -#endif - - if (tx_size == TX_16X16) { - decode_16x16(xd); - } else if (tx_size == TX_8X8) { - decode_8x8(xd); - } else { - if (mbmi->mode == I4X4_PRED) - // TODO(jingning): we need to move this to decode_atom later and - // deprecate decode_mb, when SB8X8 is on. 
- decode_atom_intra(pbi, xd, r, BLOCK_SIZE_MB16X16); - else - decode_4x4(pbi, xd, r); - } + if (!vp9_reader_has_error(r)) + vp9_decode_tokens(pbi, xd, r, BLOCK_SIZE_MB16X16); } -#ifdef DEC_DEBUG - if (dec_debug) { - int i, j; - printf("\n"); - printf("predictor y\n"); - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j++) - printf("%3d ", xd->predictor[i * 16 + j]); - printf("\n"); - } - printf("\n"); - printf("final y\n"); - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j++) - printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]); - printf("\n"); - } - printf("\n"); - printf("final u\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]); - printf("\n"); - } - printf("\n"); - printf("final v\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]); - printf("\n"); - } - fflush(stdout); - } -#endif + if (tx_size == TX_8X8) + decode_8x8(xd); + else + decode_4x4(pbi, xd, r); } +#endif static int get_delta_q(vp9_reader *r, int *dq) { const int old_value = *dq; @@ -666,12 +563,31 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r); set_refs(pbi, mi_row, mi_col); +#if CONFIG_SB8X8 + if (bsize >= BLOCK_SIZE_SB8X8) + decode_sb(pbi, xd, mi_row, mi_col, r, bsize); + else + decode_atom(pbi, xd, mi_row, mi_col, r, BLOCK_SIZE_SB8X8); +#else // TODO(jingning): merge decode_sb_ and decode_mb_ if (bsize > BLOCK_SIZE_MB16X16) { decode_sb(pbi, xd, mi_row, mi_col, r, bsize); } else { - decode_mb(pbi, xd, mi_row, mi_col, r); + // TODO(jingning): In transition of separating functionalities of decode_mb + // into decode_sb and decode_atom. Will remove decode_mb and clean this up + // when SB8X8 is on. 
+ if (xd->mode_info_context->mbmi.mode == I4X4_PRED || + (xd->mode_info_context->mbmi.mode == SPLITMV && + xd->mode_info_context->mbmi.partitioning == PARTITIONING_4X4)) + decode_atom(pbi, xd, mi_row, mi_col, r, bsize); + else if (xd->mode_info_context->mbmi.mode != I8X8_PRED) + decode_sb(pbi, xd, mi_row, mi_col, r, bsize); + else + // TODO(jingning): decode_mb still carries decoding process of I8X8_PRED. + // This will be covered by decode_sb when SB8X8 is on. + decode_mb(pbi, xd, mi_row, mi_col, r); } +#endif xd->corrupted |= vp9_reader_has_error(r); } @@ -1090,9 +1006,13 @@ static void update_frame_context(FRAME_CONTEXT *fc) { vp9_copy(fc->pre_sb_ymode_prob, fc->sb_ymode_prob); vp9_copy(fc->pre_uv_mode_prob, fc->uv_mode_prob); vp9_copy(fc->pre_bmode_prob, fc->bmode_prob); +#if !CONFIG_SB8X8 vp9_copy(fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob); +#endif vp9_copy(fc->pre_sub_mv_ref_prob, fc->sub_mv_ref_prob); +#if !CONFIG_SB8X8 vp9_copy(fc->pre_mbsplit_prob, fc->mbsplit_prob); +#endif vp9_copy(fc->pre_partition_prob, fc->partition_prob); fc->pre_nmvc = fc->nmvc; @@ -1105,9 +1025,13 @@ static void update_frame_context(FRAME_CONTEXT *fc) { vp9_zero(fc->sb_ymode_counts); vp9_zero(fc->uv_mode_counts); vp9_zero(fc->bmode_counts); +#if !CONFIG_SB8X8 vp9_zero(fc->i8x8_mode_counts); +#endif vp9_zero(fc->sub_mv_ref_counts); +#if !CONFIG_SB8X8 vp9_zero(fc->mbsplit_counts); +#endif vp9_zero(fc->NMVcount); vp9_zero(fc->mv_ref_ct); vp9_zero(fc->partition_counts); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 3ab67cd8c..3c0bab2ce 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -281,9 +281,11 @@ static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m); } +#if !CONFIG_SB8X8 static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m); } +#endif static void 
write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m); @@ -302,9 +304,11 @@ static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m); } +#if !CONFIG_SB8X8 static void write_split(vp9_writer *bc, int x, const vp9_prob *p) { write_token(bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x); } +#endif static int prob_update_savings(const unsigned int *ct, const vp9_prob oldp, const vp9_prob newp, @@ -728,8 +732,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, do { write_bmode(bc, m->bmi[j].as_mode.first, pc->fc.bmode_prob); - } while (++j < 16); + } while (++j < (16 >> (CONFIG_SB8X8 * 2))); } +#if !CONFIG_SB8X8 if (mode == I8X8_PRED) { write_i8x8_mode(bc, m->bmi[0].as_mode.first, pc->fc.i8x8_mode_prob); @@ -739,7 +744,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, pc->fc.i8x8_mode_prob); write_i8x8_mode(bc, m->bmi[10].as_mode.first, pc->fc.i8x8_mode_prob); - } else { + } else +#endif + { write_uv_mode(bc, mi->uv_mode, pc->fc.uv_mode_prob[mode]); } @@ -824,19 +831,26 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, ++count_mb_seg[mi->partitioning]; #endif +#if !CONFIG_SB8X8 write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob); cpi->mbsplit_count[mi->partitioning]++; +#endif do { B_PREDICTION_MODE blockmode; int_mv blockmv; +#if !CONFIG_SB8X8 const int *const L = vp9_mbsplits[mi->partitioning]; +#endif int k = -1; /* first block in subset j */ int mv_contz; int_mv leftmv, abovemv; blockmode = cpi->mb.partition_info->bmi[j].mode; blockmv = cpi->mb.partition_info->bmi[j].mv; +#if CONFIG_SB8X8 + k = j; +#else #if CONFIG_DEBUG while (j != L[++k]) if (k >= 16) @@ -844,6 +858,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, #else while (j != L[++k]); #endif +#endif leftmv.as_int = left_block_mv(xd, m, k); abovemv.as_int = above_block_mv(m, k, mis); 
mv_contz = vp9_mv_cont(&leftmv, &abovemv); @@ -875,6 +890,22 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } } +#if CONFIG_SB8X8 + if (((rf == INTRA_FRAME && mode != I4X4_PRED) || + (rf != INTRA_FRAME && mode != SPLITMV)) && + pc->txfm_mode == TX_MODE_SELECT && + !(skip_coeff || vp9_segfeature_active(xd, segment_id, + SEG_LVL_SKIP))) { + TX_SIZE sz = mi->txfm_size; + // FIXME(rbultje) code ternary symbol once all experiments are merged + vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); + if (mi->sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) { + vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); + if (mi->sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); + } + } +#else if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || (rf != INTRA_FRAME && !(mode == SPLITMV && mi->partitioning == PARTITIONING_4X4))) && @@ -890,6 +921,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); } } +#endif } static void write_mb_modes_kf(const VP9_COMP *cpi, @@ -930,8 +962,9 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, #endif write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]); - } while (++i < 16); + } while (++i < (16 >> (CONFIG_SB8X8 * 2))); } +#if !CONFIG_SB8X8 if (ym == I8X8_PRED) { write_i8x8_mode(bc, m->bmi[0].as_mode.first, c->fc.i8x8_mode_prob); // printf(" mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout); @@ -942,8 +975,22 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, write_i8x8_mode(bc, m->bmi[10].as_mode.first, c->fc.i8x8_mode_prob); // printf(" mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout); } else +#endif write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); +#if CONFIG_SB8X8 + if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT && + !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { + TX_SIZE sz = m->mbmi.txfm_size; + // FIXME(rbultje) code ternary symbol once all experiments are merged + vp9_write(bc, sz != 
TX_4X4, c->prob_tx[0]); + if (m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) { + vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); + if (m->mbmi.sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); + } + } +#else if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { TX_SIZE sz = m->mbmi.txfm_size; @@ -955,6 +1002,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); } } +#endif } @@ -1719,16 +1767,91 @@ static void segment_reference_frames(VP9_COMP *cpi) { } } -void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, - unsigned long *size) { +static void encode_segmentation(VP9_COMP *cpi, vp9_writer *w) { int i, j; + VP9_COMMON *const pc = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + + vp9_write_bit(w, xd->segmentation_enabled); + if (!xd->segmentation_enabled) + return; + + // Segmentation map + vp9_write_bit(w, xd->update_mb_segmentation_map); +#if CONFIG_IMPLICIT_SEGMENTATION + vp9_write_bit(w, xd->allow_implicit_segment_update); +#endif + if (xd->update_mb_segmentation_map) { + // Select the coding strategy (temporal or spatial) + vp9_choose_segmap_coding_method(cpi); + // Write out probabilities used to decode unpredicted macro-block segments + for (i = 0; i < MB_SEG_TREE_PROBS; i++) { + const int prob = xd->mb_segment_tree_probs[i]; + if (prob != MAX_PROB) { + vp9_write_bit(w, 1); + vp9_write_prob(w, prob); + } else { + vp9_write_bit(w, 0); + } + } + + // Write out the chosen coding method. 
+ vp9_write_bit(w, pc->temporal_update); + if (pc->temporal_update) { + for (i = 0; i < PREDICTION_PROBS; i++) { + const int prob = pc->segment_pred_probs[i]; + if (prob != MAX_PROB) { + vp9_write_bit(w, 1); + vp9_write_prob(w, prob); + } else { + vp9_write_bit(w, 0); + } + } + } + } + + // Segmentation data + vp9_write_bit(w, xd->update_mb_segmentation_data); + // segment_reference_frames(cpi); + if (xd->update_mb_segmentation_data) { + vp9_write_bit(w, xd->mb_segment_abs_delta); + + for (i = 0; i < MAX_MB_SEGMENTS; i++) { + for (j = 0; j < SEG_LVL_MAX; j++) { + const int data = vp9_get_segdata(xd, i, j); + const int data_max = vp9_seg_feature_data_max(j); + + if (vp9_segfeature_active(xd, i, j)) { + vp9_write_bit(w, 1); + + if (vp9_is_segfeature_signed(j)) { + if (data < 0) { + vp9_encode_unsigned_max(w, -data, data_max); + vp9_write_bit(w, 1); + } else { + vp9_encode_unsigned_max(w, data, data_max); + vp9_write_bit(w, 0); + } + } else { + vp9_encode_unsigned_max(w, data, data_max); + } + } else { + vp9_write_bit(w, 0); + } + } + } + } +} + +void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { + int i; VP9_HEADER oh; VP9_COMMON *const pc = &cpi->common; vp9_writer header_bc, residual_bc; MACROBLOCKD *const xd = &cpi->mb.e_mbd; int extra_bytes_packed = 0; - unsigned char *cx_data = dest; + uint8_t *cx_data = dest; oh.show_frame = (int) pc->show_frame; oh.type = (int)pc->frame_type; @@ -1960,87 +2083,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, active_section = 7; #endif - // Signal whether or not Segmentation is enabled - vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0); - - // Indicate which features are enabled - if (xd->segmentation_enabled) { - // Indicate whether or not the segmentation map is being updated. - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0); -#if CONFIG_IMPLICIT_SEGMENTATION - vp9_write_bit(&header_bc, (xd->allow_implicit_segment_update) ? 
1 : 0); -#endif - - // If it is, then indicate the method that will be used. - if (xd->update_mb_segmentation_map) { - // Select the coding strategy (temporal or spatial) - vp9_choose_segmap_coding_method(cpi); - // Send the tree probabilities used to decode unpredicted - // macro-block segments - for (i = 0; i < MB_SEG_TREE_PROBS; i++) { - const int prob = xd->mb_segment_tree_probs[i]; - if (prob != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_prob(&header_bc, prob); - } else { - vp9_write_bit(&header_bc, 0); - } - } - - // Write out the chosen coding method. - vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0); - if (pc->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) { - const int prob = pc->segment_pred_probs[i]; - if (prob != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_prob(&header_bc, prob); - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 1 : 0); - - // segment_reference_frames(cpi); - - if (xd->update_mb_segmentation_data) { - vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0); - - // For each segments id... - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // For each segmentation codable feature... - for (j = 0; j < SEG_LVL_MAX; j++) { - const int8_t data = vp9_get_segdata(xd, i, j); - const int data_max = vp9_seg_feature_data_max(j); - - // If the feature is enabled... - if (vp9_segfeature_active(xd, i, j)) { - vp9_write_bit(&header_bc, 1); - - // Is the segment data signed.. 
- if (vp9_is_segfeature_signed(j)) { - // Encode the relevant feature data - if (data < 0) { - vp9_encode_unsigned_max(&header_bc, -data, data_max); - vp9_write_bit(&header_bc, 1); - } else { - vp9_encode_unsigned_max(&header_bc, data, data_max); - vp9_write_bit(&header_bc, 0); - } - } else { - // Unsigned data element so no sign bit needed - vp9_encode_unsigned_max(&header_bc, data, data_max); - } - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - } + encode_segmentation(cpi, &header_bc); // Encode the common prediction model status flag probability updates for // the reference frame @@ -2153,15 +2196,19 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob); vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); +#if !CONFIG_SB8X8 vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob); vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); +#endif vp9_copy(cpi->common.fc.pre_partition_prob, cpi->common.fc.partition_prob); cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; #if CONFIG_COMP_INTERINTRA_PRED cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob; #endif vp9_zero(cpi->sub_mv_ref_count); +#if !CONFIG_SB8X8 vp9_zero(cpi->mbsplit_count); +#endif vp9_zero(cpi->common.fc.mv_ref_ct); update_coef_probs(cpi, &header_bc); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 2c06457e7..40ad680b0 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -117,7 +117,9 @@ struct macroblock { int mbmode_cost[2][MB_MODE_COUNT]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES]; +#if !CONFIG_SB8X8 int i8x8_mode_costs[MB_MODE_COUNT]; +#endif int inter_bmode_costs[B_MODE_COUNT]; int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1] 
[VP9_SWITCHABLE_FILTERS]; @@ -141,6 +143,11 @@ struct macroblock { // Structure to hold context for each of the 4 MBs within a SB: // when encoded as 4 independent MBs: +#if CONFIG_SB8X8 + PICK_MODE_CONTEXT sb8_context[4][4][4]; + PICK_MODE_CONTEXT sb8x16_context[4][4][2]; + PICK_MODE_CONTEXT sb16x8_context[4][4][2]; +#endif PICK_MODE_CONTEXT mb_context[4][4]; PICK_MODE_CONTEXT sb32x16_context[4][2]; PICK_MODE_CONTEXT sb16x32_context[4][2]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index d40c604a4..95bba21a9 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -47,8 +47,10 @@ int enc_debug = 0; void vp9_select_interp_filter_type(VP9_COMP *cpi); +#if !CONFIG_SB8X8 static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col); +#endif static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, @@ -380,6 +382,8 @@ static void update_state(VP9_COMP *cpi, } } if (bsize < BLOCK_SIZE_SB32X32) { + if (bsize < BLOCK_SIZE_MB16X16) + ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8]; ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; } @@ -387,8 +391,10 @@ static void update_state(VP9_COMP *cpi, vpx_memcpy(x->partition_info, &ctx->partition_info, sizeof(PARTITION_INFO)); - mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int; - mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; + mbmi->mv[0].as_int = + x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].mv.as_int; + mbmi->mv[1].as_int = + x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].second_mv.as_int; #if CONFIG_SB8X8 vpx_memcpy(x->partition_info + mis, &ctx->partition_info, sizeof(PARTITION_INFO)); @@ -453,7 +459,9 @@ static void update_state(VP9_COMP *cpi, THR_D27_PRED /*D27_PRED*/, THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, +#if !CONFIG_SB8X8 THR_I8X8_PRED /*I8X8_PRED*/, +#endif THR_B_PRED /*I4X4_PRED*/, }; 
cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++; @@ -631,13 +639,11 @@ static void set_offsets(VP9_COMP *cpi, /* segment ID */ if (xd->segmentation_enabled) { - if (xd->update_mb_segmentation_map) { - mbmi->segment_id = find_seg_id(cpi->segmentation_map, bsize, - mi_row, cm->mi_rows, mi_col, cm->mi_cols); - } else { - mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, bsize, - mi_row, cm->mi_rows, mi_col, cm->mi_cols); - } + uint8_t *map = xd->update_mb_segmentation_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = find_seg_id(map, bsize, mi_row, + cm->mi_rows, mi_col, cm->mi_cols); + assert(mbmi->segment_id <= (MAX_MB_SEGMENTS-1)); vp9_mb_init_quantizer(cpi, x); @@ -667,6 +673,7 @@ static void set_offsets(VP9_COMP *cpi, } } +#if !CONFIG_SB8X8 static int pick_mb_mode(VP9_COMP *cpi, int mi_row, int mi_col, @@ -707,6 +714,7 @@ static int pick_mb_mode(VP9_COMP *cpi, return splitmodes_used; } +#endif static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, TOKENEXTRA **tp, int *totalrate, int *totaldist, @@ -790,11 +798,15 @@ static void set_block_index(MACROBLOCKD *xd, int idx, BLOCK_SIZE_TYPE bsize) { if (bsize >= BLOCK_SIZE_SB32X32) { xd->sb_index = idx; - } else { #if CONFIG_SB8X8 - assert(bsize >= BLOCK_SIZE_MB16X16); -#endif + } else if (bsize >= BLOCK_SIZE_MB16X16) { xd->mb_index = idx; + } else { + xd->b_index = idx; +#else + } else { + xd->mb_index = idx; +#endif } } @@ -817,6 +829,14 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, return &x->sb16x32_context[xd->sb_index][xd->mb_index]; case BLOCK_SIZE_MB16X16: return &x->mb_context[xd->sb_index][xd->mb_index]; +#if CONFIG_SB8X8 + case BLOCK_SIZE_SB16X8: + return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X16: + return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X8: + return &x->sb8_context[xd->sb_index][xd->mb_index][xd->b_index]; +#endif default: assert(0); return NULL; @@ 
-837,12 +857,15 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, set_block_index(xd, sub_index, bsize); set_offsets(cpi, mi_row, mi_col, bsize); update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); +#if !CONFIG_SB8X8 if (bsize == BLOCK_SIZE_MB16X16) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp9_activity_masking(cpi, x); encode_macroblock(cpi, tp, output_enabled, mi_row, mi_col); - } else { + } else +#endif + { encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); } @@ -857,22 +880,38 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE_TYPE level, - BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4]) { + BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4] +#if CONFIG_SB8X8 + , BLOCK_SIZE_TYPE c3[4][4] +#endif + ) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = mi_width_log2(level), bs = 1 << (bsl - 1); const int bwl = mi_width_log2(c1), bhl = mi_height_log2(c1); - int pl; + int UNINITIALIZED_IS_SAFE(pl); if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, level); +#if CONFIG_SB8X8 + if (level > BLOCK_SIZE_SB8X8) { +#endif + set_partition_seg_context(cpi, mi_row, mi_col); + pl = partition_plane_context(xd, level); +#if CONFIG_SB8X8 + } +#endif if (bsl == bwl && bsl == bhl) { - if (output_enabled && level > BLOCK_SIZE_MB16X16) + if (output_enabled && +#if CONFIG_SB8X8 + level > BLOCK_SIZE_SB8X8 +#else + level > BLOCK_SIZE_MB16X16 +#endif + ) cpi->partition_count[pl][PARTITION_NONE]++; encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1); } else if (bsl == bhl && bsl > bwl) { @@ -892,9 +931,17 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, assert(bwl < bsl && bhl < bsl); if (level == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; +#if CONFIG_SB8X8 + } else 
if (level == BLOCK_SIZE_SB32X32) { + subsize = BLOCK_SIZE_MB16X16; + } else { + assert(level == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; +#else } else { assert(level == BLOCK_SIZE_SB32X32); subsize = BLOCK_SIZE_MB16X16; +#endif } if (output_enabled) @@ -906,12 +953,22 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, set_block_index(xd, i, subsize); encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs, output_enabled, subsize, - subsize == BLOCK_SIZE_MB16X16 ? c1 : c2[i], c2); +#if CONFIG_SB8X8 + c2 ? c2[i] : c1, c3 ? c3[i] : NULL, NULL); +#else + c2 ? c2[i] : c1, NULL); +#endif } } +#if CONFIG_SB8X8 + if (level > BLOCK_SIZE_SB8X8 && + (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) +#else if (level > BLOCK_SIZE_MB16X16 && - (level == BLOCK_SIZE_SB32X32 || bsl == bwl || bsl == bhl)) { + (level == BLOCK_SIZE_SB32X32 || bsl == bwl || bsl == bhl)) +#endif + { set_partition_seg_context(cpi, mi_row, mi_col); update_partition_context(xd, c1, level); } @@ -934,7 +991,11 @@ static void encode_sb_row(VP9_COMP *cpi, for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; mi_col += (4 << CONFIG_SB8X8)) { int i, p; +#if CONFIG_SB8X8 + BLOCK_SIZE_TYPE mb_partitioning[4][4]; +#endif BLOCK_SIZE_TYPE sb_partitioning[4]; + BLOCK_SIZE_TYPE sb64_partitioning = BLOCK_SIZE_SB32X32; int sb64_rate = 0, sb64_dist = 0; int sb64_skip = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -951,6 +1012,9 @@ static void encode_sb_row(VP9_COMP *cpi, memcpy(&seg_a, cm->above_seg_context + (mi_col >> CONFIG_SB8X8), sizeof(seg_a)); memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l)); + + // FIXME(rbultje): this function should probably be rewritten to be + // recursive at some point in the future. 
for (i = 0; i < 4; i++) { const int x_idx = (i & 1) << (1 + CONFIG_SB8X8); const int y_idx = (i & 2) << CONFIG_SB8X8; @@ -985,6 +1049,10 @@ static void encode_sb_row(VP9_COMP *cpi, const int x_idx_m = x_idx + ((j & 1) << CONFIG_SB8X8); const int y_idx_m = y_idx + ((j >> 1) << CONFIG_SB8X8); int r, d; +#if CONFIG_SB8X8 + int r2, d2, mb16_rate = 0, mb16_dist = 0, k; + ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE]; +#endif if (mi_row + y_idx_m >= cm->mi_rows || mi_col + x_idx_m >= cm->mi_cols) { @@ -995,18 +1063,175 @@ static void encode_sb_row(VP9_COMP *cpi, // Index of the MB in the SB 0..3 xd->mb_index = j; +#if CONFIG_SB8X8 + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(l3 + 4 * p, + cm->left_context[p] + + (y_idx_m * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_y)), + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(a3 + 4 * p, + cm->above_context[p] + + ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_x)), + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + mb_partitioning[i][j] = BLOCK_SIZE_SB8X8; + for (k = 0; k < 4; k++) { + xd->b_index = k; + + // try 8x8 coding + pick_sb_modes(cpi, mi_row + y_idx_m + (k & 1), + mi_col + x_idx_m + (k >> 1), + tp, &r, &d, BLOCK_SIZE_SB8X8, + &x->sb8_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + mb16_rate += r; + mb16_dist += d; + update_state(cpi, &x->sb8_context[xd->sb_index][xd->mb_index] + [xd->b_index], + BLOCK_SIZE_SB8X8, 0); + encode_superblock(cpi, tp, + 0, mi_row + y_idx_m, mi_col + x_idx_m, + BLOCK_SIZE_SB8X8); + } + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + mb16_rate += x->partition_cost[pl][PARTITION_SPLIT]; + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->left_context[p] + + (y_idx_m * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_y)), + l3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + 
vpx_memcpy(cm->above_context[p] + + ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_x)), + a3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + // try 8x16 coding + r2 = 0; + d2 = 0; + xd->b_index = 0; + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_SB8X16, + &x->sb8x16_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + update_state(cpi, &x->sb8x16_context[xd->sb_index][xd->mb_index] + [xd->b_index], + BLOCK_SIZE_SB8X16, 0); + encode_superblock(cpi, tp, + 0, mi_row + y_idx_m, mi_col + x_idx_m, + BLOCK_SIZE_SB8X16); + xd->b_index = 1; + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m + 1, + tp, &r, &d, BLOCK_SIZE_SB8X16, + &x->sb8x16_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + r2 += x->partition_cost[pl][PARTITION_VERT]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { + mb16_rate = r; + mb16_dist = d; + mb_partitioning[i][j] = BLOCK_SIZE_SB8X16; + } + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->left_context[p] + + (y_idx_m * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_y)), + l3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(cm->above_context[p] + + ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_x)), + a3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + // try 16x8 coding + r2 = 0; + d2 = 0; + xd->b_index = 0; + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_SB16X8, + &x->sb16x8_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + update_state(cpi, &x->sb16x8_context[xd->sb_index][xd->mb_index] + [xd->b_index], + BLOCK_SIZE_SB16X8, 0); + encode_superblock(cpi, tp, + 0, mi_row + 
y_idx_m, mi_col + x_idx_m, + BLOCK_SIZE_SB16X8); + xd->b_index = 1; + pick_sb_modes(cpi, mi_row + y_idx_m + 1, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_SB16X8, + &x->sb16x8_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + r2 += x->partition_cost[pl][PARTITION_HORZ]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { + mb16_rate = r; + mb16_dist = d; + mb_partitioning[i][j] = BLOCK_SIZE_SB16X8; + } + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->left_context[p] + + (y_idx_m * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_y)), + l3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(cm->above_context[p] + + ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 + + xd->plane[p].subsampling_x)), + a3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + // try as 16x16 + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_MB16X16, + &x->mb_context[xd->sb_index][xd->mb_index]); + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + r += x->partition_cost[pl][PARTITION_NONE]; + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { + mb16_rate = r; + mb16_dist = d; + mb_partitioning[i][j] = BLOCK_SIZE_MB16X16; + } + sb32_rate += mb16_rate; + sb32_dist += mb16_dist; +#else splitmodes_used += pick_mb_mode(cpi, mi_row + y_idx_m, mi_col + x_idx_m, tp, &r, &d); sb32_rate += r; sb32_dist += d; +#endif // Dummy encode, do not do the tokenization #if CONFIG_SB8X8 - update_state(cpi, &x->mb_context[xd->sb_index][xd->mb_index], - BLOCK_SIZE_MB16X16, 0); -#endif + encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0, + BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL); +#else 
encode_macroblock(cpi, tp, 0, mi_row + y_idx_m, mi_col + x_idx_m); +#endif } /* Restore L & A coding context to those in place on entry */ @@ -1170,7 +1395,12 @@ static void encode_sb_row(VP9_COMP *cpi, // instead of small->big) means we can use as threshold for small, which // may enable breakouts if RD is not good enough (i.e. faster) encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0, - BLOCK_SIZE_SB32X32, sb_partitioning[i], sb_partitioning); +#if CONFIG_SB8X8 + BLOCK_SIZE_SB32X32, sb_partitioning[i], mb_partitioning[i], + NULL); +#else + BLOCK_SIZE_SB32X32, sb_partitioning[i], NULL); +#endif } for (p = 0; p < MAX_MB_PLANE; p++) { @@ -1221,7 +1451,7 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { sb64_rate = r; sb64_dist = d; - sb_partitioning[0] = BLOCK_SIZE_SB64X32; + sb64_partitioning = BLOCK_SIZE_SB64X32; } for (p = 0; p < MAX_MB_PLANE; p++) { @@ -1266,7 +1496,7 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { sb64_rate = r; sb64_dist = d; - sb_partitioning[0] = BLOCK_SIZE_SB32X64; + sb64_partitioning = BLOCK_SIZE_SB32X64; } for (p = 0; p < MAX_MB_PLANE; p++) { @@ -1295,13 +1525,17 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { sb64_rate = r; sb64_dist = d; - sb_partitioning[0] = BLOCK_SIZE_SB64X64; + sb64_partitioning = BLOCK_SIZE_SB64X64; } } assert(tp_orig == *tp); - encode_sb(cpi, tp, mi_row, mi_col, 1, - BLOCK_SIZE_SB64X64, sb_partitioning[0], sb_partitioning); + encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64, +#if CONFIG_SB8X8 + sb64_partitioning, sb_partitioning, mb_partitioning); +#else + sb64_partitioning, sb_partitioning); +#endif assert(tp_orig < *tp); } } @@ -1346,10 +1580,14 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_zero(cpi->count_mb_ref_frame_usage) vp9_zero(cpi->bmode_count) vp9_zero(cpi->ymode_count) +#if !CONFIG_SB8X8 vp9_zero(cpi->i8x8_mode_count) +#endif 
vp9_zero(cpi->y_uv_mode_count) vp9_zero(cpi->sub_mv_ref_count) +#if !CONFIG_SB8X8 vp9_zero(cpi->mbsplit_count) +#endif vp9_zero(cpi->common.fc.mv_ref_ct) vp9_zero(cpi->sb_ymode_count) vp9_zero(cpi->partition_count); @@ -1616,9 +1854,17 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi, assert(bwl < bsl && bhl < bsl); if (bsize == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; +#if CONFIG_SB8X8 + } else if (bsize == BLOCK_SIZE_SB32X32) { + subsize = BLOCK_SIZE_MB16X16; + } else { + assert(bsize == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; +#else } else { assert(bsize == BLOCK_SIZE_SB32X32); subsize = BLOCK_SIZE_MB16X16; +#endif } for (n = 0; n < 4; n++) { @@ -1823,9 +2069,10 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { do { ++ bct[xd->block[b].bmi.as_mode.first]; - } while (++b < 16); + } while (++b < (16 >> (CONFIG_SB8X8 * 2))); } +#if !CONFIG_SB8X8 if (m == I8X8_PRED) { i8x8_modes[xd->block[0].bmi.as_mode.first]++; i8x8_modes[xd->block[2].bmi.as_mode.first]++; @@ -1833,20 +2080,25 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { i8x8_modes[xd->block[10].bmi.as_mode.first]++; } #endif +#endif if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_MB16X16) { ++cpi->sb_ymode_count[m]; } else { ++cpi->ymode_count[m]; } +#if !CONFIG_SB8X8 if (m != I8X8_PRED) +#endif ++cpi->y_uv_mode_count[m][uvm]; +#if !CONFIG_SB8X8 else { cpi->i8x8_mode_count[xd->mode_info_context->bmi[0].as_mode.first]++; cpi->i8x8_mode_count[xd->mode_info_context->bmi[2].as_mode.first]++; cpi->i8x8_mode_count[xd->mode_info_context->bmi[8].as_mode.first]++; cpi->i8x8_mode_count[xd->mode_info_context->bmi[10].as_mode.first]++; } +#endif if (m == I4X4_PRED) { int b = 0; do { @@ -1855,7 +2107,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; #endif ++cpi->bmode_count[m]; - } while (++b < 16); + } while (++b < (16 >> (CONFIG_SB8X8 * 2))); } } @@ -1880,6 +2132,7 @@ static 
void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #endif } +#if !CONFIG_SB8X8 static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col) { @@ -2103,7 +2356,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, #if CONFIG_SB8X8 int y, x; #endif - if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) { mbmi->txfm_size = TX_16X16; @@ -2128,6 +2380,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } } } +#endif static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, @@ -2178,6 +2431,24 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_update_zbin_extra(cpi, x); } +#if CONFIG_SB8X8 + if (xd->mode_info_context->mbmi.mode == I4X4_PRED) { + assert(bsize == BLOCK_SIZE_SB8X8 && + xd->mode_info_context->mbmi.txfm_size == TX_4X4); + + vp9_encode_intra4x4mby(x, bsize); + vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); + vp9_subtract_sbuv(x, bsize); + vp9_transform_sbuv_4x4(x, bsize); + vp9_quantize_sbuv_4x4(x, bsize); + vp9_optimize_sbuv(cm, x, bsize); + vp9_inverse_transform_sbuv_4x4(xd, bsize); + vp9_recon_sbuv(xd, bsize); + + if (output_enabled) + sum_intra_stats(cpi, x); + } else +#endif if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); @@ -2213,6 +2484,12 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); } +#if CONFIG_SB8X8 + if (xd->mode_info_context->mbmi.mode == I4X4_PRED) { + assert(bsize == BLOCK_SIZE_SB8X8); + vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize); + } else +#endif if (!x->skip) { vp9_subtract_sb(x, bsize); @@ -2228,11 +2505,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sbuv_16x16(x, bsize); } if (x->optimize) { - 
vp9_optimize_sby_32x32(cm, x, bsize); + vp9_optimize_sby(cm, x, bsize); if (bsize == BLOCK_SIZE_SB64X64) - vp9_optimize_sbuv_32x32(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); else - vp9_optimize_sbuv_16x16(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); } vp9_inverse_transform_sby_32x32(xd, bsize); if (bsize == BLOCK_SIZE_SB64X64) @@ -2251,11 +2528,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sbuv_8x8(x, bsize); } if (x->optimize) { - vp9_optimize_sby_16x16(cm, x, bsize); + vp9_optimize_sby(cm, x, bsize); if (bsize >= BLOCK_SIZE_SB32X32) - vp9_optimize_sbuv_16x16(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); else - vp9_optimize_sbuv_8x8(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); } vp9_inverse_transform_sby_16x16(xd, bsize); if (bsize >= BLOCK_SIZE_SB32X32) @@ -2265,15 +2542,23 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, break; case TX_8X8: vp9_transform_sby_8x8(x, bsize); - vp9_transform_sbuv_8x8(x, bsize); vp9_quantize_sby_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - if (x->optimize) { - vp9_optimize_sby_8x8(cm, x, bsize); - vp9_optimize_sbuv_8x8(cm, x, bsize); - } + if (x->optimize) + vp9_optimize_sby(cm, x, bsize); vp9_inverse_transform_sby_8x8(xd, bsize); - vp9_inverse_transform_sbuv_8x8(xd, bsize); + if (bsize >= BLOCK_SIZE_MB16X16) { + vp9_transform_sbuv_8x8(x, bsize); + vp9_quantize_sbuv_8x8(x, bsize); + if (x->optimize) + vp9_optimize_sbuv(cm, x, bsize); + vp9_inverse_transform_sbuv_8x8(xd, bsize); + } else { + vp9_transform_sbuv_4x4(x, bsize); + vp9_quantize_sbuv_4x4(x, bsize); + if (x->optimize) + vp9_optimize_sbuv(cm, x, bsize); + vp9_inverse_transform_sbuv_4x4(xd, bsize); + } break; case TX_4X4: vp9_transform_sby_4x4(x, bsize); @@ -2281,8 +2566,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sby_4x4(x, bsize); vp9_quantize_sbuv_4x4(x, bsize); if (x->optimize) { - vp9_optimize_sby_4x4(cm, x, bsize); - vp9_optimize_sbuv_4x4(cm, x, bsize); + 
vp9_optimize_sby(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); } vp9_inverse_transform_sby_4x4(xd, bsize); vp9_inverse_transform_sbuv_4x4(xd, bsize); @@ -2316,8 +2601,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { if (bsize >= BLOCK_SIZE_SB32X32) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; - } else { + } else if (bsize >= BLOCK_SIZE_MB16X16) { cpi->txfm_count_16x16p[mi->mbmi.txfm_size]++; + } else { + cpi->txfm_count_8x8p[mi->mbmi.txfm_size]++; } } else { int x, y; @@ -2325,6 +2612,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) sz = TX_16X16; + if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) + sz = TX_8X8; for (y = 0; y < bh; y++) { for (x = 0; x < bw; x++) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index f6ddca8f4..c5f29fe7e 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -22,12 +22,15 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; (void) cpi; +#if !CONFIG_SB8X8 if (use_16x16_pred) { +#endif mbmi->mode = DC_PRED; mbmi->uv_mode = DC_PRED; mbmi->ref_frame = INTRA_FRAME; vp9_encode_intra16x16mby(&cpi->common, x); +#if !CONFIG_SB8X8 } else { int i; @@ -36,6 +39,7 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { encode_intra4x4block(x, i, BLOCK_SIZE_MB16X16); } } +#endif return vp9_get_mb_ss(x->plane[0].src_diff); } @@ -58,7 +62,7 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib, xd->plane[0].diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); - assert(ib < 16); + assert(ib < (16 >> (2 * CONFIG_SB8X8))); #if CONFIG_NEWBINTRAMODES xd->mode_info_context->bmi[ib].as_mode.context = @@ -68,22 +72,22 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib, vp9_intra4x4_predict(&x->e_mbd, ib, 
xd->mode_info_context->bmi[ib].as_mode.first, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 16, + vp9_subtract_block(4, 4, src_diff, 16 >> CONFIG_SB8X8, src, x->plane[0].src.stride, dst, xd->plane[0].dst.stride); tx_type = get_tx_type_4x4(&x->e_mbd, ib); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16, tx_type); + vp9_short_fht4x4(src_diff, coeff, 16 >> CONFIG_SB8X8, tx_type); x->quantize_b_4x4(x, ib, tx_type, 16); vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - diff, 16, tx_type); + diff, 16 >> CONFIG_SB8X8, tx_type); } else { - x->fwd_txm4x4(src_diff, coeff, 32); + x->fwd_txm4x4(src_diff, coeff, 32 >> CONFIG_SB8X8); x->quantize_b_4x4(x, ib, tx_type, 16); vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib], BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - diff, 32); + diff, 32 >> CONFIG_SB8X8); } vp9_recon_b(dst, diff, dst, xd->plane[0].dst.stride); @@ -110,21 +114,21 @@ void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); break; case TX_8X8: vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); break; default: vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); break; } @@ -144,14 +148,14 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); 
vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); break; default: // 16x16 or 8x8 vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); break; } @@ -159,6 +163,7 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_recon_sbuv(xd, BLOCK_SIZE_MB16X16); } +#if !CONFIG_SB8X8 void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { MACROBLOCKD *xd = &x->e_mbd; uint8_t* const src = @@ -304,3 +309,4 @@ void vp9_encode_intra8x8mbuv(MACROBLOCK *x) { encode_intra_uv4x4(x, i + 20, mode); // v } } +#endif diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index 7ec2f11d4..a4f4c184b 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -17,8 +17,10 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bs); +#if !CONFIG_SB8X8 void vp9_encode_intra8x8mby(MACROBLOCK *x); void vp9_encode_intra8x8mbuv(MACROBLOCK *x); void vp9_encode_intra8x8(MACROBLOCK *x, int ib); +#endif #endif // VP9_ENCODER_VP9_ENCODEINTRA_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 0cb1ae958..15fd4f1b6 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -249,57 +249,53 @@ static int trellis_get_coeff_context(const int *scan, return pt; } -static void optimize_b(VP9_COMMON *const cm, - MACROBLOCK *mb, int ib, PLANE_TYPE type, - const int16_t *dequant_ptr, +static void optimize_b(VP9_COMMON *const cm, 
MACROBLOCK *mb, + int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size, int y_blocks) { + TX_SIZE tx_size) { const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); - const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, - pb_idx.block, 16); + const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, + block, 16); int16_t *qcoeff_ptr; int16_t *dqcoeff_ptr; - int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0; + int eob = xd->plane[plane].eobs[block], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; + PLANE_TYPE type = xd->plane[plane].plane_type; int err_mult = plane_rd_mult[type]; int default_eob, pad; int const *scan, *nb; const int mul = 1 + (tx_size == TX_32X32); uint8_t token_cache[1024]; + const int ib = txfrm_block_to_raster_block(xd, bsize, plane, + block, 2 * tx_size); + const int16_t *dequant_ptr = xd->plane[plane].dequant; - assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); - dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16); - qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16); + assert((!type && !plane) || (type && plane)); + dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16); + qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); switch (tx_size) { default: case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(xd, ib); + const TX_TYPE tx_type = plane == 0 ? 
get_tx_type_4x4(xd, ib) : DCT_DCT; default_eob = 16; scan = get_scan_4x4(tx_type); break; } case TX_8X8: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 1 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1)); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT; scan = get_scan_8x8(tx_type); default_eob = 64; break; } case TX_16X16: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 2 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2)); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT; scan = get_scan_16x16(tx_type); default_eob = 256; break; @@ -480,203 +476,84 @@ static void optimize_b(VP9_COMMON *const cm, } final_eob++; - xd->plane[pb_idx.plane].eobs[pb_idx.block] = final_eob; + xd->plane[plane].eobs[block] = final_eob; *a = *l = (final_eob > 0); } -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - for (n = 0; n < bw; n++, a += 8) - ta[n] = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0; - for (n = 0; n < bh; n++, l += 8) - tl[n] = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh); - } -} - -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = 
&x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - for (n = 0; n < bw; n++, a += 4) - ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; - for (n = 0; n < bh; n++, l += 4) - tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh); - } -} - -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - ENTROPY_CONTEXT ta[8], tl[8]; - int n; - - for (n = 0; n < bw; n++, a += 2) - ta[n] = (a[0] + a[1]) != 0; - for (n = 0; n < bh; n++, l += 2) - tl[n] = (l[0] + l[1]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh); - } -} - -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - int bwl = b_width_log2(bsize), bw = 1 << bwl; - int bh = 1 << b_height_log2(bsize); - ENTROPY_CONTEXT ta[16], tl[16]; - int n; - - vpx_memcpy(ta, xd->plane[0].above_context, sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(tl, xd->plane[0].left_context, sizeof(ENTROPY_CONTEXT) * bh); +struct optimize_ctx { + ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; +}; - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; +struct 
optimize_block_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, bh * bw); - } -} +static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + const struct optimize_block_args* const args = arg; + MACROBLOCKD* const xd = &args->x->e_mbd; + int x, y; -void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - int b; + // find current entropy context + txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); - assert(bsize == BLOCK_SIZE_SB64X64); - for (b = 256; b < 384; b += 64) { - const int plane = 1 + (b >= 320); - ENTROPY_CONTEXT *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT *l = xd->plane[plane].left_context; - ENTROPY_CONTEXT a_ec, l_ec; - - a_ec = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant, - &a_ec, &l_ec, TX_32X32, 256); - } + optimize_b(args->cm, args->x, plane, block, bsize, + &args->ctx->ta[plane][x], &args->ctx->tl[plane][y], + ss_txfrm_size / 2); } -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 16 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[2], *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT tl[2], *l = xd->plane[plane].left_context; - - for (n = 0; n < bw; n++, a += 4) - ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; - for (n = 0; n < bh; n++, l += 4) - tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; - - for (n = 0; n < bw * bh; n++) 
{ - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_16X16, bh * bw * 64); +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx) { + int p; + + for (p = 0; p < MAX_MB_PLANE; p++) { + const struct macroblockd_plane* const plane = &xd->plane[p]; + const int bwl = b_width_log2(bsize) - plane->subsampling_x; + const int bhl = b_height_log2(bsize) - plane->subsampling_y; + const TX_SIZE tx_size = tx_size_for_plane(xd, bsize, p); + int i, j; + + for (i = 0; i < 1 << bwl; i += 1 << tx_size) { + int c = 0; + ctx->ta[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->ta[p][i] |= plane->above_context[i + j]; + } + } + for (i = 0; i < 1 << bhl; i += 1 << tx_size) { + int c = 0; + ctx->tl[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->tl[p][i] |= plane->left_context[i + j]; + } } - uvoff = (uvoff * 5) >> 2; // switch u -> v } } -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 4 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[4], *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT tl[4], *l = xd->plane[plane].left_context; - - for (n = 0; n < bw; n++, a += 2) - ta[n] = (a[0] + a[1]) != 0; - for (n = 0; n < bh; n++, l += 2) - tl[n] = (l[0] + l[1]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_8X8, bh * bw * 16); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v - } +void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, + 
BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, +#if !CONFIG_SB8X8 + 0, +#endif + optimize_block, &arg); } -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 1 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[8], tl[8]; - - vpx_memcpy(ta, xd->plane[plane].above_context, - sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(tl, xd->plane[plane].left_context, - sizeof(ENTROPY_CONTEXT) * bh); - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_4X4, bh * bw * 4); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v - } +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg); } +#if !CONFIG_SB8X8 void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *const xd = &x->e_mbd; const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; @@ -687,8 +564,8 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) { - vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); } vp9_inverse_transform_sby_16x16(xd, 
BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); @@ -696,20 +573,20 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); if (xd->mode_info_context->mbmi.mode == SPLITMV) { assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); } else { vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); } } else { @@ -718,8 +595,8 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) { - vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); } vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); @@ -735,6 +612,7 @@ void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_fidct_mb(cm, x); vp9_recon_sb(xd, BLOCK_SIZE_MB16X16); } +#endif /* this function is used by first pass only */ void vp9_encode_inter16x16y(MACROBLOCK *x, int mi_row, int mi_col) { diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index da134a86b..b1d8771e0 100644 --- a/vp9/encoder/vp9_encodemb.h +++ 
b/vp9/encoder/vp9_encodemb.h @@ -24,37 +24,30 @@ typedef struct { struct VP9_ENCODER_RTCD; +#if !CONFIG_SB8X8 void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, int mb_row, int mb_col); +#endif void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); + +#if !CONFIG_SB8X8 void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); +#endif void vp9_subtract_block(int rows, int cols, int16_t *diff_ptr, int diff_stride, diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index fe5d114ba..af62ec394 100644 --- 
a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -419,10 +419,10 @@ static void separate_arf_mbs(VP9_COMP *cpi) { cpi->static_mb_pct = 0; cpi->seg0_cnt = ncnt[0]; - vp9_enable_segmentation((VP9_PTR) cpi); + vp9_enable_segmentation((VP9_PTR)cpi); } else { cpi->static_mb_pct = 0; - vp9_disable_segmentation((VP9_PTR) cpi); + vp9_disable_segmentation((VP9_PTR)cpi); } // Free localy allocated storage diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c index 7d9462f94..88cd1f41b 100644 --- a/vp9/encoder/vp9_modecosts.c +++ b/vp9/encoder/vp9_modecosts.c @@ -41,8 +41,10 @@ void vp9_init_mode_costs(VP9_COMP *c) { x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); +#if !CONFIG_SB8X8 vp9_cost_tokens(c->mb.i8x8_mode_costs, x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree); +#endif for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i], diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 0af232eed..ceca60d70 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -280,8 +280,7 @@ static void setup_features(VP9_COMP *cpi) { MACROBLOCKD *xd = &cpi->mb.e_mbd; // Set up default state for MB feature flags - - xd->segmentation_enabled = 0; // Default segmentation disabled + xd->segmentation_enabled = 0; xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; @@ -383,7 +382,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; #if CONFIG_IMPLICIT_SEGMENTATION - xd->allow_implicit_segment_update = 0; + xd->allow_implicit_segment_update = 0; #endif cpi->static_mb_pct = 0; @@ -399,7 +398,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; #if CONFIG_IMPLICIT_SEGMENTATION - 
xd->allow_implicit_segment_update = 0; + xd->allow_implicit_segment_update = 0; #endif cpi->static_mb_pct = 0; @@ -428,9 +427,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) { xd->mb_segment_abs_delta = SEGMENT_DELTADATA; } - } - // All other frames if segmentation has been enabled - else if (xd->segmentation_enabled) { + } else if (xd->segmentation_enabled) { + // All other frames if segmentation has been enabled + // First normal frame in a valid gf or alt ref group if (cpi->common.frames_since_golden == 0) { // Set up segment features for normal frames in an arf group @@ -454,10 +453,10 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME); vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP); } - } - // Disable segmentation and clear down features if alt ref - // is not active for this group - else { + } else { + // Disable segmentation and clear down features if alt ref + // is not active for this group + vp9_disable_segmentation((VP9_PTR)cpi); vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); @@ -467,12 +466,11 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_clearall_segfeatures(xd); } - } + } else if (cpi->is_src_frame_alt_ref) { + // Special case where we are coding over the top of a previous + // alt ref frame. + // Segment coding disabled for compred testing - // Special case where we are coding over the top of a previous - // alt ref frame. - // Segment coding disabled for compred testing - else if (cpi->is_src_frame_alt_ref) { // Enable ref frame features for segment 0 as well vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME); vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME); @@ -490,9 +488,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) { } // Enable data udpate xd->update_mb_segmentation_data = 1; - } - // All other frames. - else { + } else { + // All other frames. + // No updates.. leave things as they are. 
xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; @@ -628,7 +626,9 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) { sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500; sf->thresh_mult[THR_B_PRED ] += speed_multiplier * 2500; +#if !CONFIG_SB8X8 sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500; +#endif sf->thresh_mult[THR_NEWMV ] += speed_multiplier * 1000; sf->thresh_mult[THR_NEWG ] += speed_multiplier * 1000; @@ -3326,9 +3326,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count); vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count); +#if !CONFIG_SB8X8 vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count); +#endif vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count); +#if !CONFIG_SB8X8 vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); +#endif vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count); #if CONFIG_COMP_INTERINTRA_PRED vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index aeaf1bda3..541127e51 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -48,9 +48,9 @@ #define KEY_FRAME_CONTEXT 5 #if CONFIG_COMP_INTERINTRA_PRED -#define MAX_MODES 54 +#define MAX_MODES 54 - CONFIG_SB8X8 #else -#define MAX_MODES 42 +#define MAX_MODES 42 - CONFIG_SB8X8 #endif #define MIN_THRESHMULT 32 @@ -72,7 +72,9 @@ typedef struct { // Stats int y_modes[VP9_YMODES]; int uv_modes[VP9_UV_MODES]; +#if !CONFIG_SB8X8 int i8x8_modes[VP9_I8X8_MODES]; +#endif int b_modes[B_MODE_COUNT]; int inter_y_modes[MB_MODE_COUNT]; int inter_uv_modes[VP9_UV_MODES]; @@ -100,9 +102,13 @@ typedef struct { vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; vp9_prob 
bmode_prob[VP9_NKF_BINTRAMODES - 1]; +#if !CONFIG_SB8X8 vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; +#endif vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; +#if !CONFIG_SB8X8 vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; +#endif vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] @@ -207,7 +213,9 @@ typedef enum { THR_SPLITA, THR_B_PRED, +#if !CONFIG_SB8X8 THR_I8X8_PRED, +#endif THR_COMP_ZEROLG, THR_COMP_NEARESTLG, @@ -273,10 +281,17 @@ typedef struct { } SPEED_FEATURES; enum BlockSize { +#if CONFIG_SB8X8 + BLOCK_4X4, + BLOCK_8X8, + BLOCK_8X16, + BLOCK_16X8, +#else BLOCK_16X8 = PARTITIONING_16X8, BLOCK_8X16 = PARTITIONING_8X16, BLOCK_8X8 = PARTITIONING_8X8, BLOCK_4X4 = PARTITIONING_4X4, +#endif BLOCK_16X16, BLOCK_MAX_SEGMENTS, BLOCK_32X32 = BLOCK_MAX_SEGMENTS, @@ -451,9 +466,13 @@ typedef struct VP9_COMP { int sb_ymode_count [VP9_I32X32_MODES]; int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */ int bmode_count[VP9_NKF_BINTRAMODES]; +#if !CONFIG_SB8X8 int i8x8_mode_count[VP9_I8X8_MODES]; +#endif int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS]; +#if !CONFIG_SB8X8 int mbsplit_count[VP9_NUMMBSPLITS]; +#endif int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES]; unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; #if CONFIG_COMP_INTERINTRA_PRED diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 6c8474c0e..fe8ba4b64 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -223,9 +223,9 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { } void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2; - const int bhl = b_height_log2(bsize) - 2; - const int uoff = 16 << (bhl + bwl); + const int bwl = b_width_log2(bsize) - 1; + const int bhl = b_height_log2(bsize) - 1; + const int uoff = 4 << (bhl + bwl); int i; for (i = uoff; i < ((uoff * 3) >> 1); 
i += 4) @@ -233,9 +233,9 @@ void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { } void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2; - const int bhl = b_height_log2(bsize) - 2; - const int uoff = 16 << (bhl + bwl); + const int bwl = b_width_log2(bsize); + const int bhl = b_height_log2(bsize); + const int uoff = 1 << (bhl + bwl); int i; for (i = uoff; i < ((uoff * 3) >> 1); i++) diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 47252253d..42d339dfb 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -138,9 +138,13 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob); vp9_copy(cc->bmode_prob, cm->fc.bmode_prob); vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob); +#if !CONFIG_SB8X8 vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob); +#endif vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob); +#if !CONFIG_SB8X8 vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob); +#endif vp9_copy(cc->partition_prob, cm->fc.partition_prob); // Stats @@ -198,10 +202,14 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cm->fc.ymode_prob, cc->ymode_prob); vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob); vp9_copy(cm->fc.bmode_prob, cc->bmode_prob); +#if !CONFIG_SB8X8 vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob); +#endif vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob); vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob); +#if !CONFIG_SB8X8 vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob); +#endif vp9_copy(cm->fc.partition_prob, cc->partition_prob); // Stats diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0e85a0c71..da78be14a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -105,7 +105,9 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {SPLITMV, ALTREF_FRAME, NONE}, {I4X4_PRED, INTRA_FRAME, NONE}, +#if !CONFIG_SB8X8 {I8X8_PRED, INTRA_FRAME, NONE}, +#endif /* 
compound prediction modes */ {ZEROMV, LAST_FRAME, GOLDEN_FRAME}, @@ -563,17 +565,19 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; - } else if ( cm->txfm_mode == ALLOW_16X16 || - (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) || - (cm->txfm_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1])) { + } else if (max_txfm_size >= TX_16X16 && + (cm->txfm_mode == ALLOW_16X16 || + cm->txfm_mode == ALLOW_32X32 || + (cm->txfm_mode == TX_MODE_SELECT && + rd[TX_16X16][1] < rd[TX_8X8][1] && + rd[TX_16X16][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode == ALLOW_8X8 || + cm->txfm_mode == ALLOW_16X16 || + cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_8X8; } else { - assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT); mbmi->txfm_size = TX_4X4; } @@ -583,13 +587,14 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; - txfm_cache[ALLOW_16X16] = rd[TX_16X16][0]; - txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0]; + txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; + txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; if (max_txfm_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) + else if (max_txfm_size >= TX_16X16 && + rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; else txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? 
@@ -794,12 +799,18 @@ static void super_block_yrd(VP9_COMP *cpi, if (bs >= BLOCK_SIZE_SB32X32) super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], bs); - super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], bs); + if (bs >= BLOCK_SIZE_MB16X16) + super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], + bs); super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs); super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, - TX_32X32 - (bs < BLOCK_SIZE_SB32X32)); + TX_32X32 - (bs < BLOCK_SIZE_SB32X32) +#if CONFIG_SB8X8 + - (bs < BLOCK_SIZE_MB16X16) +#endif + ); } static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, @@ -816,17 +827,41 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, VP9_COMMON *const cm = &cpi->common; const int src_stride = x->plane[0].src.stride; uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_uint8(xd, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + 0, ib, x->plane[0].src.buf, src_stride); int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_int16(xd, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + 0, ib, x->plane[0].src_diff); int16_t* const diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_int16(xd, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + 0, ib, xd->plane[0].diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_uint8(xd, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + 0, ib, xd->plane[0].dst.buf, xd->plane[0].dst.stride); ENTROPY_CONTEXT ta = *a, tempa 
= *a; ENTROPY_CONTEXT tl = *l, templ = *l; @@ -839,7 +874,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, * */ DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); - assert(ib < 16); + assert(ib < (16 >> (2 * CONFIG_SB8X8))); #if CONFIG_NEWBINTRAMODES xd->mode_info_context->bmi[ib].as_mode.context = vp9_find_bpred_context(xd, ib, dst, xd->plane[0].dst.stride); @@ -868,17 +903,17 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, #endif vp9_intra4x4_predict(xd, ib, mode, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 16, + vp9_subtract_block(4, 4, src_diff, 16 >> CONFIG_SB8X8, src, src_stride, dst, xd->plane[0].dst.stride); xd->mode_info_context->bmi[ib].as_mode.first = mode; tx_type = get_tx_type_4x4(xd, ib); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16, tx_type); + vp9_short_fht4x4(src_diff, coeff, 16 >> CONFIG_SB8X8, tx_type); x->quantize_b_4x4(x, ib, tx_type, 16); } else { - x->fwd_txm4x4(src_diff, coeff, 32); + x->fwd_txm4x4(src_diff, coeff, 32 >> CONFIG_SB8X8); x->quantize_b_4x4(x, ib, tx_type, 16); } @@ -911,9 +946,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // inverse transform if (best_tx_type != DCT_DCT) - vp9_short_iht4x4(best_dqcoeff, diff, 16, best_tx_type); + vp9_short_iht4x4(best_dqcoeff, diff, 16 >> CONFIG_SB8X8, best_tx_type); else - xd->inv_txm4x4(best_dqcoeff, diff, 32); + xd->inv_txm4x4(best_dqcoeff, diff, 32 >> CONFIG_SB8X8); vp9_intra4x4_predict(xd, ib, *best_mode, dst, xd->plane[0].dst.stride); @@ -932,7 +967,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; - ENTROPY_CONTEXT t_above[4], t_left[4]; + ENTROPY_CONTEXT t_above[4 >> CONFIG_SB8X8], t_left[4 >> CONFIG_SB8X8]; int *bmode_costs; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); @@ -941,15 +976,21 @@ static int64_t 
rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, xd->mode_info_context->mbmi.mode = I4X4_PRED; bmode_costs = mb->inter_bmode_costs; - for (i = 0; i < 16; i++) { - const int x_idx = i & 3, y_idx = i >> 2; + for (i = 0; i < (16 >> (2 * CONFIG_SB8X8)); i++) { + const int x_idx = i & (3 >> CONFIG_SB8X8), y_idx = i >> (2 >> CONFIG_SB8X8); MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); #if CONFIG_NEWBINTRAMODES uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i, + raster_block_offset_uint8(xd, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + 0, i, xd->plane[0].dst.buf, xd->plane[0].dst.stride); #endif @@ -1046,6 +1087,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } +#if !CONFIG_SB8X8 static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, int *mode_costs, @@ -1283,6 +1325,7 @@ static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x, return tmp_rd; } +#endif // !CONFIG_SB8X8 static int rd_cost_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { @@ -1457,10 +1500,9 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize); } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) { super_block_uvrd_16x16(cm, x, rate, distortion, skippable, bsize); - } else if (mbmi->txfm_size >= TX_8X8) { + } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) { super_block_uvrd_8x8(cm, x, rate, distortion, skippable, bsize); } else { - assert(mbmi->txfm_size == TX_4X4); super_block_uvrd_4x4(cm, x, rate, distortion, skippable, bsize); } } @@ -1524,6 +1566,514 @@ void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE 
mb, int_mv *mv) { x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; } +#if CONFIG_SB8X8 +static int labels2mode(MACROBLOCK *x, + int const *labelings, int which_label, + B_PREDICTION_MODE this_mode, + int_mv *this_mv, int_mv *this_second_mv, + int_mv seg_mvs[MAX_REF_FRAMES - 1], + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mic = xd->mode_info_context; + MB_MODE_INFO * mbmi = &mic->mbmi; + const int mis = xd->mode_info_stride; + int i, cost = 0, thismvcost = 0; + + /* We have to be careful retrieving previously-encoded motion vectors. + Ones from this macroblock have to be pulled from the BLOCKD array + as they have not yet made it to the bmi array in our MB_MODE_INFO. */ + for (i = 0; i < 4; ++i) { + const int row = i >> 1, col = i & 1; + B_PREDICTION_MODE m; + + if (labelings[i] != which_label) + continue; + + if (col && labelings[i] == labelings[i - 1]) + m = LEFT4X4; + else if (row && labelings[i] == labelings[i - 2]) + m = ABOVE4X4; + else { + // the only time we should do costing for new motion vector or mode + // is when we are on a new label (jbb May 08, 2007) + switch (m = this_mode) { + case NEW4X4 : + if (mbmi->second_ref_frame > 0) { + this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int; + this_second_mv->as_int = + seg_mvs[mbmi->second_ref_frame - 1].as_int; + } + + thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, + 102, xd->allow_high_precision_mv); + if (mbmi->second_ref_frame > 0) { + thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, + mvjcost, mvcost, 102, + xd->allow_high_precision_mv); + } + break; + case LEFT4X4: + this_mv->as_int = col ? mic->bmi[i - 1].as_mv[0].as_int : + left_block_mv(xd, mic, i); + if (mbmi->second_ref_frame > 0) + this_second_mv->as_int = col ? mic->bmi[i - 1].as_mv[1].as_int : + left_block_second_mv(xd, mic, i); + break; + case ABOVE4X4: + this_mv->as_int = row ? 
mic->bmi[i - 2].as_mv[0].as_int : + above_block_mv(mic, i, mis); + if (mbmi->second_ref_frame > 0) + this_second_mv->as_int = row ? mic->bmi[i - 2].as_mv[1].as_int : + above_block_second_mv(mic, i, mis); + break; + case ZERO4X4: + this_mv->as_int = 0; + if (mbmi->second_ref_frame > 0) + this_second_mv->as_int = 0; + break; + default: + break; + } + + if (m == ABOVE4X4) { // replace above with left if same + int_mv left_mv, left_second_mv; + + left_second_mv.as_int = 0; + left_mv.as_int = col ? mic->bmi[i - 1].as_mv[0].as_int : + left_block_mv(xd, mic, i); + if (mbmi->second_ref_frame > 0) + left_second_mv.as_int = col ? mic->bmi[i - 1].as_mv[1].as_int : + left_block_second_mv(xd, mic, i); + + if (left_mv.as_int == this_mv->as_int && + (mbmi->second_ref_frame <= 0 || + left_second_mv.as_int == this_second_mv->as_int)) + m = LEFT4X4; + } + +#if CONFIG_NEWBINTRAMODES + cost = x->inter_bmode_costs[m == B_CONTEXT_PRED ? + m - CONTEXT_PRED_REPLACEMENTS : m]; +#else + cost = x->inter_bmode_costs[m]; +#endif + } + + mic->bmi[i].as_mv[0].as_int = this_mv->as_int; + if (mbmi->second_ref_frame > 0) + mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; + + x->partition_info->bmi[i].mode = m; + x->partition_info->bmi[i].mv.as_int = this_mv->as_int; + if (mbmi->second_ref_frame > 0) + x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; + } + + cost += thismvcost; + return cost; +} + +static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, + MACROBLOCK *x, + int const *labels, + int which_label, + int *labelyrate, + int *distortion, + ENTROPY_CONTEXT *ta, + ENTROPY_CONTEXT *tl) { + int i; + MACROBLOCKD *xd = &x->e_mbd; + + *labelyrate = 0; + *distortion = 0; + for (i = 0; i < 4; i++) { + if (labels[i] == which_label) { + const int src_stride = x->plane[0].src.stride; + uint8_t* const src = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, src_stride); + int16_t* const src_diff = + raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 
0, i, + x->plane[0].src_diff); + int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); + uint8_t* const pre = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); + uint8_t* const dst = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + int thisdistortion; + + vp9_build_inter_predictor(pre, + xd->plane[0].pre[0].stride, + dst, + xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[0], + &xd->scale_factor[0], + 4, 4, 0 /* no avg */, &xd->subpix); + + // TODO(debargha): Make this work properly with the + // implicit-compoundinter-weight experiment when implicit + // weighting for splitmv modes is turned on. + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + uint8_t* const second_pre = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[1].buf, + xd->plane[0].pre[1].stride); + vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, + dst, xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[1], + &xd->scale_factor[1], 4, 4, 1, + &xd->subpix); + } + + vp9_subtract_block(4, 4, src_diff, 8, + src, src_stride, + dst, xd->plane[0].dst.stride); + x->fwd_txm4x4(src_diff, coeff, 16); + x->quantize_b_4x4(x, i, DCT_DCT, 16); + thisdistortion = vp9_block_error(coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, + i, 16), 16); + *distortion += thisdistortion; + *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, + ta + (i & 1), + tl + (i >> 1), TX_4X4, 16); + } + } + *distortion >>= 2; + return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); +} + +typedef struct { + int_mv *ref_mv, *second_ref_mv; + int_mv mvp; + + int64_t segment_rd; + int r; + int d; + int segment_yrate; + B_PREDICTION_MODE modes[4]; + int_mv mvs[4], second_mvs[4]; + int eobs[4]; + + int mvthresh; + int *mdcounts; +} BEST_SEG_INFO; +#endif // CONFIG_SB8X8 + +static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv 
*mv) { + int r = 0; + r |= (mv->as_mv.row >> 3) < x->mv_row_min; + r |= (mv->as_mv.row >> 3) > x->mv_row_max; + r |= (mv->as_mv.col >> 3) < x->mv_col_min; + r |= (mv->as_mv.col >> 3) > x->mv_col_max; + return r; +} + +#if CONFIG_SB8X8 +static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, + BEST_SEG_INFO *bsi, + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { + int i, j; + static const int labels[4] = { 0, 1, 2, 3 }; + int br = 0, bd = 0; + B_PREDICTION_MODE this_mode; + MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; + const int label_count = 4; + int64_t this_segment_rd = 0, other_segment_rd; + int label_mv_thresh; + int rate = 0; + int sbr = 0, sbd = 0; + int segmentyrate = 0; + int best_eobs[4] = { 0 }; + + vp9_variance_fn_ptr_t *v_fn_ptr; + + ENTROPY_CONTEXT t_above[2], t_left[2]; + ENTROPY_CONTEXT t_above_b[2], t_left_b[2]; + + vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); + + v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4]; + + // 64 makes this threshold really big effectively + // making it so that we very rarely check mvs on + // segments. 
setting this to 1 would make mv thresh + // roughly equal to what it is for macroblocks + label_mv_thresh = 1 * bsi->mvthresh / label_count; + + // Segmentation method overheads + rate += vp9_cost_mv_ref(cpi, SPLITMV, + mbmi->mb_mode_context[mbmi->ref_frame]); + this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); + br += rate; + other_segment_rd = this_segment_rd; + + for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { + int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; + int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; + B_PREDICTION_MODE mode_selected = ZERO4X4; + int bestlabelyrate = 0; + + // search for the best motion vector on this segment + for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { + int64_t this_rd; + int distortion; + int labelyrate; + ENTROPY_CONTEXT t_above_s[2], t_left_s[2]; + + vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); + vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); + + // motion search for newmv (single predictor case only) + if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { + int sseshift, n; + int step_param = 0; + int further_steps; + int thissme, bestsme = INT_MAX; + const struct buf_2d orig_src = x->plane[0].src; + const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; + + /* Is the best so far sufficiently good that we can't justify doing + * a new motion search. */ + if (best_label_rd < label_mv_thresh) + break; + + if (cpi->compressor_speed) { + // use previous block's result as next block's MV predictor. 
+ if (i > 0) { + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; + if (i == 2) + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; + step_param = 2; + } + } + + further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; + + { + int sadpb = x->sadperbit4; + int_mv mvp_full; + + mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; + mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; + + // find first label + n = i; + + // adjust src pointer for this segment + x->plane[0].src.buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->plane[0].src.buf, + x->plane[0].src.stride); + assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0xf) == 0); + x->e_mbd.plane[0].pre[0].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->e_mbd.plane[0].pre[0].buf, + x->e_mbd.plane[0].pre[0].stride); + + bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 0, v_fn_ptr, + bsi->ref_mv, &mode_mv[NEW4X4]); + + sseshift = 0; + + // Should we do a full search (best quality only) + if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { + /* Check if mvp_full is within the range. 
*/ + clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); + + thissme = cpi->full_search_sad(x, &mvp_full, + sadpb, 16, v_fn_ptr, + x->nmvjointcost, x->mvcost, + bsi->ref_mv, + n); + + if (thissme < bestsme) { + bestsme = thissme; + mode_mv[NEW4X4].as_int = + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; + } else { + /* The full search result is actually worse so re-instate the + * previous best vector */ + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = + mode_mv[NEW4X4].as_int; + } + } + } + + if (bestsme < INT_MAX) { + int distortion; + unsigned int sse; + cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4], + bsi->ref_mv, x->errorperbit, v_fn_ptr, + x->nmvjointcost, x->mvcost, + &distortion, &sse); + + // save motion search result for use in compound prediction + seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; + } + + // restore src pointers + x->plane[0].src = orig_src; + x->e_mbd.plane[0].pre[0] = orig_pre; + } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) { + /* NEW4X4 */ + /* motion search not completed? 
Then skip newmv for this block with + * comppred */ + if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || + seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { + continue; + } + } + + rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], + &second_mode_mv[this_mode], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); + + // Trap vectors that reach beyond the UMV borders + if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || + ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || + ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { + continue; + } + if (mbmi->second_ref_frame > 0 && + mv_check_bounds(x, &second_mode_mv[this_mode])) + continue; + + this_rd = encode_inter_mb_segment(&cpi->common, + x, labels, i, &labelyrate, + &distortion, t_above_s, t_left_s); + this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); + rate += labelyrate; + + if (this_rd < best_label_rd) { + sbr = rate; + sbd = distortion; + bestlabelyrate = labelyrate; + mode_selected = this_mode; + best_label_rd = this_rd; + for (j = 0; j < 4; j++) + if (labels[j] == i) + best_eobs[j] = x->e_mbd.plane[0].eobs[j]; + + vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); + vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); + } + } /*for each 4x4 mode*/ + + vpx_memcpy(t_above, t_above_b, sizeof(t_above)); + vpx_memcpy(t_left, t_left_b, sizeof(t_left)); + + labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], + &second_mode_mv[mode_selected], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); + + br += sbr; + bd += sbd; + segmentyrate += bestlabelyrate; + this_segment_rd += best_label_rd; + other_segment_rd += best_other_rd; + } /* for each label */ + + if (this_segment_rd < bsi->segment_rd) { + bsi->r = br; + bsi->d = bd; + bsi->segment_yrate = segmentyrate; + bsi->segment_rd = this_segment_rd; + + // store everything needed to 
come back to this!! + for (i = 0; i < 4; i++) { + bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; + if (mbmi->second_ref_frame > 0) + bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; + bsi->modes[i] = x->partition_info->bmi[i].mode; + bsi->eobs[i] = best_eobs[i]; + } + } +} + +static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x, + BEST_SEG_INFO *bsi, + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { + rd_check_segment_txsize(cpi, x, bsi, seg_mvs); +} + +static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + int64_t best_rd, + int *mdcounts, + int *returntotrate, + int *returnyrate, + int *returndistortion, + int *skippable, int mvthresh, + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { + int i; + BEST_SEG_INFO bsi; + MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; + + vpx_memset(&bsi, 0, sizeof(bsi)); + + bsi.segment_rd = best_rd; + bsi.ref_mv = best_ref_mv; + bsi.second_ref_mv = second_best_ref_mv; + bsi.mvp.as_int = best_ref_mv->as_int; + bsi.mvthresh = mvthresh; + bsi.mdcounts = mdcounts; + + for (i = 0; i < 4; i++) + bsi.modes[i] = ZERO4X4; + + rd_check_segment(cpi, x, &bsi, seg_mvs); + + /* set it to the best */ + for (i = 0; i < 4; i++) { + x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int; + if (mbmi->second_ref_frame > 0) + x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int = + bsi.second_mvs[i].as_int; + x->e_mbd.plane[0].eobs[i] = bsi.eobs[i]; + } + + /* save partitions */ + x->partition_info->count = 4; + + for (i = 0; i < x->partition_info->count; i++) { + x->partition_info->bmi[i].mode = bsi.modes[i]; + x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv; + if (mbmi->second_ref_frame > 0) + x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv; + } + /* + * used to set mbmi->mv.as_int + */ + x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int; + if (mbmi->second_ref_frame > 0) + 
x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int; + + *returntotrate = bsi.r; + *returndistortion = bsi.d; + *returnyrate = bsi.segment_yrate; + *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8); + + return (int)(bsi.segment_rd); +} + +#else // !CONFIG_SB8X8 + static int labels2mode( MACROBLOCK *x, int const *labelings, int which_label, @@ -1887,15 +2437,6 @@ typedef struct { } BEST_SEG_INFO; -static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { - int r = 0; - r |= (mv->as_mv.row >> 3) < x->mv_row_min; - r |= (mv->as_mv.row >> 3) > x->mv_row_max; - r |= (mv->as_mv.col >> 3) < x->mv_col_min; - r |= (mv->as_mv.col >> 3) > x->mv_col_max; - return r; -} - static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, SPLITMV_PARTITIONING_TYPE segmentation, @@ -2428,6 +2969,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, return (int)(bsi.segment_rd); } +#endif // !CONFIG_SB8X8 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, @@ -2474,6 +3016,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, x->mv_best_ref_index[ref_frame] = best_index; } +#if !CONFIG_SB8X8 static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) { int i; MACROBLOCKD *xd = &x->e_mbd; @@ -2487,6 +3030,7 @@ static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) { // modes[0], modes[1], modes[2], modes[3]); } } +#endif extern void vp9_calc_ref_probs(int *count, vp9_prob *probs); static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) { @@ -3193,6 +3737,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; // if 0, this will be re-calculated by caller } +#if !CONFIG_SB8X8 static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *returnrate, int *returndistortion, @@ -4053,6 +4598,7 @@ end: mbmi->second_ref_frame][0], best_pred_diff, best_txfm_diff); } +#endif 
// !CONFIG_SB8X8 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist, @@ -4065,14 +4611,30 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int dist_y = 0, dist_uv; int y_skip = 0, uv_skip; int64_t txfm_cache[NB_TXFM_MODES], err; +#if CONFIG_SB8X8 + MB_PREDICTION_MODE mode; + TX_SIZE txfm_size; + int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y; + int64_t err4x4 = INT64_MAX; +#endif int i; ctx->skip = 0; xd->mode_info_context->mbmi.mode = DC_PRED; err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, txfm_cache); +#if CONFIG_SB8X8 + mode = xd->mode_info_context->mbmi.mode; + txfm_size = xd->mode_info_context->mbmi.txfm_size; +#endif rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize); +#if CONFIG_SB8X8 + if (bsize == BLOCK_SIZE_SB8X8) + err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y, + &rate4x4_y_tokenonly, + &dist4x4_y, err); +#endif if (y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + @@ -4080,18 +4642,39 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returndist = dist_y + (dist_uv >> 2); memset(ctx->txfm_rd_diff, 0, sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff)); +#if CONFIG_SB8X8 + xd->mode_info_context->mbmi.mode = mode; + xd->mode_info_context->mbmi.txfm_size = txfm_size; + } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) { + *returnrate = rate4x4_y + rate_uv + + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + *returndist = dist4x4_y + (dist_uv >> 2); + for (i = 0; i < NB_TXFM_MODES; i++) { + ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]); + } + xd->mode_info_context->mbmi.txfm_size = TX_4X4; +#endif } else { *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); for (i = 0; i < NB_TXFM_MODES; i++) { +#if CONFIG_SB8X8 + ctx->txfm_rd_diff[i] = MIN(err4x4, err - 
txfm_cache[i]); +#else ctx->txfm_rd_diff[i] = err - txfm_cache[i]; +#endif } +#if CONFIG_SB8X8 + xd->mode_info_context->mbmi.txfm_size = txfm_size; + xd->mode_info_context->mbmi.mode = mode; +#endif } vpx_memcpy(&ctx->mic, xd->mode_info_context, sizeof(MODE_INFO)); } +#if !CONFIG_SB8X8 void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist) { VP9_COMMON *cm = &cpi->common; @@ -4218,6 +4801,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = rate; *returndist = dist; } +#endif int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, @@ -4272,7 +4856,20 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, unsigned int mode_mask = 0; int64_t mode_distortions[MB_MODE_COUNT] = {-1}; int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; + int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, + cpi->common.y_dc_delta_q); +#if CONFIG_SB8X8 + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]; +#endif +#if CONFIG_SB8X8 + for (i = 0; i < 4; i++) { + int j; + + for (j = 0; j < MAX_REF_FRAMES - 1; j++) + seg_mvs[i][j].as_int = INVALID_MV; + } +#endif // Everywhere the flag is set the error is much higher than its neighbors. 
ctx->frames_with_high_error = 0; ctx->modes_with_high_error = 0; @@ -4400,9 +4997,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // if (!(cpi->ref_frame_flags & flag_list[ref_frame])) // continue; - if (this_mode == I8X8_PRED || + if ( +#if CONFIG_SB8X8 + bsize != BLOCK_SIZE_SB8X8 && + (this_mode == I4X4_PRED || this_mode == SPLITMV) +#else this_mode == I4X4_PRED || - this_mode == SPLITMV) + this_mode == I8X8_PRED || + this_mode == SPLITMV +#endif + ) continue; // if (vp9_mode_order[mode_index].second_ref_frame == INTRA_FRAME) // continue; @@ -4465,6 +5069,27 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } +#if CONFIG_SB8X8 + if (this_mode == I4X4_PRED) { + int rate; + + // Note the rate value returned here includes the cost of coding + // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED]; + assert(bsize == BLOCK_SIZE_SB8X8); + mbmi->txfm_size = TX_4X4; + rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, + &distortion_y, INT64_MAX); + rate2 += rate; + rate2 += intra_cost_penalty; + distortion2 += distortion_y; + + rate2 += rate_uv_intra[TX_4X4]; + rate_uv = rate_uv_intra[TX_4X4]; + distortion2 += dist_uv[TX_4X4]; + distortion_uv = dist_uv[TX_4X4]; + mbmi->uv_mode = mode_uv[TX_4X4]; + } else +#endif if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; vp9_build_intra_predictors_sby_s(xd, bsize); @@ -4483,7 +5108,139 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->uv_mode = mode_uv[uv_tx]; rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv; + if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) + rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; +#if CONFIG_SB8X8 + } else if (this_mode == SPLITMV) { + const int is_comp_pred = mbmi->second_ref_frame > 0; + int rate, distortion; + int64_t this_rd_thresh; + int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; + int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; + int 
tmp_best_distortion = INT_MAX, tmp_best_skippable = 0; + int switchable_filter_index; + int_mv *second_ref = is_comp_pred ? + &mbmi->ref_mvs[mbmi->second_ref_frame][0] : NULL; + union b_mode_info tmp_best_bmodes[16]; + MB_MODE_INFO tmp_best_mbmode; + PARTITION_INFO tmp_best_partition; + int pred_exists = 0; + int uv_skippable; + + this_rd_thresh = (mbmi->ref_frame == LAST_FRAME) ? + cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; + this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ? + cpi->rd_threshes[THR_NEWG] : this_rd_thresh; + xd->mode_info_context->mbmi.txfm_size = TX_4X4; + + for (switchable_filter_index = 0; + switchable_filter_index < VP9_SWITCHABLE_FILTERS; + ++switchable_filter_index) { + int newbest; + mbmi->interp_filter = + vp9_switchable_interp[switchable_filter_index]; + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + + tmp_rd = rd_pick_best_mbsegmentation(cpi, x, + &mbmi->ref_mvs[mbmi->ref_frame][0], + second_ref, INT64_MAX, mdcounts, + &rate, &rate_y, &distortion, + &skippable, + (int)this_rd_thresh, seg_mvs); + if (cpi->common.mcomp_filter_type == SWITCHABLE) { + int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs + [vp9_get_pred_context(&cpi->common, xd, + PRED_SWITCHABLE_INTERP)] + [vp9_switchable_interp_map[mbmi->interp_filter]]; + tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0); + } + newbest = (tmp_rd < tmp_best_rd); + if (newbest) { + tmp_best_filter = mbmi->interp_filter; + tmp_best_rd = tmp_rd; + } + if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || + (mbmi->interp_filter == cm->mcomp_filter_type && + cm->mcomp_filter_type != SWITCHABLE)) { + tmp_best_rdu = tmp_rd; + tmp_best_rate = rate; + tmp_best_ratey = rate_y; + tmp_best_distortion = distortion; + tmp_best_skippable = skippable; + vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&tmp_best_partition, x->partition_info, + sizeof(PARTITION_INFO)); + for (i = 0; i < 4; i++) { + tmp_best_bmodes[i] = 
xd->mode_info_context->bmi[i]; + } + pred_exists = 1; + } + } // switchable_filter_index loop + + mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? + tmp_best_filter : cm->mcomp_filter_type); + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + if (!pred_exists) { + // Handles the special case when a filter that is not in the + // switchable list (bilinear, 6-tap) is indicated at the frame level + tmp_rd = rd_pick_best_mbsegmentation(cpi, x, + &mbmi->ref_mvs[mbmi->ref_frame][0], + second_ref, INT64_MAX, mdcounts, + &rate, &rate_y, &distortion, + &skippable, + (int)this_rd_thresh, seg_mvs); + } else { + if (cpi->common.mcomp_filter_type == SWITCHABLE) { + int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs + [vp9_get_pred_context(&cpi->common, xd, + PRED_SWITCHABLE_INTERP)] + [vp9_switchable_interp_map[mbmi->interp_filter]]; + tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); + } + tmp_rd = tmp_best_rdu; + rate = tmp_best_rate; + rate_y = tmp_best_ratey; + distortion = tmp_best_distortion; + skippable = tmp_best_skippable; + vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(x->partition_info, &tmp_best_partition, + sizeof(PARTITION_INFO)); + for (i = 0; i < 4; i++) { + xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; + } + } + + rate2 += rate; + distortion2 += distortion; + + if (cpi->common.mcomp_filter_type == SWITCHABLE) + rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs + [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] + [vp9_switchable_interp_map[mbmi->interp_filter]]; + + // If even the 'Y' rd value of split is higher than best so far + // then don't bother looking at UV + vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, + bsize); + vp9_subtract_sbuv(x, bsize); + super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv, + &uv_skippable, bsize); + rate2 += rate_uv; + distortion2 += distortion_uv; + skippable = skippable && uv_skippable; + + if 
(!mode_excluded) { + if (is_comp_pred) + mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; + else + mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; + } + + compmode_cost = + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); + mbmi->mode = this_mode; +#endif } else { YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; int fb; @@ -4693,6 +5450,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ + if (bsize < BLOCK_SIZE_SB32X32) { + if (bsize < BLOCK_SIZE_MB16X16) { + if (this_mode == SPLITMV || this_mode == I4X4_PRED) + txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4]; + txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8]; + } + txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16]; + } if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; @@ -4769,13 +5534,27 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { mbmi->mode = ZEROMV; mbmi->ref_frame = ALTREF_FRAME; - mbmi->second_ref_frame = INTRA_FRAME; + mbmi->second_ref_frame = NONE; mbmi->mv[0].as_int = 0; mbmi->uv_mode = DC_PRED; mbmi->mb_skip_coeff = 1; +#if !CONFIG_SB8X8 mbmi->partitioning = 0; - mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ? 
- TX_32X32 : cm->txfm_mode; +#endif + if (cm->txfm_mode == TX_MODE_SELECT) { + if (bsize >= BLOCK_SIZE_SB32X32) + mbmi->txfm_size = TX_32X32; +#if CONFIG_SB8X8 + else if (bsize >= BLOCK_SIZE_MB16X16) +#else + else +#endif + mbmi->txfm_size = TX_16X16; +#if CONFIG_SB8X8 + else + mbmi->txfm_size = TX_8X8; +#endif + } vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); @@ -4815,6 +5594,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } +#if !CONFIG_SB8X8 void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *totalrate, int *totaldist) { @@ -4824,10 +5604,9 @@ void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, int64_t intra_error = 0; unsigned char *segment_id = &mbmi->segment_id; - if (xd->segmentation_enabled) - x->encode_breakout = cpi->segment_encode_breakout[*segment_id]; - else - x->encode_breakout = cpi->oxcf.encode_breakout; + x->encode_breakout = xd->segmentation_enabled ? 
+ cpi->segment_encode_breakout[*segment_id] : + cpi->oxcf.encode_breakout; // if (cpi->sf.RD) // For now this codebase is limited to a single rd encode path @@ -4852,3 +5631,4 @@ void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, *totalrate = rate; *totaldist = distortion; } +#endif diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index eef2a4fe9..6533a82e0 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -19,16 +19,20 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +#if !CONFIG_SB8X8 void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int *r, int *d); +#endif void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int *d, BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx); +#if !CONFIG_SB8X8 void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *r, int *d); +#endif int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 7f792ae2b..e04980ce1 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -16,18 +16,15 @@ #include "vp9/common/vp9_tile_common.h" void vp9_enable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); + VP9_COMP *cpi = (VP9_COMP *)ptr; - // Set the appropriate feature bit cpi->mb.e_mbd.segmentation_enabled = 1; cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } void vp9_disable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - // Clear the appropriate feature bit + VP9_COMP *cpi = (VP9_COMP *)ptr; cpi->mb.e_mbd.segmentation_enabled = 0; } @@ -199,9 +196,17 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi, assert(bwl < bsl && bhl < bsl); if (bsize == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; +#if CONFIG_SB8X8 + } else if (bsize == 
BLOCK_SIZE_SB32X32) { + subsize = BLOCK_SIZE_MB16X16; + } else { + assert(bsize == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; +#else } else { assert(bsize == BLOCK_SIZE_SB32X32); subsize = BLOCK_SIZE_MB16X16; +#endif } for (n = 0; n < 4; n++) { @@ -238,10 +243,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Set default state for the segment tree probabilities and the // temporal coding probabilities - vpx_memset(xd->mb_segment_tree_probs, 255, - sizeof(xd->mb_segment_tree_probs)); - vpx_memset(cm->segment_pred_probs, 255, - sizeof(cm->segment_pred_probs)); + vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); + vpx_memset(cm->segment_pred_probs, 255, sizeof(cm->segment_pred_probs)); vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts)); vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts)); @@ -249,7 +252,6 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // First of all generate stats regarding how well the last segment map // predicts this one - for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { vp9_get_tile_col_offsets(cm, tile_col); mi_ptr = cm->mi + cm->cur_tile_mi_col_start; @@ -279,27 +281,24 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Add in the cost of the signalling for each prediction context for (i = 0; i < PREDICTION_PROBS; i++) { - t_nopred_prob[i] = get_binary_prob(temporal_predictor_count[i][0], - temporal_predictor_count[i][1]); + const int count0 = temporal_predictor_count[i][0]; + const int count1 = temporal_predictor_count[i][1]; + + t_nopred_prob[i] = get_binary_prob(count0, count1); // Add in the predictor signaling cost - t_pred_cost += (temporal_predictor_count[i][0] * - vp9_cost_zero(t_nopred_prob[i])) + - (temporal_predictor_count[i][1] * - vp9_cost_one(t_nopred_prob[i])); + t_pred_cost += count0 * vp9_cost_zero(t_nopred_prob[i]) + + count1 * vp9_cost_one(t_nopred_prob[i]); } } // Now choose which coding method to use. 
if (t_pred_cost < no_pred_cost) { cm->temporal_update = 1; - vpx_memcpy(xd->mb_segment_tree_probs, - t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(&cm->segment_pred_probs, - t_nopred_prob, sizeof(t_nopred_prob)); + vpx_memcpy(xd->mb_segment_tree_probs, t_pred_tree, sizeof(t_pred_tree)); + vpx_memcpy(cm->segment_pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { cm->temporal_update = 0; - vpx_memcpy(xd->mb_segment_tree_probs, - no_pred_tree, sizeof(no_pred_tree)); + vpx_memcpy(xd->mb_segment_tree_probs, no_pred_tree, sizeof(no_pred_tree)); } } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 3c3367071..9756e6e54 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -376,7 +376,11 @@ int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { int result = 1; struct is_skippable_args args = {xd, &result}; - foreach_transformed_block_in_plane(xd, bsize, 0, 0, is_skippable, &args); + foreach_transformed_block_in_plane(xd, bsize, 0, +#if !CONFIG_SB8X8 + 0, +#endif + is_skippable, &args); return result; } |