diff options
Diffstat (limited to 'vp9/decoder')
-rw-r--r-- | vp9/decoder/vp9_dboolhuff.c | 50 | ||||
-rw-r--r-- | vp9/decoder/vp9_dboolhuff.h | 41 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodemv.c | 481 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 871 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.h | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_dequantize.c | 330 | ||||
-rw-r--r-- | vp9/decoder/vp9_dequantize.h | 95 | ||||
-rw-r--r-- | vp9/decoder/vp9_detokenize.c | 389 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.c | 226 | ||||
-rw-r--r-- | vp9/decoder/vp9_onyxd.h | 1 | ||||
-rw-r--r-- | vp9/decoder/vp9_onyxd_if.c | 150 | ||||
-rw-r--r-- | vp9/decoder/vp9_onyxd_int.h | 46 | ||||
-rw-r--r-- | vp9/decoder/x86/vp9_dequantize_mmx.asm | 406 | ||||
-rw-r--r-- | vp9/decoder/x86/vp9_idct_blk_mmx.c | 145 | ||||
-rw-r--r-- | vp9/decoder/x86/vp9_x86_dsystemdependent.c | 26 |
15 files changed, 1121 insertions, 2138 deletions
diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c index 5f1ef0408..99c3664b2 100644 --- a/vp9/decoder/vp9_dboolhuff.c +++ b/vp9/decoder/vp9_dboolhuff.c @@ -17,10 +17,10 @@ int vp9_start_decode(BOOL_DECODER *br, const unsigned char *source, unsigned int source_sz) { br->user_buffer_end = source + source_sz; - br->user_buffer = source; - br->value = 0; - br->count = -8; - br->range = 255; + br->user_buffer = source; + br->value = 0; + br->count = -8; + br->range = 255; if (source_sz && !source) return 1; @@ -33,16 +33,27 @@ int vp9_start_decode(BOOL_DECODER *br, void vp9_bool_decoder_fill(BOOL_DECODER *br) { - const unsigned char *bufptr; - const unsigned char *bufend; - VP9_BD_VALUE value; - int count; - bufend = br->user_buffer_end; - bufptr = br->user_buffer; - value = br->value; - count = br->count; + const unsigned char *bufptr = br->user_buffer; + const unsigned char *bufend = br->user_buffer_end; + VP9_BD_VALUE value = br->value; + int count = br->count; + int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8); + int loop_end = 0; + int bits_left = (int)((bufend - bufptr)*CHAR_BIT); + int x = shift + CHAR_BIT - bits_left; + + if (x >= 0) { + count += VP9_LOTS_OF_BITS; + loop_end = x; + } - VP9DX_BOOL_DECODER_FILL(count, value, bufptr, bufend); + if (x < 0 || bits_left) { + while (shift >= loop_end) { + count += CHAR_BIT; + value |= (VP9_BD_VALUE)*bufptr++ << shift; + shift -= CHAR_BIT; + } + } br->user_buffer = bufptr; br->value = value; @@ -52,7 +63,9 @@ void vp9_bool_decoder_fill(BOOL_DECODER *br) { static int get_unsigned_bits(unsigned num_values) { int cat = 0; - if ((num_values--) <= 1) return 0; + if (num_values <= 1) + return 0; + num_values--; while (num_values > 0) { cat++; num_values >>= 1; @@ -61,9 +74,12 @@ static int get_unsigned_bits(unsigned num_values) { } int vp9_inv_recenter_nonneg(int v, int m) { - if (v > (m << 1)) return v; - else if ((v & 1) == 0) return (v >> 1) + m; - else return m - ((v + 1) >> 1); + if (v > (m << 1)) + return v; + else if ((v & 1) == 0) + return (v >> 1) + m; + else + return m - ((v + 1) >> 1); } int vp9_decode_uniform(BOOL_DECODER *br, int n) { diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index 5afdd67c8..cf31d380a 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -19,11 +19,11 @@ typedef size_t VP9_BD_VALUE; -# define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) +#define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) /*This is meant to be a large, positive constant that can still be efficiently loaded as an immediate (on platforms like ARM, for example). Even relatively modest values like 100 would work fine.*/ -# define VP9_LOTS_OF_BITS (0x40000000) +#define VP9_LOTS_OF_BITS (0x40000000) typedef struct { const unsigned char *user_buffer_end; @@ -45,46 +45,13 @@ int vp9_decode_uniform(BOOL_DECODER *br, int n); int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms); int vp9_inv_recenter_nonneg(int v, int m); -/*The refill loop is used in several places, so define it in a macro to make - sure they're all consistent. - An inline function would be cleaner, but has a significant penalty, because - multiple BOOL_DECODER fields must be modified, and the compiler is not smart - enough to eliminate the stores to those fields and the subsequent reloads - from them when inlining the function.*/ -#define VP9DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \ - do \ - { \ - int shift = VP9_BD_VALUE_SIZE - 8 - ((_count) + 8); \ - int loop_end, x; \ - int bits_left = (int)(((_bufend)-(_bufptr))*CHAR_BIT); \ - \ - x = shift + CHAR_BIT - bits_left; \ - loop_end = 0; \ - if(x >= 0) \ - { \ - (_count) += VP9_LOTS_OF_BITS; \ - loop_end = x; \ - if(!bits_left) break; \ - } \ - while(shift >= loop_end) \ - { \ - (_count) += CHAR_BIT; \ - (_value) |= (VP9_BD_VALUE)*(_bufptr)++ << shift; \ - shift -= CHAR_BIT; \ - } \ - } \ - while(0) \ - - static int decode_bool(BOOL_DECODER *br, int probability) { unsigned int bit = 0; VP9_BD_VALUE value; - unsigned int split; VP9_BD_VALUE bigsplit; int count; unsigned int range; - - split = 1 + (((br->range - 1) * probability) >> 8); + unsigned int split = 1 + (((br->range - 1) * probability) >> 8); if (br->count < 0) vp9_bool_decoder_fill(br); @@ -150,6 +117,6 @@ static int bool_error(BOOL_DECODER *br) { return 0; } -extern int vp9_decode_unsigned_max(BOOL_DECODER *br, int max); +int vp9_decode_unsigned_max(BOOL_DECODER *br, int max); #endif // VP9_DECODER_VP9_DBOOLHUFF_H_ diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index c6c3d1576..326c80239 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -12,6 +12,7 @@ #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_reconinter.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/common/vp9_findnearmv.h" #include "vp9/common/vp9_common.h" @@ -28,12 +29,13 @@ #ifdef DEBUG_DEC_MV int dec_mvcount = 0; #endif + // #define DEC_DEBUG #ifdef DEC_DEBUG extern int dec_debug; #endif -static int read_bmode(vp9_reader *bc, const vp9_prob *p) { +static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) { B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p); #if CONFIG_NEWBINTRAMODES if (m == B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS) @@ -43,53 +45,71 @@ static int read_bmode(vp9_reader *bc, const vp9_prob *p) { return m; } -static int read_kf_bmode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_kf_bmode_tree, p); +static B_PREDICTION_MODE read_kf_bmode(vp9_reader *bc, const vp9_prob *p) { + return (B_PREDICTION_MODE)treed_read(bc, vp9_kf_bmode_tree, p); } -static int read_ymode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_ymode_tree, p); +static MB_PREDICTION_MODE read_ymode(vp9_reader *bc, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(bc, vp9_ymode_tree, p); } -static int read_sb_ymode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_sb_ymode_tree, p); +static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *bc, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(bc, vp9_sb_ymode_tree, p); } -static int read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_uv_mode_tree, p); +static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p); } -static int read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_kf_ymode_tree, p); +static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(bc, vp9_kf_ymode_tree, p); } static int read_i8x8_mode(vp9_reader *bc, const vp9_prob *p) { return treed_read(bc, vp9_i8x8_mode_tree, p); } -static int read_uv_mode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_uv_mode_tree, p); +static MB_PREDICTION_MODE read_uv_mode(vp9_reader *bc, const vp9_prob *p) { + return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p); } // This function reads the current macro block's segnent id from the bitstream // It should only be called if a segment map update is indicated. -static void read_mb_segid(vp9_reader *r, MB_MODE_INFO *mi, - MACROBLOCKD *xd) { +static void read_mb_segid(vp9_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *xd) { /* Is segmentation enabled */ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { /* If so then read the segment id. */ - if (vp9_read(r, xd->mb_segment_tree_probs[0])) - mi->segment_id = - (unsigned char)(2 + vp9_read(r, xd->mb_segment_tree_probs[2])); - else - mi->segment_id = + mi->segment_id = vp9_read(r, xd->mb_segment_tree_probs[0]) ? + (unsigned char)(2 + vp9_read(r, xd->mb_segment_tree_probs[2])): (unsigned char)(vp9_read(r, xd->mb_segment_tree_probs[1])); } } +// This function reads the current macro block's segnent id from the bitstream +// It should only be called if a segment map update is indicated. +static void read_mb_segid_except(VP9_COMMON *cm, + vp9_reader *r, MB_MODE_INFO *mi, + MACROBLOCKD *xd, int mb_row, int mb_col) { + int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, + mb_row * cm->mb_cols + mb_col); + const vp9_prob *p = xd->mb_segment_tree_probs; + vp9_prob p1 = xd->mb_segment_mispred_tree_probs[pred_seg_id]; + + /* Is segmentation enabled */ + if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { + /* If so then read the segment id. */ + if (vp9_read(r, p1)) { + mi->segment_id = 2 + + (pred_seg_id < 2 ? vp9_read(r, p[2]) : (pred_seg_id == 2)); + } else { + mi->segment_id = + pred_seg_id >= 2 ? vp9_read(r, p[1]) : (pred_seg_id == 0); + } + } +} + #if CONFIG_NEW_MVREF -int vp9_read_mv_ref_id(vp9_reader *r, - vp9_prob * ref_id_probs) { +int vp9_read_mv_ref_id(vp9_reader *r, vp9_prob *ref_id_probs) { int ref_index = 0; if (vp9_read(r, ref_id_probs[0])) { @@ -111,10 +131,13 @@ static void kfread_modes(VP9D_COMP *pbi, int mb_col, BOOL_DECODER* const bc) { VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; const int mis = pbi->common.mode_info_stride; int map_index = mb_row * pbi->common.mb_cols + mb_col; MB_PREDICTION_MODE y_mode; + m->mbmi.ref_frame = INTRA_FRAME; + // Read the Macroblock segmentation map if it is being updated explicitly // this frame (reset to 0 by default). m->mbmi.segment_id = 0; @@ -139,60 +162,56 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.mb_skip_coeff = 0; if (pbi->common.mb_no_coeff_skip && - (!vp9_segfeature_active(&pbi->mb, - m->mbmi.segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(&pbi->mb, - m->mbmi.segment_id, SEG_LVL_EOB) != 0))) { + (!vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, SEG_LVL_SKIP))) { MACROBLOCKD *const xd = &pbi->mb; m->mbmi.mb_skip_coeff = - vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); + vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); } else { - if (vp9_segfeature_active(&pbi->mb, - m->mbmi.segment_id, SEG_LVL_EOB) && - (vp9_get_segdata(&pbi->mb, - m->mbmi.segment_id, SEG_LVL_EOB) == 0)) { + if (vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, SEG_LVL_SKIP)) m->mbmi.mb_skip_coeff = 1; - } else + else m->mbmi.mb_skip_coeff = 0; } - if (m->mbmi.sb_type) { - y_mode = (MB_PREDICTION_MODE) read_kf_sb_ymode(bc, - pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]); - } else { - y_mode = (MB_PREDICTION_MODE) read_kf_mb_ymode(bc, - pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); - } + + y_mode = m->mbmi.sb_type ? + read_kf_sb_ymode(bc, + pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]): + read_kf_mb_ymode(bc, + pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); m->mbmi.ref_frame = INTRA_FRAME; if ((m->mbmi.mode = y_mode) == B_PRED) { int i = 0; do { - const B_PREDICTION_MODE A = above_block_mode(m, i, mis); - const B_PREDICTION_MODE L = left_block_mode(m, i); + const B_PREDICTION_MODE a = above_block_mode(m, i, mis); + const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ? + left_block_mode(m, i) : B_DC_PRED; - m->bmi[i].as_mode.first = - (B_PREDICTION_MODE) read_kf_bmode( - bc, pbi->common.kf_bmode_prob [A] [L]); + m->bmi[i].as_mode.first = read_kf_bmode(bc, + pbi->common.kf_bmode_prob[a][l]); } while (++i < 16); } + if ((m->mbmi.mode = y_mode) == I8X8_PRED) { int i; - int mode8x8; for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); + const int ib = vp9_i8x8_block[i]; + const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); + m->bmi[ib + 0].as_mode.first = mode8x8; m->bmi[ib + 1].as_mode.first = mode8x8; m->bmi[ib + 4].as_mode.first = mode8x8; m->bmi[ib + 5].as_mode.first = mode8x8; } - } else - m->mbmi.uv_mode = (MB_PREDICTION_MODE)read_uv_mode(bc, - pbi->common.kf_uv_mode_prob[m->mbmi.mode]); + } else { + m->mbmi.uv_mode = read_uv_mode(bc, + pbi->common.kf_uv_mode_prob[m->mbmi.mode]); + } - if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 && + if (cm->txfm_mode == TX_MODE_SELECT && + m->mbmi.mb_skip_coeff == 0 && m->mbmi.mode <= I8X8_PRED) { // FIXME(rbultje) code ternary symbol once all experiments are merged m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]); @@ -215,23 +234,23 @@ static void kfread_modes(VP9D_COMP *pbi, static int read_nmv_component(vp9_reader *r, int rv, const nmv_component *mvcomp) { - int v, s, z, c, o, d; - s = vp9_read(r, mvcomp->sign); - c = treed_read(r, vp9_mv_class_tree, mvcomp->classes); - if (c == MV_CLASS_0) { + int mag, d; + const int sign = vp9_read(r, mvcomp->sign); + const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes); + + if (mv_class == MV_CLASS_0) { d = treed_read(r, vp9_mv_class0_tree, mvcomp->class0); } else { - int i, b; + int i; + int n = mv_class + CLASS0_BITS - 1; // number of bits + d = 0; - b = c + CLASS0_BITS - 1; /* number of bits */ - for (i = 0; i < b; ++i) - d |= (vp9_read(r, mvcomp->bits[i]) << i); + for (i = 0; i < n; ++i) + d |= vp9_read(r, mvcomp->bits[i]) << i; } - o = d << 3; - z = vp9_get_mv_mag(c, o); - v = (s ? -(z + 8) : (z + 8)); - return v; + mag = vp9_get_mv_mag(mv_class, d << 3); + return sign ? -(mag + 8) : (mag + 8); } static int read_nmv_component_fp(vp9_reader *r, @@ -239,43 +258,34 @@ static int read_nmv_component_fp(vp9_reader *r, int rv, const nmv_component *mvcomp, int usehp) { - int s, z, c, o, d, e, f; - s = v < 0; - z = (s ? -v : v) - 1; /* magnitude - 1 */ - z &= ~7; - - c = vp9_get_mv_class(z, &o); - d = o >> 3; + const int sign = v < 0; + int mag = ((sign ? -v : v) - 1) & ~7; // magnitude - 1 + int offset; + const int mv_class = vp9_get_mv_class(mag, &offset); + const int f = mv_class == MV_CLASS_0 ? + treed_read(r, vp9_mv_fp_tree, mvcomp->class0_fp[offset >> 3]): + treed_read(r, vp9_mv_fp_tree, mvcomp->fp); - if (c == MV_CLASS_0) { - f = treed_read(r, vp9_mv_fp_tree, mvcomp->class0_fp[d]); - } else { - f = treed_read(r, vp9_mv_fp_tree, mvcomp->fp); - } - o += (f << 1); + offset += f << 1; if (usehp) { - if (c == MV_CLASS_0) { - e = vp9_read(r, mvcomp->class0_hp); - } else { - e = vp9_read(r, mvcomp->hp); - } - o += e; + offset += mv_class == MV_CLASS_0 ? + vp9_read(r, mvcomp->class0_hp) : vp9_read(r, mvcomp->hp); } else { - ++o; /* Note if hp is not used, the default value of the hp bit is 1 */ + offset += 1; // If hp is not used, the default value of the hp bit is 1 } - z = vp9_get_mv_mag(c, o); - v = (s ? -(z + 1) : (z + 1)); - return v; + mag = vp9_get_mv_mag(mv_class, offset); + return sign ? -(mag + 1) : (mag + 1); } static void read_nmv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *mvctx) { - MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints); + const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints); mv->row = mv-> col = 0; if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]); } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]); } @@ -283,7 +293,7 @@ static void read_nmv(vp9_reader *r, MV *mv, const MV *ref, static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *mvctx, int usehp) { - MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); + const MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); usehp = usehp && vp9_use_nmv_hp(ref); if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0], @@ -293,7 +303,10 @@ static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref, mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1], usehp); } - //printf(" %d: %d %d ref: %d %d\n", usehp, mv->row, mv-> col, ref->row, ref->col); + /* + printf("MV: %d %d REF: %d %d\n", mv->row + ref->row, mv->col + ref->col, + ref->row, ref->col); + */ } static void update_nmv(vp9_reader *bc, vp9_prob *const p, @@ -310,48 +323,40 @@ static void update_nmv(vp9_reader *bc, vp9_prob *const p, static void read_nmvprobs(vp9_reader *bc, nmv_context *mvctx, int usehp) { int i, j, k; + #ifdef MV_GROUP_UPDATE - if (!vp9_read_bit(bc)) return; + if (!vp9_read_bit(bc)) + return; #endif - for (j = 0; j < MV_JOINTS - 1; ++j) { - update_nmv(bc, &mvctx->joints[j], - VP9_NMV_UPDATE_PROB); - } + for (j = 0; j < MV_JOINTS - 1; ++j) + update_nmv(bc, &mvctx->joints[j], VP9_NMV_UPDATE_PROB); + for (i = 0; i < 2; ++i) { - update_nmv(bc, &mvctx->comps[i].sign, - VP9_NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) { - update_nmv(bc, &mvctx->comps[i].classes[j], - VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < CLASS0_SIZE - 1; ++j) { - update_nmv(bc, &mvctx->comps[i].class0[j], - VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - update_nmv(bc, &mvctx->comps[i].bits[j], - VP9_NMV_UPDATE_PROB); - } + update_nmv(bc, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB); + for (j = 0; j < MV_CLASSES - 1; ++j) + update_nmv(bc, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB); + + for (j = 0; j < CLASS0_SIZE - 1; ++j) + update_nmv(bc, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB); + + for (j = 0; j < MV_OFFSET_BITS; ++j) + update_nmv(bc, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { for (j = 0; j < CLASS0_SIZE; ++j) { for (k = 0; k < 3; ++k) - update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], - VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < 3; ++j) { - update_nmv(bc, &mvctx->comps[i].fp[j], - VP9_NMV_UPDATE_PROB); + update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB); } + + for (j = 0; j < 3; ++j) + update_nmv(bc, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB); } if (usehp) { for (i = 0; i < 2; ++i) { - update_nmv(bc, &mvctx->comps[i].class0_hp, - VP9_NMV_UPDATE_PROB); - update_nmv(bc, &mvctx->comps[i].hp, - VP9_NMV_UPDATE_PROB); + update_nmv(bc, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); + update_nmv(bc, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB); } } } @@ -361,15 +366,11 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, vp9_reader *const bc, unsigned char segment_id) { MV_REFERENCE_FRAME ref_frame; - int seg_ref_active; - int seg_ref_count = 0; - VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - seg_ref_active = vp9_segfeature_active(xd, - segment_id, - SEG_LVL_REF_FRAME); + int seg_ref_count = 0; + int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); // If segment coding enabled does the segment allow for more than one // possible reference frame @@ -492,12 +493,12 @@ unsigned int vp9_mv_cont_count[5][4] = { }; #endif -static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1}; +static const unsigned char mbsplit_fill_count[4] = { 8, 8, 4, 1 }; static const unsigned char mbsplit_fill_offset[4][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15}, - { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }, + { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }; static void read_switchable_interp_probs(VP9D_COMP* const pbi, @@ -603,7 +604,7 @@ static void read_mb_segment_id(VP9D_COMP *pbi, } // Else .... decode it explicitly else { - read_mb_segid(bc, mbmi, xd); + read_mb_segid_except(cm, bc, mbmi, xd, mb_row, mb_col); } } // Normal unpredicted coding mode @@ -636,8 +637,7 @@ static void read_mb_segment_id(VP9D_COMP *pbi, for (y = 0; y < ymbs; y++) { for (x = 0; x < xmbs; x++) { segment_id = MIN(segment_id, - cm->last_frame_seg_map[index + x + - y * cm->mb_cols]); + cm->last_frame_seg_map[index + x + y * cm->mb_cols]); } } mbmi->segment_id = segment_id; @@ -664,25 +664,28 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int_mv *const mv = &mbmi->mv[0]; int mb_to_left_edge; int mb_to_right_edge; - int mb_to_top_edge; - int mb_to_bottom_edge; const int mb_size = 1 << mi->mbmi.sb_type; - mb_to_top_edge = xd->mb_to_top_edge; - mb_to_bottom_edge = xd->mb_to_bottom_edge; - mb_to_top_edge -= LEFT_TOP_MARGIN; - mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; + const int use_prev_in_find_mv_refs = cm->Width == cm->last_width && + cm->Height == cm->last_height && + !cm->error_resilient_mode; + + int mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN; + int mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; + mbmi->need_to_clamp_mvs = 0; mbmi->need_to_clamp_secondmv = 0; mbmi->second_ref_frame = NONE; - /* Distance of Mb to the various image edges. - * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units - */ - xd->mb_to_left_edge = - mb_to_left_edge = -((mb_col * 16) << 3); + + // Distance of Mb to the various image edges. + // These specified to 8th pel as they are always compared to MV values + // that are in 1/8th pel units + xd->mb_to_left_edge = mb_to_left_edge + = -((mb_col * 16) << 3); mb_to_left_edge -= LEFT_TOP_MARGIN; - xd->mb_to_right_edge = - mb_to_right_edge = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3; + + xd->mb_to_right_edge = mb_to_right_edge + = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3; mb_to_right_edge += RIGHT_BOTTOM_MARGIN; // Make sure the MACROBLOCKD mode info pointer is pointed at the @@ -694,75 +697,74 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, read_mb_segment_id(pbi, mb_row, mb_col, bc); if (pbi->common.mb_no_coeff_skip && - (!vp9_segfeature_active(xd, - mbmi->segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_EOB) != 0))) { + (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP))) { // Read the macroblock coeff skip flag if this feature is in use, // else default to 0 mbmi->mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); } else { - if (vp9_segfeature_active(xd, - mbmi->segment_id, SEG_LVL_EOB) && - (vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_EOB) == 0)) { - mbmi->mb_skip_coeff = 1; - } else - mbmi->mb_skip_coeff = 0; + mbmi->mb_skip_coeff = + vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP) ? 1 : 0; } // Read the reference frame - if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE) - && vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE) < NEARESTMV) - mbmi->ref_frame = INTRA_FRAME; - else - mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id); + mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id); + + /* + if (pbi->common.current_video_frame == 1) + printf("ref frame: %d [%d %d]\n", mbmi->ref_frame, mb_row, mb_col); + */ // If reference frame is an Inter frame if (mbmi->ref_frame) { int_mv nearest, nearby, best_mv; int_mv nearest_second, nearby_second, best_mv_second; - vp9_prob mv_ref_p [VP9_MVREFS - 1]; + vp9_prob mv_ref_p[VP9_MVREFS - 1]; - int recon_y_stride, recon_yoffset; - int recon_uv_stride, recon_uvoffset; MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; + xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; { int ref_fb_idx; + const int use_prev_in_find_best_ref = + xd->scale_factor[0].x_num == xd->scale_factor[0].x_den && + xd->scale_factor[0].y_num == xd->scale_factor[0].y_den && + !cm->error_resilient_mode && + !cm->frame_parallel_decoding_mode; /* Select the appropriate reference frame for this MB */ - if (ref_frame == LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame == GOLDEN_FRAME) - ref_fb_idx = cm->gld_fb_idx; - else - ref_fb_idx = cm->alt_fb_idx; - - recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ; - recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); + ref_fb_idx = cm->active_ref_idx[ref_frame - 1]; - xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], + mb_row, mb_col, &xd->scale_factor[0], &xd->scale_factor_uv[0]); #ifdef DEC_DEBUG if (dec_debug) printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row, xd->mode_info_context->mbmi.mv[0].as_mv.col); #endif - vp9_find_mv_refs(xd, mi, prev_mi, + // if (cm->current_video_frame == 1 && mb_row == 4 && mb_col == 5) + // printf("Dello\n"); + vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL, ref_frame, mbmi->ref_mvs[ref_frame], cm->ref_frame_sign_bias); vp9_mv_ref_probs(&pbi->common, mv_ref_p, mbmi->mb_mode_context[ref_frame]); + /* + if (pbi->common.current_video_frame == 1) { + int k = mbmi->mb_mode_context[ref_frame]; + printf("vp9_mode_contexts: [%d %d %d %d] %d %d %d %d\n", + mb_row, mb_col, ref_frame, k, + cm->fc.vp9_mode_contexts[k][0], + cm->fc.vp9_mode_contexts[k][1], + cm->fc.vp9_mode_contexts[k][2], + cm->fc.vp9_mode_contexts[k][3]); + } + */ - // Is the segment level mode feature enabled for this segment - if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) { - mbmi->mode = - vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); + // If the segment level skip mode enabled + if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) { + mbmi->mode = ZEROMV; } else { if (mbmi->sb_type) mbmi->mode = read_sb_mv_ref(bc, mv_ref_p); @@ -775,8 +777,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (mbmi->mode != ZEROMV) { vp9_find_best_ref_mvs(xd, - xd->pre.y_buffer, - recon_y_stride, + use_prev_in_find_best_ref ? + xd->pre.y_buffer : NULL, + xd->pre.y_stride, mbmi->ref_mvs[ref_frame], &nearest, &nearby); @@ -791,8 +794,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, #endif } - if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) - { + if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) { if (cm->mcomp_filter_type == SWITCHABLE) { mbmi->interp_filter = vp9_switchable_interp[ treed_read(bc, vp9_switchable_interp_tree, @@ -817,31 +819,31 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->second_ref_frame = 1; if (mbmi->second_ref_frame > 0) { int second_ref_fb_idx; + int use_prev_in_find_best_ref; + + xd->scale_factor[1] = cm->active_ref_scale[mbmi->second_ref_frame - 1]; + use_prev_in_find_best_ref = + xd->scale_factor[1].x_num == xd->scale_factor[1].x_den && + xd->scale_factor[1].y_num == xd->scale_factor[1].y_den && + !cm->error_resilient_mode && + !cm->frame_parallel_decoding_mode; + /* Select the appropriate reference frame for this MB */ - if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cm->lst_fb_idx; - else if (mbmi->second_ref_frame == - GOLDEN_FRAME) - second_ref_fb_idx = cm->gld_fb_idx; - else - second_ref_fb_idx = cm->alt_fb_idx; + second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; - xd->second_pre.y_buffer = - cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; - xd->second_pre.u_buffer = - cm->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset; - xd->second_pre.v_buffer = - cm->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset; + setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], + mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]); - vp9_find_mv_refs(xd, mi, prev_mi, + vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL, mbmi->second_ref_frame, mbmi->ref_mvs[mbmi->second_ref_frame], cm->ref_frame_sign_bias); if (mbmi->mode != ZEROMV) { vp9_find_best_ref_mvs(xd, - xd->second_pre.y_buffer, - recon_y_stride, + use_prev_in_find_best_ref ? + xd->second_pre.y_buffer : NULL, + xd->second_pre.y_stride, mbmi->ref_mvs[mbmi->second_ref_frame], &nearest_second, &nearby_second); @@ -861,12 +863,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, pbi->common.fc.interintra_counts[ mbmi->second_ref_frame == INTRA_FRAME]++; if (mbmi->second_ref_frame == INTRA_FRAME) { - mbmi->interintra_mode = (MB_PREDICTION_MODE)read_ymode( - bc, pbi->common.fc.ymode_prob); + mbmi->interintra_mode = read_ymode(bc, pbi->common.fc.ymode_prob); pbi->common.fc.ymode_counts[mbmi->interintra_mode]++; #if SEPARATE_INTERINTRA_UV - mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)read_uv_mode( - bc, pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]); + mbmi->interintra_uv_mode = read_uv_mode(bc, + pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]); pbi->common.fc.uv_mode_counts[mbmi->interintra_mode] [mbmi->interintra_uv_mode]++; #else @@ -912,21 +913,19 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, cm->fc.mbsplit_counts[s]++; mbmi->need_to_clamp_mvs = 0; - do { /* for each subset j */ + do { // for each subset j int_mv leftmv, abovemv, second_leftmv, second_abovemv; int_mv blockmv, secondmv; - int k; /* first block in subset j */ int mv_contz; int blockmode; + int k = vp9_mbsplit_offset[s][j]; // first block in subset j - k = vp9_mbsplit_offset[s][j]; - - leftmv.as_int = left_block_mv(mi, k); + leftmv.as_int = left_block_mv(xd, mi, k); abovemv.as_int = above_block_mv(mi, k, mis); second_leftmv.as_int = 0; second_abovemv.as_int = 0; if (mbmi->second_ref_frame > 0) { - second_leftmv.as_int = left_block_second_mv(mi, k); + second_leftmv.as_int = left_block_second_mv(xd, mi, k); second_abovemv.as_int = above_block_second_mv(mi, k, mis); } mv_contz = vp9_mv_cont(&leftmv, &abovemv); @@ -1005,15 +1004,14 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, /* Fill (uniform) modes, mvs of jth subset. Must do it here because ensuing subsets can refer back to us via "left" or "above". */ - const unsigned char *fill_offset; unsigned int fill_count = mbsplit_fill_count[s]; - - fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]]; + const unsigned char *fill_offset = + &mbsplit_fill_offset[s][j * fill_count]; do { - mi->bmi[ *fill_offset].as_mv.first.as_int = blockmv.as_int; + mi->bmi[*fill_offset].as_mv[0].as_int = blockmv.as_int; if (mbmi->second_ref_frame > 0) - mi->bmi[ *fill_offset].as_mv.second.as_int = secondmv.as_int; + mi->bmi[*fill_offset].as_mv[1].as_int = secondmv.as_int; fill_offset++; } while (--fill_count); } @@ -1021,8 +1019,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } while (++j < num_p); } - mv->as_int = mi->bmi[15].as_mv.first.as_int; - mbmi->mv[1].as_int = mi->bmi[15].as_mv.second.as_int; + mv->as_int = mi->bmi[15].as_mv[0].as_int; + mbmi->mv[1].as_int = mi->bmi[15].as_mv[1].as_int; break; /* done with SPLITMV */ @@ -1057,7 +1055,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, break; case NEWMV: - read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc); read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc, xd->allow_high_precision_mv); @@ -1086,10 +1083,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, &cm->fc.NMVcount, xd->allow_high_precision_mv); mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row; mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col; - mbmi->need_to_clamp_secondmv |= - check_mv_bounds(&mbmi->mv[1], - mb_to_left_edge, mb_to_right_edge, - mb_to_top_edge, mb_to_bottom_edge); + mbmi->need_to_clamp_secondmv |= check_mv_bounds(&mbmi->mv[1], + mb_to_left_edge, mb_to_right_edge, + mb_to_top_edge, mb_to_bottom_edge); } break; default: @@ -1102,16 +1098,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, /* required for left and above block mv */ mbmi->mv[0].as_int = 0; - if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) { - mbmi->mode = (MB_PREDICTION_MODE) - vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); - } else if (mbmi->sb_type) { - mbmi->mode = (MB_PREDICTION_MODE) - read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob); + if (mbmi->sb_type) { + mbmi->mode = read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob); pbi->common.fc.sb_ymode_counts[mbmi->mode]++; } else { - mbmi->mode = (MB_PREDICTION_MODE) - read_ymode(bc, pbi->common.fc.ymode_prob); + mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob); pbi->common.fc.ymode_counts[mbmi->mode]++; } @@ -1120,8 +1111,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int j = 0; do { int m; - m = mi->bmi[j].as_mode.first = (B_PREDICTION_MODE) - read_bmode(bc, pbi->common.fc.bmode_prob); + m = mi->bmi[j].as_mode.first = read_bmode(bc, + pbi->common.fc.bmode_prob); #if CONFIG_NEWBINTRAMODES if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; #endif @@ -1131,10 +1122,10 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (mbmi->mode == I8X8_PRED) { int i; - int mode8x8; for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); + const int ib = vp9_i8x8_block[i]; + const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); + mi->bmi[ib + 0].as_mode.first = mode8x8; mi->bmi[ib + 1].as_mode.first = mode8x8; mi->bmi[ib + 4].as_mode.first = mode8x8; @@ -1142,11 +1133,14 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, pbi->common.fc.i8x8_mode_counts[mode8x8]++; } } else { - mbmi->uv_mode = (MB_PREDICTION_MODE)read_uv_mode( - bc, pbi->common.fc.uv_mode_prob[mbmi->mode]); + mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob[mbmi->mode]); pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; } } + /* + if (pbi->common.current_video_frame == 1) + printf("mode: %d skip: %d\n", mbmi->mode, mbmi->mb_skip_coeff); + */ if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) || @@ -1188,6 +1182,7 @@ void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { mb_mode_mv_init(pbi, bc); } + void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd, int mb_row, @@ -1196,8 +1191,12 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MODE_INFO *mi = xd->mode_info_context; MODE_INFO *prev_mi = xd->prev_mode_info_context; - if (pbi->common.frame_type == KEY_FRAME) + if (pbi->common.frame_type == KEY_FRAME) { kfread_modes(pbi, mi, mb_row, mb_col, bc); - else + } else { read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc); + set_scale_factors(xd, + mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1, + pbi->common.active_ref_scale); + } } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index c3b9637a6..86806d2d0 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -13,7 +13,6 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_header.h" #include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_reconintra4x4.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_entropy.h" #include "vp9/decoder/vp9_decodframe.h" @@ -32,7 +31,7 @@ #include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_tile_common.h" #include "vp9_rtcd.h" #include <assert.h> @@ -79,103 +78,82 @@ static vp9_prob read_prob_diff_update(vp9_reader *const bc, int oldp) { void vp9_init_de_quantizer(VP9D_COMP *pbi) { int i; - int Q; + int q; VP9_COMMON *const pc = &pbi->common; - for (Q = 0; Q < QINDEX_RANGE; Q++) { - pc->Y1dequant[Q][0] = (int16_t)vp9_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0] = (int16_t)vp9_dc2quant(Q, pc->y2dc_delta_q); - pc->UVdequant[Q][0] = (int16_t)vp9_dc_uv_quant(Q, pc->uvdc_delta_q); + for (q = 0; q < QINDEX_RANGE; q++) { + pc->Y1dequant[q][0] = (int16_t)vp9_dc_quant(q, pc->y1dc_delta_q); + pc->UVdequant[q][0] = (int16_t)vp9_dc_uv_quant(q, pc->uvdc_delta_q); /* all the ac values =; */ for (i = 1; i < 16; i++) { int rc = vp9_default_zig_zag1d_4x4[i]; - pc->Y1dequant[Q][rc] = (int16_t)vp9_ac_yquant(Q); - pc->Y2dequant[Q][rc] = (int16_t)vp9_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][rc] = (int16_t)vp9_ac_uv_quant(Q, pc->uvac_delta_q); + pc->Y1dequant[q][rc] = (int16_t)vp9_ac_yquant(q); + pc->UVdequant[q][rc] = (int16_t)vp9_ac_uv_quant(q, pc->uvac_delta_q); } } } static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { int i; - int QIndex; + int qindex; VP9_COMMON *const pc = &pbi->common; int segment_id = xd->mode_info_context->mbmi.segment_id; // Set the Q baseline allowing for any segment level adjustment if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) { - /* Abs Value */ if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) - QIndex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - - /* Delta Value */ + /* Abs Value */ + qindex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); else { - QIndex = pc->base_qindex + + /* Delta Value */ + qindex = pc->base_qindex + vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ + /* Clamp to valid range */ + qindex = (qindex >= 0) ? ((qindex <= MAXQ) ? qindex : MAXQ) : 0; } } else - QIndex = pc->base_qindex; - xd->q_index = QIndex; + qindex = pc->base_qindex; + + xd->q_index = qindex; /* Set up the block level dequant pointers */ for (i = 0; i < 16; i++) { - xd->block[i].dequant = pc->Y1dequant[QIndex]; + xd->block[i].dequant = pc->Y1dequant[qindex]; } -#if CONFIG_LOSSLESS - if (!QIndex) { - pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8; - pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless; - pbi->idct_add = vp9_dequant_idct_add_lossless_c; - pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; - } else { - pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; + xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1; + xd->inv_txm4x4 = vp9_short_idct4x4llm; + xd->itxm_add = vp9_dequant_idct_add; + xd->itxm_add_y_block = vp9_dequant_idct_add_y_block; + xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; + if (xd->lossless) { + assert(qindex == 0); + xd->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; + xd->inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + xd->itxm_add = vp9_dequant_idct_add_lossless_c; + xd->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; + xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; } -#else - pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; -#endif for (i = 16; i < 24; i++) { - xd->block[i].dequant = pc->UVdequant[QIndex]; + xd->block[i].dequant = pc->UVdequant[qindex]; } - - xd->block[24].dequant = pc->Y2dequant[QIndex]; - } /* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it * to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy. */ -static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { +static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, int mb_col) { + BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + if (sb_type == BLOCK_SIZE_SB64X64) { vp9_build_intra_predictors_sb64uv_s(xd); vp9_build_intra_predictors_sb64y_s(xd); - } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + } else if (sb_type == BLOCK_SIZE_SB32X32) { vp9_build_intra_predictors_sbuv_s(xd); vp9_build_intra_predictors_sby_s(xd); } else { @@ -183,38 +161,32 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { vp9_build_intra_predictors_mby_s(xd); } } else { - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + if (sb_type == BLOCK_SIZE_SB64X64) { vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); - } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + xd->dst.uv_stride, + mb_row, mb_col); + } else if (sb_type == BLOCK_SIZE_SB32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else { - vp9_build_1st_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_2nd_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - } + vp9_build_inter16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED - else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, @@ -231,7 +203,6 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { BLOCKD *bd = &xd->block[0]; TX_TYPE tx_type = get_tx_type_16x16(xd, bd); - assert(get_2nd_order_usage(xd) == 0); #ifdef DEC_DEBUG if (dec_debug) { int i; @@ -262,7 +233,7 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_dequant_idct_add_uv_block_8x8( xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16, xd); + xd->dst.uv_stride, xd); } static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, @@ -283,7 +254,6 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, #endif if (tx_type != DCT_DCT || xd->mode_info_context->mbmi.mode == I8X8_PRED) { int i; - assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; int idx = (ib & 0x02) ? (ib + 2) : ib; @@ -295,7 +265,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, BLOCKD *b = &xd->block[ib]; if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { int i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(b, i8x8mode, b->predictor); + vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor); } tx_type = get_tx_type_8x8(xd, &xd->block[ib]); if (tx_type != DCT_DCT) { @@ -303,38 +273,16 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs[idx]); } else { vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, - 0, xd->eobs[idx]); + xd->eobs[idx]); } } - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - assert(get_2nd_order_usage(xd) == 0); + } else { vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, - xd->eobs, xd); - } else { - BLOCKD *b = &xd->block[24]; - assert(get_2nd_order_usage(xd) == 1); - vp9_dequantize_b_2x2(b); - vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff, - xd->block[0].dequant, - xd->predictor, - xd->dst.y_buffer, - xd->dst.y_stride, - xd->eobs, - xd->block[24].diff, - xd); + xd); } // Now do UV @@ -345,23 +293,23 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, BLOCKD *b = &xd->block[ib]; int i8x8mode = b->bmi.as_mode.first; b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(&xd->block[16 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(&xd->block[20 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16); + xd->dst.uv_stride, xd); } else { vp9_dequant_idct_add_uv_block_8x8 (xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16, xd); + xd->dst.uv_stride, xd); } #ifdef DEC_DEBUG if (dec_debug) { @@ -382,7 +330,6 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, int i, eobtotal = 0; MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; if (mode == I8X8_PRED) { - assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; const int iblock[4] = {0, 1, 4, 5}; @@ -391,7 +338,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, BLOCKD *b; b = &xd->block[ib]; i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(b, i8x8mode, b->predictor); + vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor); for (j = 0; j < 4; j++) { b = &xd->block[ib + iblock[j]]; tx_type = get_tx_type_4x4(xd, b); @@ -399,23 +346,23 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, - b->dst_stride, b->eob); + b->dst_stride, xd->eobs[ib + iblock[j]]); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride, + xd->eobs[ib + iblock[j]]); } } b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); } } else if (mode == B_PRED) { - assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 16; i++) { int b_mode; BLOCKD *b = &xd->block[i]; @@ -427,46 +374,43 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, if (!xd->mode_info_context->mbmi.mb_skip_coeff) eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); - vp9_intra4x4_predict(b, b_mode, b->predictor); + vp9_intra4x4_predict(xd, b, b_mode, b->predictor); tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, - b->eob); + xd->eobs[i]); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } } if (!xd->mode_info_context->mbmi.mb_skip_coeff) { vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); } - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; vp9_build_intra_predictors_mbuv(xd); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, - xd->eobs + 16); - } else if (mode == SPLITMV) { - assert(get_2nd_order_usage(xd) == 0); - pbi->idct_add_y_block(xd->qcoeff, + xd); + } else if (mode == SPLITMV || get_tx_type_4x4(xd, &xd->block[0]) == DCT_DCT) { + xd->itxm_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, - xd->eobs); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd); + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, - xd->eobs + 16); + xd); } else { #ifdef DEC_DEBUG if (dec_debug) { @@ -485,56 +429,26 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, } } #endif - tx_type = get_tx_type_4x4(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - assert(get_2nd_order_usage(xd) == 0); - for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, - b->dst_stride, b->eob); - } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } - } - } else { - BLOCKD *b = &xd->block[24]; - assert(get_2nd_order_usage(xd) == 1); - vp9_dequantize_b(b); - if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; + for (i = 0; i < 16; i++) { + BLOCKD *b = &xd->block[i]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, + b->dst_stride, xd->eobs[i]); } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } - vp9_dequantize_b(b); - pbi->dc_idct_add_y_block(xd->qcoeff, - xd->block[0].dequant, - xd->predictor, - xd->dst.y_buffer, - xd->dst.y_stride, - xd->eobs, - xd->block[24].diff); } - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, - xd->eobs + 16); + xd); } } @@ -548,7 +462,7 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, tx_type, xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob); + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); } else { vp9_dequant_idct_add_16x16( xd->qcoeff, xd->block[0].dequant, @@ -561,14 +475,13 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->block[16].dequant, xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); + xd->dst.uv_stride, xd); }; static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc, int n, int maska, int shiftb) { int x_idx = n & maska, y_idx = n >> shiftb; - BLOCKD *b = &xd->block[24]; TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -578,7 +491,6 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int16_t *q = xd->block[idx].qcoeff; int16_t *dq = xd->block[0].dequant; int stride = xd->dst.y_stride; - BLOCKD *b = &xd->block[ib]; tx_type = get_tx_type_8x8(xd, &xd->block[ib]); if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_8x8_c( @@ -587,7 +499,7 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 1) * 8, xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + x_idx * 16 + (i & 1) * 8, - stride, stride, b->eob); + stride, stride, xd->eobs[idx]); } else { vp9_dequant_idct_add_8x8_c( q, dq, @@ -595,42 +507,26 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 1) * 8, xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + x_idx * 16 + (i & 1) * 8, - stride, stride, 0, b->eob); + stride, stride, xd->eobs[idx]); } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); } } else { - vp9_dequantize_b_2x2(b); - vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - vp9_dequant_dc_idct_add_y_block_8x8_inplace_c( + vp9_dequant_idct_add_y_block_8x8_inplace_c( xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); + xd->dst.y_stride, xd); } + vp9_dequant_idct_add_uv_block_8x8_inplace_c( + xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd); }; static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc, int n, int maska, int shiftb) { int x_idx = n & maska, y_idx = n >> shiftb; - BLOCKD *b = &xd->block[24]; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -644,49 +540,34 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 3) * 4, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride, b->eob); + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); } else { - vp9_dequant_idct_add_c( + xd->itxm_add( b->qcoeff, b->dequant, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride); + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); } } } else { - vp9_dequantize_b(b); - if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - } - vp9_dequant_dc_idct_add_y_block_4x4_inplace_c( + vp9_dequant_idct_add_y_block_4x4_inplace_c( xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + xd->dst.y_stride, xd); } vp9_dequant_idct_add_uv_block_4x4_inplace_c( xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); + xd->dst.uv_stride, xd); }; static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { - int i, n, eobtotal; + int n, eobtotal; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; MODE_INFO *orig_mi = xd->mode_info_context; @@ -720,7 +601,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. */ - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } @@ -731,7 +612,8 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } /* dequantization and idct */ @@ -770,7 +652,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.uv_stride * y_idx * 16, xd->dst.v_buffer + x_idx * 16 + xd->dst.uv_stride * y_idx * 16, - xd->dst.uv_stride, xd->eobs + 16); + xd->dst.uv_stride, xd); } } } else { @@ -783,10 +665,6 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; xd->left_context = pc->left_context + y_idx; xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - for (i = 0; i < 25; i++) { - xd->block[i].eob = 0; - xd->eobs[i] = 0; - } eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); if (eobtotal == 0) { // skip loopfilter @@ -812,7 +690,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { - int i, n, eobtotal; + int n, eobtotal; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; MODE_INFO *orig_mi = xd->mode_info_context; @@ -842,7 +720,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. */ - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } @@ -853,7 +731,8 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } /* dequantization and idct */ @@ -876,7 +755,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16); + xd->dst.uv_stride, xd); } } else { for (n = 0; n < 4; n++) { @@ -888,10 +767,6 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; xd->left_context = pc->left_context + y_idx + (mb_row & 2); xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - for (i = 0; i < 25; i++) { - xd->block[i].eob = 0; - xd->eobs[i] = 0; - } eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); if (eobtotal == 0) { // skip loopfilter @@ -919,7 +794,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { int eobtotal = 0; MB_PREDICTION_MODE mode; - int i; int tx_size; assert(!xd->mode_info_context->mbmi.sb_type); @@ -934,10 +808,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, if (xd->mode_info_context->mbmi.mb_skip_coeff) { vp9_reset_mb_tokens_context(xd); } else if (!bool_error(bc)) { - for (i = 0; i < 25; i++) { - xd->block[i].eob = 0; - xd->eobs[i] = 0; - } if (mode != B_PRED) { eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); } @@ -948,14 +818,15 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, &pbi->common); - if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV - && mode != I8X8_PRED - && !bool_error(bc)) { + if (eobtotal == 0 && + mode != B_PRED && + mode != SPLITMV && + mode != I8X8_PRED && + !bool_error(bc)) { /* Special case: Force the loopfilter to skip when eobtotal and - * mb_skip_coeff are zero. - * */ + mb_skip_coeff are zero. */ xd->mode_info_context->mbmi.mb_skip_coeff = 1; - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } #ifdef DEC_DEBUG @@ -982,7 +853,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.mode, tx_size, xd->mode_info_context->mbmi.interp_filter); #endif - vp9_build_inter_predictors_mb(xd); + vp9_build_inter_predictors_mb(xd, mb_row, mb_col); } if (tx_size == TX_16X16) { @@ -1072,8 +943,9 @@ static void set_offsets(VP9D_COMP *pbi, int block_size, xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col > cm->cur_tile_mb_col_start); + xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; @@ -1088,23 +960,14 @@ static void set_refs(VP9D_COMP *pbi, int block_size, MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->ref_frame > INTRA_FRAME) { - int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride; + int ref_fb_idx; /* Select the appropriate reference frame for this MB */ - if (mbmi->ref_frame == LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; - else if (mbmi->ref_frame == GOLDEN_FRAME) - ref_fb_idx = cm->gld_fb_idx; - else - ref_fb_idx = cm->alt_fb_idx; - - ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col; - xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset; - ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col; - xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset; - xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset; + ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1]; + xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); /* propagate errors from reference frames */ xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted; @@ -1113,19 +976,11 @@ static void set_refs(VP9D_COMP *pbi, int block_size, int second_ref_fb_idx; /* Select the appropriate reference frame for this MB */ - if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cm->lst_fb_idx; - else if (mbmi->second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cm->gld_fb_idx; - else - second_ref_fb_idx = cm->alt_fb_idx; - - xd->second_pre.y_buffer = - cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset; - xd->second_pre.u_buffer = - cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset; - xd->second_pre.v_buffer = - cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset; + second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; + + setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); /* propagate errors from reference frames */ xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted; @@ -1156,7 +1011,8 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, // For a SB there are 2 left contexts, each pertaining to a MB row within vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); - for (mb_col = 0; mb_col < pc->mb_cols; mb_col += 4) { + for (mb_col = pc->cur_tile_mb_col_start; + mb_col < pc->cur_tile_mb_col_end; mb_col += 4) { if (vp9_read(bc, pc->sb64_coded)) { set_offsets(pbi, 64, mb_row, mb_col); vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc); @@ -1204,8 +1060,7 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); update_blockd_bmi(xd); set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx); - vp9_intra_prediction_down_copy(xd); - decode_macroblock(pbi, xd, mb_row, mb_col, bc); + decode_macroblock(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); /* check if the boolean decoder has suffered an error */ xd->corrupted |= bool_error(bc); @@ -1225,7 +1080,7 @@ static unsigned int read_partition_size(const unsigned char *cx_size) { static int read_is_valid(const unsigned char *start, size_t len, const unsigned char *end) { - return (start + len > start && start + len <= end); + return start + len > start && start + len <= end; } @@ -1265,55 +1120,14 @@ static void init_frame(VP9D_COMP *pbi) { MACROBLOCKD *const xd = &pbi->mb; if (pc->frame_type == KEY_FRAME) { - - if (pc->last_frame_seg_map) - vpx_memset(pc->last_frame_seg_map, 0, (pc->mb_rows * pc->mb_cols)); - - vp9_init_mv_probs(pc); - - vp9_init_mbmode_probs(pc); - vp9_default_bmode_probs(pc->fc.bmode_prob); - - vp9_default_coef_probs(pc); - vp9_kf_default_bmode_probs(pc->kf_bmode_prob); - - // Reset the segment feature data to the default stats: - // Features disabled, 0, with delta coding (Default state). - vp9_clearall_segfeatures(xd); - - xd->mb_segment_abs_delta = SEGMENT_DELTADATA; - - /* reset the mode ref deltasa for loop filter */ - vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - + vp9_setup_past_independence(pc, xd); /* All buffers are implicitly updated on key frames. */ - pc->refresh_golden_frame = 1; - pc->refresh_alt_ref_frame = 1; - pc->copy_buffer_to_gf = 0; - pc->copy_buffer_to_arf = 0; - - /* Note that Golden and Altref modes cannot be used on a key frame so - * ref_frame_sign_bias[] is undefined and meaningless - */ - pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; - pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; - - vp9_init_mode_contexts(&pbi->common); - vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); - vpx_memcpy(&pc->lfc_a, &pc->fc, sizeof(pc->fc)); - - vpx_memset(pc->prev_mip, 0, - (pc->mb_cols + 1) * (pc->mb_rows + 1)* sizeof(MODE_INFO)); - vpx_memset(pc->mip, 0, - (pc->mb_cols + 1) * (pc->mb_rows + 1)* sizeof(MODE_INFO)); - - vp9_update_mode_info_border(pc, pc->mip); - vp9_update_mode_info_in_image(pc, pc->mi); - - - } else { + pbi->refresh_frame_flags = (1 << NUM_REF_FRAMES) - 1; + } else if (pc->error_resilient_mode) { + vp9_setup_past_independence(pc, xd); + } + if (pc->frame_type != KEY_FRAME) { if (!pc->use_bilinear_mc_filter) pc->mcomp_filter_type = EIGHTTAP; else @@ -1333,27 +1147,26 @@ static void init_frame(VP9D_COMP *pbi) { xd->fullpixel_mask = 0xffffffff; if (pc->full_pixel) xd->fullpixel_mask = 0xfffffff8; - } static void read_coef_probs_common(BOOL_DECODER* const bc, vp9_coeff_probs *coef_probs, int block_types) { - int i, j, k, l; + int i, j, k, l, m; if (vp9_read_bit(bc)) { for (i = 0; i < block_types; i++) { - for (j = !i; j < COEF_BANDS; j++) { - /* NB: This j loop starts from 1 on block type i == 0 */ - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - if (k >= 3 && ((i == 0 && j == 1) || - (i > 0 && j == 0))) - continue; - for (l = 0; l < ENTROPY_NODES; l++) { - vp9_prob *const p = coef_probs[i][j][k] + l; - - if (vp9_read(bc, COEF_UPDATE_PROB)) { - *p = read_prob_diff_update(bc, *p); + for (j = 0; j < REF_TYPES; j++) { + for (k = 0; k < COEF_BANDS; k++) { + for (l = 0; l < PREV_COEF_CONTEXTS; l++) { + if (l >= 3 && k == 0) + continue; + for (m = 0; m < ENTROPY_NODES; m++) { + vp9_prob *const p = coef_probs[i][j][k][l] + m; + + if (vp9_read(bc, COEF_UPDATE_PROB)) { + *p = read_prob_diff_update(bc, *p); + } } } } @@ -1365,23 +1178,39 @@ static void read_coef_probs_common(BOOL_DECODER* const bc, static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { VP9_COMMON *const pc = &pbi->common; - read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES_4X4); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4); + read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES); if (pbi->common.txfm_mode != ONLY_4X4) { - read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES_8X8); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8); + read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES); } if (pbi->common.txfm_mode > ALLOW_8X8) { - read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES_16X16); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16, - BLOCK_TYPES_16X16); + read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES); } if (pbi->common.txfm_mode > ALLOW_16X16) { read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32); } } +static void update_frame_size(VP9D_COMP *pbi) { + VP9_COMMON *cm = &pbi->common; + + /* our internal buffers are always multiples of 16 */ + int width = (cm->Width + 15) & ~15; + int height = (cm->Height + 15) & ~15; + + cm->mb_rows = height >> 4; + cm->mb_cols = width >> 4; + cm->MBs = cm->mb_rows * cm->mb_cols; + cm->mode_info_stride = cm->mb_cols + 1; + memset(cm->mip, 0, + (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO)); + vp9_update_mode_info_border(cm, cm->mip); + + cm->mi = cm->mip + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; + vp9_update_mode_info_in_image(cm, cm->mi); +} + int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { BOOL_DECODER header_bc, residual_bc; VP9_COMMON *const pc = &pbi->common; @@ -1394,13 +1223,13 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { int i, j; int corrupt_tokens = 0; + // printf("Decoding frame %d\n", pc->current_video_frame); /* start with no corruption of current frame */ xd->corrupted = 0; pc->yv12_fb[pc->new_fb_idx].corrupted = 0; if (data_end - data < 3) { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet"); + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } else { pc->last_frame_type = pc->frame_type; pc->frame_type = (FRAME_TYPE)(data[0] & 1); @@ -1419,9 +1248,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vp9_setup_version(pc); if (pc->frame_type == KEY_FRAME) { - const int Width = pc->Width; - const int Height = pc->Height; - /* vet via sync code */ /* When error concealment is enabled we should only check the sync * code if we have enough bits available @@ -1431,41 +1257,59 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); } + data += 3; + } + { + const int width = pc->Width; + const int height = pc->Height; /* If error concealment is enabled we should only parse the new size * if we have enough data. Otherwise we will end up with the wrong * size. */ - if (data + 6 < data_end) { - pc->Width = (data[3] | (data[4] << 8)) & 0x3fff; - pc->horiz_scale = data[4] >> 6; - pc->Height = (data[5] | (data[6] << 8)) & 0x3fff; - pc->vert_scale = data[6] >> 6; + if (data + 4 < data_end) { + pc->Width = (data[0] | (data[1] << 8)) & 0x3fff; + pc->horiz_scale = data[1] >> 6; + pc->Height = (data[2] | (data[3] << 8)) & 0x3fff; + pc->vert_scale = data[3] >> 6; } - data += 7; + data += 4; - if (Width != pc->Width || Height != pc->Height) { + if (width != pc->Width || height != pc->Height) { if (pc->Width <= 0) { - pc->Width = Width; + pc->Width = width; vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Invalid frame width"); } if (pc->Height <= 0) { - pc->Height = Height; + pc->Height = height; vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Invalid frame height"); } - if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); + if (!pbi->initial_width || !pbi->initial_height) { + if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffers"); + pbi->initial_width = pc->Width; + pbi->initial_height = pc->Height; + } + + if (pc->Width > pbi->initial_width) { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Frame width too large"); + } + + if (pc->Height > pbi->initial_height) { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Frame height too large"); + } + + update_frame_size(pbi); } } } -#ifdef DEC_DEBUG - printf("Decode frame %d\n", pc->current_video_frame); -#endif if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || pc->Width == 0 || pc->Height == 0) { @@ -1474,15 +1318,19 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { init_frame(pbi); + /* Reset the frame pointers to the current frame size */ + vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx], + pc->mb_cols * 16, pc->mb_rows * 16, + VP9BORDERINPIXELS); + if (vp9_start_decode(&header_bc, data, (unsigned int)first_partition_length_in_bytes)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); - if (pc->frame_type == KEY_FRAME) { - pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc); - pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc); - } + pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc); + pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc); + pc->error_resilient_mode = vp9_read_bit(&header_bc); /* Is segmentation enabled */ xd->segmentation_enabled = (unsigned char)vp9_read_bit(&header_bc); @@ -1512,6 +1360,22 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->segment_pred_probs[i] = 255; } } + + if (pc->temporal_update) { + int count[4]; + const vp9_prob *p = xd->mb_segment_tree_probs; + vp9_prob *p_mod = xd->mb_segment_mispred_tree_probs; + + count[0] = p[0] * p[1]; + count[1] = p[0] * (256 - p[1]); + count[2] = (256 - p[0]) * p[2]; + count[3] = (256 - p[0]) * (256 - p[2]); + + p_mod[0] = get_binary_prob(count[1], count[2] + count[3]); + p_mod[1] = get_binary_prob(count[0], count[2] + count[3]); + p_mod[2] = get_binary_prob(count[0] + count[1], count[3]); + p_mod[3] = get_binary_prob(count[0] + count[1], count[2]); + } } // Is the segment data being updated xd->update_mb_segmentation_data = (unsigned char)vp9_read_bit(&header_bc); @@ -1566,17 +1430,20 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->sb64_coded = vp9_read_literal(&header_bc, 8); pc->sb32_coded = vp9_read_literal(&header_bc, 8); - - /* Read the loop filter level and type */ - pc->txfm_mode = vp9_read_literal(&header_bc, 2); - if (pc->txfm_mode == 3) - pc->txfm_mode += vp9_read_bit(&header_bc); - if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); - pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); - pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); + xd->lossless = vp9_read_bit(&header_bc); + if (xd->lossless) { + pc->txfm_mode = ONLY_4X4; + } else { + /* Read the loop filter level and type */ + pc->txfm_mode = vp9_read_literal(&header_bc, 2); + if (pc->txfm_mode == 3) + pc->txfm_mode += vp9_read_bit(&header_bc); + if (pc->txfm_mode == TX_MODE_SELECT) { + pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); + pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); + pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); + } } - pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc); pc->filter_level = vp9_read_literal(&header_bc, 6); pc->sharpness_level = vp9_read_literal(&header_bc, 3); @@ -1617,20 +1484,13 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { // Dummy read for now vp9_read_literal(&header_bc, 2); - setup_token_decoder(pbi, data + first_partition_length_in_bytes, - &residual_bc); - /* Read the default quantizers. */ { - int Q, q_update; + int q_update = 0; + pc->base_qindex = vp9_read_literal(&header_bc, QINDEX_BITS); - Q = vp9_read_literal(&header_bc, QINDEX_BITS); - pc->base_qindex = Q; - q_update = 0; /* AC 1st order Q = default */ pc->y1dc_delta_q = get_delta_q(&header_bc, pc->y1dc_delta_q, &q_update); - pc->y2dc_delta_q = get_delta_q(&header_bc, pc->y2dc_delta_q, &q_update); - pc->y2ac_delta_q = get_delta_q(&header_bc, pc->y2ac_delta_q, &q_update); pc->uvdc_delta_q = get_delta_q(&header_bc, pc->uvdc_delta_q, &q_update); pc->uvac_delta_q = get_delta_q(&header_bc, pc->uvac_delta_q, &q_update); @@ -1645,27 +1505,20 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { * For all non key frames the GF and ARF refresh flags and sign bias * flags must be set explicitly. */ - if (pc->frame_type != KEY_FRAME) { + if (pc->frame_type == KEY_FRAME) { + pc->active_ref_idx[0] = pc->new_fb_idx; + pc->active_ref_idx[1] = pc->new_fb_idx; + pc->active_ref_idx[2] = pc->new_fb_idx; + } else { /* Should the GF or ARF be updated from the current frame */ - pc->refresh_golden_frame = vp9_read_bit(&header_bc); - pc->refresh_alt_ref_frame = vp9_read_bit(&header_bc); - - if (pc->refresh_alt_ref_frame) { - vpx_memcpy(&pc->fc, &pc->lfc_a, sizeof(pc->fc)); - } else { - vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); - } - - /* Buffer to buffer copy flags. */ - pc->copy_buffer_to_gf = 0; + pbi->refresh_frame_flags = vp9_read_literal(&header_bc, NUM_REF_FRAMES); - if (!pc->refresh_golden_frame) - pc->copy_buffer_to_gf = vp9_read_literal(&header_bc, 2); + /* Select active reference frames */ + for (i = 0; i < 3; i++) { + int ref_frame_num = vp9_read_literal(&header_bc, NUM_REF_FRAMES_LG2); - pc->copy_buffer_to_arf = 0; - - if (!pc->refresh_alt_ref_frame) - pc->copy_buffer_to_arf = vp9_read_literal(&header_bc, 2); + pc->active_ref_idx[i] = pc->ref_frame_map[ref_frame_num]; + } pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp9_read_bit(&header_bc); pc->ref_frame_sign_bias[ALTREF_FRAME] = vp9_read_bit(&header_bc); @@ -1685,13 +1538,16 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc); } - pc->refresh_entropy_probs = vp9_read_bit(&header_bc); - if (pc->refresh_entropy_probs == 0) { - vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); + if (!pc->error_resilient_mode) { + pc->refresh_entropy_probs = vp9_read_bit(&header_bc); + pc->frame_parallel_decoding_mode = vp9_read_bit(&header_bc); + } else { + pc->refresh_entropy_probs = 0; + pc->frame_parallel_decoding_mode = 1; } - - pc->refresh_last_frame = (pc->frame_type == KEY_FRAME) - || vp9_read_bit(&header_bc); + pc->frame_context_idx = vp9_read_literal(&header_bc, NUM_FRAME_CONTEXTS_LG2); + vpx_memcpy(&pc->fc, &pc->frame_contexts[pc->frame_context_idx], + sizeof(pc->fc)); // Read inter mode probability context updates if (pc->frame_type != KEY_FRAME) { @@ -1708,11 +1564,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { #if CONFIG_NEW_MVREF // If Key frame reset mv ref id probabilities to defaults - if (pc->frame_type == KEY_FRAME) { - // Defaults probabilities for encoding the MV ref id signal - vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB, - sizeof(xd->mb_mv_ref_probs)); - } else { + if (pc->frame_type != KEY_FRAME) { // Read any mv_ref index probability updates int i, j; @@ -1735,28 +1587,20 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { if (0) { FILE *z = fopen("decodestats.stt", "a"); - fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", + fprintf(z, "%6d F:%d,R:%d,Q:%d\n", pc->current_video_frame, pc->frame_type, - pc->refresh_golden_frame, - pc->refresh_alt_ref_frame, - pc->refresh_last_frame, + pbi->refresh_frame_flags, pc->base_qindex); fclose(z); } vp9_copy(pbi->common.fc.pre_coef_probs_4x4, pbi->common.fc.coef_probs_4x4); - vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_4x4, - pbi->common.fc.hybrid_coef_probs_4x4); vp9_copy(pbi->common.fc.pre_coef_probs_8x8, pbi->common.fc.coef_probs_8x8); - vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_8x8, - pbi->common.fc.hybrid_coef_probs_8x8); vp9_copy(pbi->common.fc.pre_coef_probs_16x16, pbi->common.fc.coef_probs_16x16); - vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16, - pbi->common.fc.hybrid_coef_probs_16x16); vp9_copy(pbi->common.fc.pre_coef_probs_32x32, pbi->common.fc.coef_probs_32x32); vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob); @@ -1771,11 +1615,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { #endif pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc; vp9_zero(pbi->common.fc.coef_counts_4x4); - vp9_zero(pbi->common.fc.hybrid_coef_counts_4x4); vp9_zero(pbi->common.fc.coef_counts_8x8); - vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8); vp9_zero(pbi->common.fc.coef_counts_16x16); - vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16); vp9_zero(pbi->common.fc.coef_counts_32x32); vp9_zero(pbi->common.fc.ymode_counts); vp9_zero(pbi->common.fc.sb_ymode_counts); @@ -1792,8 +1633,11 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { read_coef_probs(pbi, &header_bc); - vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); - vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); + /* Initialize xd pointers. Any reference should do for xd->pre, so use 0. */ + vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]], + sizeof(YV12_BUFFER_CONFIG)); + vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], + sizeof(YV12_BUFFER_CONFIG)); // Create the segmentation map structure and set to 0 if (!pc->last_frame_seg_map) @@ -1815,14 +1659,106 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vp9_decode_mode_mvs_init(pbi, &header_bc); - vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); + /* tile info */ + { + const unsigned char *data_ptr = data + first_partition_length_in_bytes; + int tile_row, tile_col, delta_log2_tiles; + + vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles); + while (delta_log2_tiles--) { + if (vp9_read_bit(&header_bc)) { + pc->log2_tile_columns++; + } else { + break; + } + } + pc->log2_tile_rows = vp9_read_bit(&header_bc); + if (pc->log2_tile_rows) + pc->log2_tile_rows += vp9_read_bit(&header_bc); + pc->tile_columns = 1 << pc->log2_tile_columns; + pc->tile_rows = 1 << pc->log2_tile_rows; + + vpx_memset(pc->above_context, 0, + sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); + + if (pbi->oxcf.inv_tile_order) { + const int n_cols = pc->tile_columns; + const unsigned char *data_ptr2[4][1 << 6]; + BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak); + + // pre-initialize the offsets, we're going to read in inverse order + data_ptr2[0][0] = data_ptr; + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + if (tile_row) { + int size = data_ptr2[tile_row - 1][n_cols - 1][0] | + (data_ptr2[tile_row - 1][n_cols - 1][1] << 8) | + (data_ptr2[tile_row - 1][n_cols - 1][2] << 16) | + (data_ptr2[tile_row - 1][n_cols - 1][3] << 24); + data_ptr2[tile_row - 1][n_cols - 1] += 4; + data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size; + } + + for (tile_col = 1; tile_col < n_cols; tile_col++) { + int size = data_ptr2[tile_row][tile_col - 1][0] | + (data_ptr2[tile_row][tile_col - 1][1] << 8) | + (data_ptr2[tile_row][tile_col - 1][2] << 16) | + (data_ptr2[tile_row][tile_col - 1][3] << 24); + data_ptr2[tile_row][tile_col - 1] += 4; + data_ptr2[tile_row][tile_col] = + data_ptr2[tile_row][tile_col - 1] + size; + } + } + + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) { + vp9_get_tile_col_offsets(pc, tile_col); + setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], &residual_bc); + + /* Decode a row of superblocks */ + for (mb_row = pc->cur_tile_mb_row_start; + mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { + decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + } + if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1) + bc_bak = residual_bc; + } + } + residual_bc = bc_bak; + } else { + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) { + vp9_get_tile_col_offsets(pc, tile_col); + + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) + setup_token_decoder(pbi, data_ptr + 4, &residual_bc); + else + setup_token_decoder(pbi, data_ptr, &residual_bc); + + /* Decode a row of superblocks */ + for (mb_row = pc->cur_tile_mb_row_start; + mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { + decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + } - /* Decode a row of superblocks */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { + int size = data_ptr[0] | + (data_ptr[1] << 8) | + (data_ptr[2] << 16) | + (data_ptr[3] << 24); + data_ptr += 4 + size; + } + } + } + } } corrupt_tokens |= xd->corrupted; + // keep track of the last coded dimensions + pc->last_width = pc->Width; + pc->last_height = pc->Height; + /* Collect information about decoder corruption. */ /* 1. Check first boolean decoder for errors. */ pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc); @@ -1838,23 +1774,21 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { "A stream must start with a complete key frame"); } - vp9_adapt_coef_probs(pc); + if (!pc->error_resilient_mode && + !pc->frame_parallel_decoding_mode) + vp9_adapt_coef_probs(pc); if (pc->frame_type != KEY_FRAME) { - vp9_adapt_mode_probs(pc); - vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv); - vp9_update_mode_context(&pbi->common); + if (!pc->error_resilient_mode && + !pc->frame_parallel_decoding_mode) { + vp9_adapt_mode_probs(pc); + vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv); + vp9_adapt_mode_context(&pbi->common); + } } - /* If this was a kf or Gf note the Q used */ - if ((pc->frame_type == KEY_FRAME) || - pc->refresh_golden_frame || pc->refresh_alt_ref_frame) { - pc->last_kf_gf_q = pc->base_qindex; - } if (pc->refresh_entropy_probs) { - if (pc->refresh_alt_ref_frame) - vpx_memcpy(&pc->lfc_a, &pc->fc, sizeof(pc->fc)); - else - vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); + vpx_memcpy(&pc->frame_contexts[pc->frame_context_idx], &pc->fc, + sizeof(pc->fc)); } #ifdef PACKET_TESTING @@ -1866,7 +1800,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { fclose(f); } #endif - // printf("Frame %d Done\n", frame_count++); /* Find the end of the coded buffer */ while (residual_bc.count > CHAR_BIT diff --git a/vp9/decoder/vp9_decodframe.h b/vp9/decoder/vp9_decodframe.h index ae25428c4..391a26519 100644 --- a/vp9/decoder/vp9_decodframe.h +++ b/vp9/decoder/vp9_decodframe.h @@ -14,6 +14,6 @@ struct VP9Decompressor; -extern void vp9_init_de_quantizer(struct VP9Decompressor *pbi); +void vp9_init_de_quantizer(struct VP9Decompressor *pbi); #endif // VP9_DECODER_VP9_DECODFRAME_H_ diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 354d2bd36..5a98b1150 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -14,14 +14,14 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/common/vp9_common.h" + static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride, int width, int height) { int r, c; for (r = 0; r < height; r++) { - for (c = 0; c < width; c++) { + for (c = 0; c < width; c++) dest[c] = clip_pixel(diff[c] + pred[c]); - } dest += stride; diff += width; @@ -35,126 +35,107 @@ static void add_constant_residual(const int16_t diff, const uint8_t *pred, int r, c; for (r = 0; r < height; r++) { - for (c = 0; c < width; c++) { + for (c = 0; c < width; c++) dest[c] = clip_pixel(diff + pred[c]); - } dest += stride; pred += pitch; } } -void vp9_dequantize_b_c(BLOCKD *d) { - - int i; - int16_t *DQ = d->dqcoeff; - const int16_t *Q = d->qcoeff; - const int16_t *DQC = d->dequant; - - for (i = 0; i < 16; i++) { - DQ[i] = Q[i] * DQC[i]; - } -} - - void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, - int pitch, int stride, uint16_t eobs) { - int16_t output[16]; - int16_t *diff_ptr = output; + int pitch, int stride, int eob) { int i; + int16_t output[16]; - for (i = 0; i < 16; i++) { - input[i] = dq[i] * input[i]; - } - - vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs); + for (i = 0; i < 16; i++) + input[i] *= dq[i]; + vp9_short_iht4x4(input, output, 4, tx_type); vpx_memset(input, 0, 32); - - add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(output, pred, pitch, dest, stride, 4, 4); } void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, - int pitch, int stride, uint16_t eobs) { + int pitch, int stride, int eob) { int16_t output[64]; - int16_t *diff_ptr = output; - int i; - if (eobs == 0) { - /* All 0 DCT coefficient */ + + if (eob == 0) { + // All 0 DCT coefficients vp9_copy_mem8x8(pred, pitch, dest, stride); - } else if (eobs > 0) { - input[0] = dq[0] * input[0]; - for (i = 1; i < 64; i++) { - input[i] = dq[1] * input[i]; - } + } else if (eob > 0) { + int i; - vp9_ihtllm(input, output, 16, tx_type, 8, eobs); + input[0] *= dq[0]; + for (i = 1; i < 64; i++) + input[i] *= dq[1]; + vp9_short_iht8x8(input, output, 8, tx_type); vpx_memset(input, 0, 128); - - add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); + add_residual(output, pred, pitch, dest, stride, 8, 8); } } void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride) { - int16_t output[16]; - int16_t *diff_ptr = output; + uint8_t *dest, int pitch, int stride, int eob) { int i; + int16_t output[16]; - for (i = 0; i < 16; i++) { - input[i] = dq[i] * input[i]; - } + if (eob > 1) { + for (i = 0; i < 16; i++) + input[i] *= dq[i]; - /* the idct halves ( >> 1) the pitch */ - vp9_short_idct4x4llm_c(input, output, 4 << 1); + // the idct halves ( >> 1) the pitch + vp9_short_idct4x4llm_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); + vpx_memset(input, 0, 32); - add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(output, pred, pitch, dest, stride, 4, 4); + } else { + vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride); + ((int *)input)[0] = 0; + } } void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, int Dc) { + uint8_t *dest, int pitch, int stride, int dc) { int i; int16_t output[16]; - int16_t *diff_ptr = output; - input[0] = (int16_t)Dc; + input[0] = dc; - for (i = 1; i < 16; i++) { - input[i] = dq[i] * input[i]; - } + for (i = 1; i < 16; i++) + input[i] *= dq[i]; - /* the idct halves ( >> 1) the pitch */ + // the idct halves ( >> 1) the pitch vp9_short_idct4x4llm_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); - - add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(output, pred, pitch, dest, stride, 4, 4); } -#if CONFIG_LOSSLESS void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, - int pitch, int stride) { - int16_t output[16]; - int16_t *diff_ptr = output; + int pitch, int stride, int eob) { int i; + int16_t output[16]; - for (i = 0; i < 16; i++) { - input[i] = dq[i] * input[i]; - } + if (eob > 1) { + for (i = 0; i < 16; i++) + input[i] *= dq[i]; - vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); + vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); + vpx_memset(input, 0, 32); - add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(output, pred, pitch, dest, stride, 4, 4); + } else { + vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride); + ((int *)input)[0] = 0; + } } void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, @@ -163,76 +144,55 @@ void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, int pitch, int stride, int dc) { int i; int16_t output[16]; - int16_t *diff_ptr = output; - input[0] = (int16_t)dc; + input[0] = dc; - for (i = 1; i < 16; i++) { - input[i] = dq[i] * input[i]; - } + for (i = 1; i < 16; i++) + input[i] *= dq[i]; vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); vpx_memset(input, 0, 32); - - add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); -} -#endif - -void vp9_dequantize_b_2x2_c(BLOCKD *d) { - int i; - int16_t *DQ = d->dqcoeff; - const int16_t *Q = d->qcoeff; - const int16_t *DQC = d->dequant; - - for (i = 0; i < 16; i++) { - DQ[i] = (int16_t)((Q[i] * DQC[i])); - } + add_residual(output, pred, pitch, dest, stride, 4, 4); } void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, - int stride, int dc, int eob) { + int stride, int eob) { int16_t output[64]; - int16_t *diff_ptr = output; - int i; - /* If dc is 1, then input[0] is the reconstructed value, do not need - * dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - */ - if (!dc) - input[0] *= dq[0]; - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to decide what to do. - * TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - * Combine that with code here. - */ + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + input[0] *= dq[0]; + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + // Combine that with code here. if (eob == 0) { - /* All 0 DCT coefficient */ + // All 0 DCT coefficients vp9_copy_mem8x8(pred, pitch, dest, stride); } else if (eob == 1) { - /* DC only DCT coefficient. */ + // DC only DCT coefficient + int16_t in = input[0]; int16_t out; - /* Note: the idct1 will need to be modified accordingly whenever - * vp9_short_idct8x8_c() is modified. */ - out = (input[0] + 1 + (input[0] < 0)) >> 2; - out = out << 3; - out = (out + 32) >> 7; - + // Note: the idct1 will need to be modified accordingly whenever + // vp9_short_idct8x8_c() is modified. + vp9_short_idct1_8x8_c(&in, &out); input[0] = 0; add_constant_residual(out, pred, pitch, dest, stride, 8, 8); } else if (eob <= 10) { - input[1] = input[1] * dq[1]; - input[2] = input[2] * dq[1]; - input[3] = input[3] * dq[1]; - input[8] = input[8] * dq[1]; - input[9] = input[9] * dq[1]; - input[10] = input[10] * dq[1]; - input[16] = input[16] * dq[1]; - input[17] = input[17] * dq[1]; - input[24] = input[24] * dq[1]; + input[1] *= dq[1]; + input[2] *= dq[1]; + input[3] *= dq[1]; + input[8] *= dq[1]; + input[9] *= dq[1]; + input[10] *= dq[1]; + input[16] *= dq[1]; + input[17] *= dq[1]; + input[24] *= dq[1]; vp9_short_idct10_8x8_c(input, output, 16); @@ -241,48 +201,48 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, input[16] = input[17] = 0; input[24] = 0; - add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); + add_residual(output, pred, pitch, dest, stride, 8, 8); } else { + int i; + // recover quantizer for 4 4x4 blocks - for (i = 1; i < 64; i++) { - input[i] = input[i] * dq[1]; - } - // the idct halves ( >> 1) the pitch - vp9_short_idct8x8_c(input, output, 16); + for (i = 1; i < 64; i++) + input[i] *= dq[1]; + // the idct halves ( >> 1) the pitch + vp9_short_idct8x8_c(input, output, 8 << 1); vpx_memset(input, 0, 128); - - add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); - + add_residual(output, pred, pitch, dest, stride, 8, 8); } } void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, - uint16_t eobs) { + int eob) { int16_t output[256]; - int16_t *diff_ptr = output; - int i; - if (eobs == 0) { - /* All 0 DCT coefficient */ + + if (eob == 0) { + // All 0 DCT coefficients vp9_copy_mem16x16(pred, pitch, dest, stride); - } else if (eobs > 0) { - input[0]= input[0] * dq[0]; + } else if (eob > 0) { + int i; + + input[0] *= dq[0]; // recover quantizer for 4 4x4 blocks for (i = 1; i < 256; i++) - input[i] = input[i] * dq[1]; + input[i] *= dq[1]; // inverse hybrid transform - vp9_ihtllm(input, output, 32, tx_type, 16, eobs); + vp9_short_iht16x16(input, output, 16, tx_type); // the idct halves ( >> 1) the pitch // vp9_short_idct16x16_c(input, output, 32); vpx_memset(input, 0, 512); - add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(output, pred, pitch, dest, stride, 16, 16); } } @@ -290,8 +250,6 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob) { int16_t output[256]; - int16_t *diff_ptr = output; - int i; /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ @@ -300,28 +258,26 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, vp9_copy_mem16x16(pred, pitch, dest, stride); } else if (eob == 1) { /* DC only DCT coefficient. */ + int16_t in = input[0] * dq[0]; int16_t out; - /* Note: the idct1 will need to be modified accordingly whenever * vp9_short_idct16x16_c() is modified. */ - out = (input[0] * dq[0] + 2) >> 2; - out = (out + 2) >> 2; - out = (out + 4) >> 3; - + vp9_short_idct1_16x16_c(&in, &out); input[0] = 0; add_constant_residual(out, pred, pitch, dest, stride, 16, 16); } else if (eob <= 10) { - input[0]= input[0] * dq[0]; - input[1] = input[1] * dq[1]; - input[2] = input[2] * dq[1]; - input[3] = input[3] * dq[1]; - input[16] = input[16] * dq[1]; - input[17] = input[17] * dq[1]; - input[18] = input[18] * dq[1]; - input[32] = input[32] * dq[1]; - input[33] = input[33] * dq[1]; - input[48] = input[48] * dq[1]; + input[0] *= dq[0]; + + input[1] *= dq[1]; + input[2] *= dq[1]; + input[3] *= dq[1]; + input[16] *= dq[1]; + input[17] *= dq[1]; + input[18] *= dq[1]; + input[32] *= dq[1]; + input[33] *= dq[1]; + input[48] *= dq[1]; // the idct halves ( >> 1) the pitch vp9_short_idct10_16x16_c(input, output, 32); @@ -331,20 +287,22 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, input[32] = input[33] = 0; input[48] = 0; - add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(output, pred, pitch, dest, stride, 16, 16); } else { - input[0]= input[0] * dq[0]; + int i; + + input[0] *= dq[0]; // recover quantizer for 4 4x4 blocks for (i = 1; i < 256; i++) - input[i] = input[i] * dq[1]; + input[i] *= dq[1]; // the idct halves ( >> 1) the pitch - vp9_short_idct16x16_c(input, output, 32); + vp9_short_idct16x16_c(input, output, 16 << 1); vpx_memset(input, 0, 512); - add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(output, pred, pitch, dest, stride, 16, 16); } } @@ -352,23 +310,51 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob) { int16_t output[1024]; - int i; - - input[0]= input[0] * dq[0] / 2; - for (i = 1; i < 1024; i++) - input[i] = input[i] * dq[1] / 2; - vp9_short_idct32x32_c(input, output, 64); - vpx_memset(input, 0, 2048); - add_residual(output, pred, pitch, dest, stride, 32, 32); + if (eob) { + input[0] = input[0] * dq[0] / 2; + if (eob == 1) { + vp9_short_idct1_32x32(input, output); + add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32); + input[0] = 0; + } else if (eob <= 10) { + input[1] = input[1] * dq[1] / 2; + input[2] = input[2] * dq[1] / 2; + input[3] = input[3] * dq[1] / 2; + input[32] = input[32] * dq[1] / 2; + input[33] = input[33] * dq[1] / 2; + input[34] = input[34] * dq[1] / 2; + input[64] = input[64] * dq[1] / 2; + input[65] = input[65] * dq[1] / 2; + input[96] = input[96] * dq[1] / 2; + + // the idct halves ( >> 1) the pitch + vp9_short_idct10_32x32(input, output, 64); + + input[0] = input[1] = input[2] = input[3] = 0; + input[32] = input[33] = input[34] = 0; + input[64] = input[65] = 0; + input[96] = 0; + + add_residual(output, pred, pitch, dest, stride, 32, 32); + } else { + int i; + for (i = 1; i < 1024; i++) + input[i] = input[i] * dq[1] / 2; + vp9_short_idct32x32(input, output, 64); + vpx_memset(input, 0, 2048); + add_residual(output, pred, pitch, dest, stride, 32, 32); + } + } } void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, - uint16_t *eobs) { - vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, eobs[0]); - vp9_dequant_idct_add_16x16_c(q + 256, dq, - dstv, dstv, stride, stride, eobs[4]); + MACROBLOCKD *xd) { + vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, + xd->eobs[16]); + vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, + xd->eobs[20]); } diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 2a0ae80e8..bde27bb7a 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -11,91 +11,86 @@ #ifndef VP9_DECODER_VP9_DEQUANTIZE_H_ #define VP9_DECODER_VP9_DEQUANTIZE_H_ + #include "vp9/common/vp9_blockd.h" -#if CONFIG_LOSSLESS -extern void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, - unsigned char *pred, - unsigned char *output, - int pitch, int stride); -extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, - unsigned char *pred, - unsigned char *output, - int pitch, int stride, int dc); -extern void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, - const int16_t *dq, - unsigned char *pre, - unsigned char *dst, - int stride, - uint16_t *eobs, - const int16_t *dc); -extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, - unsigned char *dst, - int stride, - uint16_t *eobs); -extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, - unsigned char *dst_u, - unsigned char *dst_v, - int stride, - uint16_t *eobs); -#endif - -typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride); -typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); - -typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs, - const int16_t *dc); -typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs); -typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - uint16_t *eobs); + +void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, + unsigned char *pred, + unsigned char *output, + int pitch, int stride, int eob); + +void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, + unsigned char *pred, + unsigned char *output, + int pitch, int stride, int dc); + +void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, + const int16_t *dq, + unsigned char *pre, + unsigned char *dst, + int stride, + const int16_t *dc); + +void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, + unsigned char *pre, + unsigned char *dst, + int stride, + struct macroblockd *xd); + +void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, + unsigned char *pre, + unsigned char *dst_u, + unsigned char *dst_v, + int stride, + struct macroblockd *xd); void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, - int pitch, int stride, uint16_t eobs); + int pitch, int stride, int eob); void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, - uint16_t eobs); + int eob); void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, - int pitch, int stride, uint16_t eobs); + int pitch, int stride, int eob); void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dst, int stride, - uint16_t *eobs, const int16_t *dc, MACROBLOCKD *xd); +void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, + int stride, + MACROBLOCKD *xd); + void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dst, int stride, - uint16_t *eobs, const int16_t *dc, MACROBLOCKD *xd); +void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, + int stride, + MACROBLOCKD *xd); + void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dstu, unsigned char *dstv, int stride, - uint16_t *eobs, MACROBLOCKD *xd); void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dstu, unsigned char *dstv, int stride, - uint16_t *eobs, MACROBLOCKD *xd); -#endif +#endif // VP9_DECODER_VP9_DEQUANTIZE_H_ diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 335c335ca..d3fb25ace 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -63,24 +63,11 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { return decode_bool(br, 128) ? -value_to_sign : value_to_sign; } -#if CONFIG_NEWCOEFCONTEXT -#define PT pn -#define INCREMENT_COUNT(token) \ - do { \ - coef_counts[type][coef_bands[c]][pn][token]++; \ - pn = pt = vp9_prev_token_class[token]; \ - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(coef_bands[c + 1])) \ - pn = vp9_get_coef_neighbor_context( \ - qcoeff_ptr, nodc, neighbors, scan[c + 1]); \ - } while (0) -#else -#define PT pt #define INCREMENT_COUNT(token) \ do { \ - coef_counts[type][coef_bands[c]][pt][token]++; \ - pt = vp9_prev_token_class[token]; \ + coef_counts[type][ref][get_coef_band(txfm_size, c)][pt][token]++; \ + pt = vp9_get_coef_context(&recent_energy, token); \ } while (0) -#endif /* CONFIG_NEWCOEFCONTEXT */ #define WRITE_COEF_CONTINUE(val, token) \ { \ @@ -97,77 +84,90 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { } while (0); static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, - BOOL_DECODER* const br, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - PLANE_TYPE type, - TX_TYPE tx_type, + BOOL_DECODER* const br, int block_idx, + PLANE_TYPE type, TX_TYPE tx_type, int seg_eob, int16_t *qcoeff_ptr, - const int *const scan, TX_SIZE txfm_size, - const int *coef_bands) { + const int *const scan, TX_SIZE txfm_size) { + ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context; + ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context; + const int aidx = vp9_block2above[txfm_size][block_idx]; + const int lidx = vp9_block2left[txfm_size][block_idx]; + ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0; FRAME_CONTEXT *const fc = &dx->common.fc; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif - int nodc = (type == PLANE_TYPE_Y_NO_DC); - int pt, c = nodc; + int recent_energy = 0; + int pt, c = 0; vp9_coeff_probs *coef_probs; vp9_prob *prob; vp9_coeff_count *coef_counts; + const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; switch (txfm_size) { default: case TX_4X4: - if (tx_type == DCT_DCT) { - coef_probs = fc->coef_probs_4x4; - coef_counts = fc->coef_counts_4x4; - } else { - coef_probs = fc->hybrid_coef_probs_4x4; - coef_counts = fc->hybrid_coef_counts_4x4; - } + coef_probs = fc->coef_probs_4x4; + coef_counts = fc->coef_counts_4x4; break; case TX_8X8: - if (tx_type == DCT_DCT) { - coef_probs = fc->coef_probs_8x8; - coef_counts = fc->coef_counts_8x8; - } else { - coef_probs = fc->hybrid_coef_probs_8x8; - coef_counts = fc->hybrid_coef_counts_8x8; - } + coef_probs = fc->coef_probs_8x8; + coef_counts = fc->coef_counts_8x8; + above_ec = (A0[aidx] + A0[aidx + 1]) != 0; + left_ec = (L0[lidx] + L0[lidx + 1]) != 0; break; case TX_16X16: - if (tx_type == DCT_DCT) { - coef_probs = fc->coef_probs_16x16; - coef_counts = fc->coef_counts_16x16; + coef_probs = fc->coef_probs_16x16; + coef_counts = fc->coef_counts_16x16; + if (type == PLANE_TYPE_UV) { + ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); + ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); + above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1]) != 0; + left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1]) != 0; } else { - coef_probs = fc->hybrid_coef_probs_16x16; - coef_counts = fc->hybrid_coef_counts_16x16; + above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0; + left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0; } break; case TX_32X32: coef_probs = fc->coef_probs_32x32; coef_counts = fc->coef_counts_32x32; + if (type == PLANE_TYPE_UV) { + ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); + ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); + ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2); + ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2); + ENTROPY_CONTEXT *A3 = (ENTROPY_CONTEXT *) (xd->above_context + 3); + ENTROPY_CONTEXT *L3 = (ENTROPY_CONTEXT *) (xd->left_context + 3); + above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1] + + A2[aidx] + A2[aidx + 1] + A3[aidx] + A3[aidx + 1]) != 0; + left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1] + + L2[lidx] + L2[lidx + 1] + L3[lidx] + L3[lidx + 1]) != 0; + } else { + ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); + ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); + above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3] + + A1[aidx] + A1[aidx + 1] + A1[aidx + 2] + A1[aidx + 3]) != 0; + left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3] + + L1[lidx] + L1[lidx + 1] + L1[lidx + 2] + L1[lidx + 3]) != 0; + } break; } - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); -#if CONFIG_NEWCOEFCONTEXT - pn = pt; - neighbors = vp9_get_coef_neighbors_handle(scan); -#endif + VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec); while (1) { int val; const uint8_t *cat6 = cat6_prob; - if (c >= seg_eob) break; - prob = coef_probs[type][coef_bands[c]][PT]; + + if (c >= seg_eob) + break; + prob = coef_probs[type][ref][get_coef_band(txfm_size, c)][pt]; if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; SKIP_START: - if (c >= seg_eob) break; + if (c >= seg_eob) + break; if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[type][coef_bands[c]][PT]; + prob = coef_probs[type][ref][get_coef_band(txfm_size, c)][pt]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ @@ -231,193 +231,110 @@ SKIP_START: } if (c < seg_eob) - coef_counts[type][coef_bands[c]][PT][DCT_EOB_TOKEN]++; - - a[0] = l[0] = (c > !type); + coef_counts[type][ref][get_coef_band(txfm_size, c)][pt][DCT_EOB_TOKEN]++; + + A0[aidx] = L0[lidx] = c > 0; + if (txfm_size >= TX_8X8) { + A0[aidx + 1] = L0[lidx + 1] = A0[aidx]; + if (txfm_size >= TX_16X16) { + if (type == PLANE_TYPE_UV) { + ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); + ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); + A1[aidx] = A1[aidx + 1] = L1[aidx] = L1[lidx + 1] = A0[aidx]; + if (txfm_size >= TX_32X32) { + ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2); + ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2); + ENTROPY_CONTEXT *A3 = (ENTROPY_CONTEXT *) (xd->above_context + 3); + ENTROPY_CONTEXT *L3 = (ENTROPY_CONTEXT *) (xd->left_context + 3); + A2[aidx] = A2[aidx + 1] = A3[aidx] = A3[aidx + 1] = A0[aidx]; + L2[lidx] = L2[lidx + 1] = L3[lidx] = L3[lidx + 1] = A0[aidx]; + } + } else { + A0[aidx + 2] = A0[aidx + 3] = L0[lidx + 2] = L0[lidx + 3] = A0[aidx]; + if (txfm_size >= TX_32X32) { + ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); + ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); + A1[aidx] = A1[aidx + 1] = A1[aidx + 2] = A1[aidx + 3] = A0[aidx]; + L1[lidx] = L1[lidx + 1] = L1[lidx + 2] = L1[lidx + 3] = A0[aidx]; + } + } + } + } return c; } static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { - int active = vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB); - int eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - - if (!active || eob > eob_max) - eob = eob_max; - return eob; + return vp9_get_segdata(xd, segment_id, SEG_LVL_SKIP) ? 0 : eob_max; } int vp9_decode_sb_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { - ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; - ENTROPY_CONTEXT* const A1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]); - ENTROPY_CONTEXT* const L1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]); - uint16_t *const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; - int c, i, eobtotal = 0, seg_eob; + int i, eobtotal = 0, seg_eob; // Luma block -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3] + - A1[0] + A1[1] + A1[2] + A1[3]) != 0; - ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3] + - L1[0] + L1[1] + L1[2] + L1[3]) != 0; -#else - ENTROPY_CONTEXT above_ec = A[0]; - ENTROPY_CONTEXT left_ec = L[0]; -#endif - eobs[0] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, - PLANE_TYPE_Y_WITH_DC, - DCT_DCT, get_eob(xd, segment_id, 1024), - xd->sb_coeff_data.qcoeff, - vp9_default_zig_zag1d_32x32, - TX_32X32, vp9_coef_bands_32x32); - A[1] = A[2] = A[3] = A[0] = above_ec; - L[1] = L[2] = L[3] = L[0] = left_ec; - A1[1] = A1[2] = A1[3] = A1[0] = above_ec; - L1[1] = L1[2] = L1[3] = L1[0] = left_ec; - + int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, get_eob(xd, segment_id, 1024), + xd->sb_coeff_data.qcoeff, + vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[0] = c; eobtotal += c; // 16x16 chroma blocks seg_eob = get_eob(xd, segment_id, 256); - for (i = 16; i < 24; i += 4) { - ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_16X16][i]; - ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_16X16][i]; - ENTROPY_CONTEXT* const a1 = A1 + vp9_block2above[TX_16X16][i]; - ENTROPY_CONTEXT* const l1 = L1 + vp9_block2left[TX_16X16][i]; -#if CONFIG_CNVCONTEXT - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; -#else - above_ec = a[0]; - left_ec = l[0]; -#endif - - eobs[i] = c = decode_coefs(pbi, xd, bc, - &above_ec, &left_ec, - PLANE_TYPE_UV, - DCT_DCT, seg_eob, - xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, - vp9_default_zig_zag1d_16x16, - TX_16X16, vp9_coef_bands_16x16); - - a1[1] = a1[0] = a[1] = a[0] = above_ec; - l1[1] = l1[0] = l[1] = l[0] = left_ec; + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; eobtotal += c; } - // no Y2 block - A[8] = L[8] = A1[8] = L1[8] = 0; + return eobtotal; } static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { - ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; - uint16_t *const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; - int c, i, eobtotal = 0, seg_eob; - // Luma block + int i, eobtotal = 0, seg_eob; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; - ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; -#else - ENTROPY_CONTEXT above_ec = A[0]; - ENTROPY_CONTEXT left_ec = L[0]; -#endif - eobs[0] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, - PLANE_TYPE_Y_WITH_DC, - get_tx_type(xd, &xd->block[0]), - get_eob(xd, segment_id, 256), - xd->qcoeff, vp9_default_zig_zag1d_16x16, - TX_16X16, vp9_coef_bands_16x16); - A[1] = A[2] = A[3] = A[0] = above_ec; - L[1] = L[2] = L[3] = L[0] = left_ec; + // Luma block + int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, + get_tx_type(xd, &xd->block[0]), + get_eob(xd, segment_id, 256), + xd->qcoeff, vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[0] = c; eobtotal += c; // 8x8 chroma blocks seg_eob = get_eob(xd, segment_id, 64); for (i = 16; i < 24; i += 4) { - ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_8X8][i]; - ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_8X8][i]; -#if CONFIG_CNVCONTEXT - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; -#else - above_ec = a[0]; - left_ec = l[0]; -#endif - eobs[i] = c = decode_coefs(pbi, xd, bc, - &above_ec, &left_ec, - PLANE_TYPE_UV, - DCT_DCT, seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_8x8, - TX_8X8, vp9_coef_bands_8x8); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, + DCT_DCT, seg_eob, xd->block[i].qcoeff, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; eobtotal += c; } - A[8] = 0; - L[8] = 0; return eobtotal; } static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { - ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; - uint16_t *const eobs = xd->eobs; - PLANE_TYPE type; int c, i, eobtotal = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; - int has_2nd_order = get_2nd_order_usage(xd); - // 2nd order DC block - if (has_2nd_order) { - ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][24]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][24]; - - eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_Y2, - DCT_DCT, get_eob(xd, segment_id, 4), - xd->block[24].qcoeff, - vp9_default_zig_zag1d_4x4, TX_8X8, - vp9_coef_bands_4x4); - eobtotal += c - 4; - type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - eobs[24] = 0; - type = PLANE_TYPE_Y_WITH_DC; - } - // luma blocks seg_eob = get_eob(xd, segment_id, 64); for (i = 0; i < 16; i += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][i]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - eobs[i] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, type, - type == PLANE_TYPE_Y_WITH_DC ? - get_tx_type(xd, xd->block + i) : DCT_DCT, - seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_8x8, - TX_8X8, vp9_coef_bands_8x8); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + get_tx_type(xd, xd->block + i), + seg_eob, xd->block[i].qcoeff, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; eobtotal += c; } @@ -427,34 +344,18 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, // use 4x4 transform for U, V components in I8X8/splitmv prediction mode seg_eob = get_eob(xd, segment_id, 16); for (i = 16; i < 24; i++) { - ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i]; - - eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, - DCT_DCT, seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_4x4, TX_4X4, - vp9_coef_bands_4x4); + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, + DCT_DCT, seg_eob, xd->block[i].qcoeff, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; eobtotal += c; } } else { for (i = 16; i < 24; i += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][i]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - eobs[i] = c = decode_coefs(pbi, xd, bc, - &above_ec, &left_ec, - PLANE_TYPE_UV, - DCT_DCT, seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d_8x8, - TX_8X8, vp9_coef_bands_8x8); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, + DCT_DCT, seg_eob, xd->block[i].qcoeff, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; eobtotal += c; } } @@ -466,17 +367,9 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, BOOL_DECODER* const bc, PLANE_TYPE type, int i, int seg_eob, TX_TYPE tx_type, const int *scan) { - ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; - ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i]; - uint16_t *const eobs = xd->eobs; - int c; - - c = decode_coefs(dx, xd, bc, a, l, type, tx_type, seg_eob, - xd->block[i].qcoeff, scan, TX_4X4, vp9_coef_bands_4x4); - eobs[i] = c; - + int c = decode_coefs(dx, xd, bc, i, type, tx_type, seg_eob, + xd->block[i].qcoeff, scan, TX_4X4); + xd->eobs[i] = c; return c; } @@ -539,26 +432,13 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { int i, eobtotal = 0; - PLANE_TYPE type; const int segment_id = xd->mode_info_context->mbmi.segment_id; const int seg_eob = get_eob(xd, segment_id, 16); - const int has_2nd_order = get_2nd_order_usage(xd); - - // 2nd order DC block - if (has_2nd_order) { - eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24, seg_eob, - DCT_DCT, vp9_default_zig_zag1d_4x4) - 16; - type = PLANE_TYPE_Y_NO_DC; - } else { - xd->above_context->y2 = 0; - xd->left_context->y2 = 0; - xd->eobs[24] = 0; - type = PLANE_TYPE_Y_WITH_DC; - } // luma blocks for (i = 0; i < 16; ++i) { - eobtotal += decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob); + eobtotal += decode_coefs_4x4_y(dx, xd, bc, + PLANE_TYPE_Y_WITH_DC, i, seg_eob); } // chroma blocks @@ -571,16 +451,13 @@ int vp9_decode_mb_tokens(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - int eobtotal; - - if (tx_size == TX_16X16) { - eobtotal = vp9_decode_mb_tokens_16x16(dx, xd, bc); - } else if (tx_size == TX_8X8) { - eobtotal = vp9_decode_mb_tokens_8x8(dx, xd, bc); - } else { - assert(tx_size == TX_4X4); - eobtotal = vp9_decode_mb_tokens_4x4(dx, xd, bc); + switch (tx_size) { + case TX_16X16: + return vp9_decode_mb_tokens_16x16(dx, xd, bc); + case TX_8X8: + return vp9_decode_mb_tokens_8x8(dx, xd, bc); + default: + assert(tx_size == TX_4X4); + return vp9_decode_mb_tokens_4x4(dx, xd, bc); } - - return eobtotal; } diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 152527cff..b17955b1c 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -10,54 +10,20 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_blockd.h" -#if CONFIG_LOSSLESS #include "vp9/decoder/vp9_dequantize.h" -#endif -void vp9_dequant_dc_idct_add_y_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, uint16_t *eobs, - const int16_t *dc) { - int i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_dc_idct_add_c(q, dq, pre, dst, 16, stride, dc[0]); - else - vp9_dc_only_idct_add_c(dc[0], pre, dst, 16, stride); - - q += 16; - pre += 4; - dst += 4; - dc++; - } - - pre += 64 - 16; - dst += 4 * stride - 16; - } -} - -void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, - int stride, - uint16_t *eobs, - const int16_t *dc, - MACROBLOCKD *xd) { +void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, + int stride, + MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]); - else - vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride); - + xd->itxm_add(q, dq, dst, dst, stride, stride, xd->eobs[i * 4 + j]); q += 16; dst += 4; - dc++; } dst += 4 * stride - 16; @@ -67,18 +33,12 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, - int stride, uint16_t *eobs) { + int stride, MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride); - else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, xd->eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; @@ -92,18 +52,13 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, - uint16_t *eobs) { + MACROBLOCKD *xd) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) - vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride); - else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride, + xd->eobs[16 + i * 2 + j]); q += 16; pre += 4; dstu += 4; @@ -115,13 +70,8 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) - vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride); - else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride, + xd->eobs[20 + i * 2 + j]); q += 16; pre += 4; dstv += 4; @@ -136,19 +86,12 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, - uint16_t *eobs, MACROBLOCKD *xd) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride); - } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride); - ((int *)q)[0] = 0; - } - + xd->itxm_add(q, dq, dstu, dstu, stride, stride, xd->eobs[16 + i * 2 + j]); q += 16; dstu += 4; } @@ -158,13 +101,7 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride); - } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride); - ((int *)q)[0] = 0; - } - + xd->itxm_add(q, dq, dstv, dstv, stride, stride, xd->eobs[20 + i * 2 + j]); q += 16; dstv += 4; } @@ -173,69 +110,40 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, } } -void vp9_dequant_dc_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, uint16_t *eobs, - const int16_t *dc, - MACROBLOCKD *xd) { - q[0] = dc[0]; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]); - - q[64] = dc[1]; - vp9_dequant_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, 1, - xd->eobs[4]); - - q[128] = dc[4]; - vp9_dequant_idct_add_8x8_c(&q[128], dq, pre + 8 * 16, - dst + 8 * stride, 16, stride, 1, xd->eobs[8]); - - q[192] = dc[8]; - vp9_dequant_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8, - dst + 8 * stride + 8, 16, stride, 1, - xd->eobs[12]); -} +void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, + int stride, + MACROBLOCKD *xd) { + vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->eobs[0]); -void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, - int stride, - uint16_t *eobs, - const int16_t *dc, - MACROBLOCKD *xd) { - q[0] = dc[0]; - vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]); - - q[64] = dc[1]; vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8, - dst + 8, stride, stride, 1, xd->eobs[4]); + dst + 8, stride, stride, xd->eobs[4]); - q[128] = dc[4]; vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, - dst + 8 * stride, stride, stride, 1, - xd->eobs[8]); + dst + 8 * stride, stride, stride, + xd->eobs[8]); - q[192] = dc[8]; vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, - dst + 8 * stride + 8, stride, stride, 1, - xd->eobs[12]); + dst + 8 * stride + 8, stride, stride, + xd->eobs[12]); } void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, - int stride, uint16_t *eobs, - MACROBLOCKD *xd) { + int stride, MACROBLOCKD *xd) { uint8_t *origdest = dst; uint8_t *origpred = pre; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]); vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, 16, stride, 0, xd->eobs[4]); + origdest + 8, 16, stride, xd->eobs[4]); vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16, - origdest + 8 * stride, 16, stride, 0, xd->eobs[8]); + origdest + 8 * stride, 16, stride, + xd->eobs[8]); vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8, - origdest + 8 * stride + 8, 16, stride, 0, + origdest + 8 * stride + 8, 16, stride, xd->eobs[12]); } @@ -243,72 +151,39 @@ void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, - int stride, uint16_t *eobs, - MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]); + int stride, MACROBLOCKD *xd) { + vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]); q += 64; pre += 64; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]); } void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, - uint16_t *eobs, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0, + vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, xd->eobs[16]); q += 64; - vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0, + vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, xd->eobs[20]); } -#if CONFIG_LOSSLESS -void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, - uint16_t *eobs, - const int16_t *dc) { - int i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_dc_idct_add_lossless_c(q, dq, pre, dst, 16, stride, dc[0]); - else - vp9_dc_only_inv_walsh_add_c(dc[0], pre, dst, 16, stride); - - q += 16; - pre += 4; - dst += 4; - dc++; - } - - pre += 64 - 16; - dst += 4 * stride - 16; - } -} void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, - int stride, uint16_t *eobs) { + int stride, MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride); - else { - vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dst, 16, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride, + xd->eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; @@ -324,18 +199,13 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, - uint16_t *eobs) { + MACROBLOCKD *xd) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride); - else { - vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstu, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride, + xd->eobs[16 + i * 2 + j]); q += 16; pre += 4; dstu += 4; @@ -347,13 +217,8 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride); - else { - vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstv, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride, + xd->eobs[20 + i * 2 + j]); q += 16; pre += 4; dstv += 4; @@ -363,5 +228,4 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, dstv += 4 * stride - 8; } } -#endif diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h index 93321ef34..748fc7ea3 100644 --- a/vp9/decoder/vp9_onyxd.h +++ b/vp9/decoder/vp9_onyxd.h @@ -27,6 +27,7 @@ extern "C" { int Version; int postprocess; int max_threads; + int inv_tile_order; int input_partition; } VP9D_CONFIG; typedef enum { diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index b3b75af70..ce7958c3b 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -30,34 +30,34 @@ #include "vp9/decoder/vp9_detokenize.h" #include "./vpx_scale_rtcd.h" -static int get_free_fb(VP9_COMMON *cm); -static void ref_cnt_fb(int *buf, int *idx, int new_idx); - #define WRITE_RECON_BUFFER 0 #if WRITE_RECON_BUFFER == 1 -static void recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s) { +static void recon_write_yuv_frame(const char *name, + const YV12_BUFFER_CONFIG *s, + int w, int _h) { FILE *yuv_file = fopen((char *)name, "ab"); - uint8_t *src = s->y_buffer; - int h = s->y_height; + const uint8_t *src = s->y_buffer; + int h = _h; do { - fwrite(src, s->y_width, 1, yuv_file); + fwrite(src, w, 1, yuv_file); src += s->y_stride; } while (--h); src = s->u_buffer; - h = s->uv_height; + h = (_h + 1) >> 1; + w = (w + 1) >> 1; do { - fwrite(src, s->uv_width, 1, yuv_file); + fwrite(src, w, 1, yuv_file); src += s->uv_stride; } while (--h); src = s->v_buffer; - h = s->uv_height; + h = (_h + 1) >> 1; do { - fwrite(src, s->uv_width, 1, yuv_file); + fwrite(src, w, 1, yuv_file); src += s->uv_stride; } while (--h); @@ -127,6 +127,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_initialize_dec(); vp9_create_common(&pbi->common); + pbi->oxcf = *oxcf; pbi->common.current_video_frame = 0; pbi->ready_for_new_data = 1; @@ -168,12 +169,13 @@ vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, VP9_COMMON *cm = &pbi->common; int ref_fb_idx; + /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + * encoder is using the frame buffers for. This is just a stub to keep the + * vpxenc --test-decode functionality working, and will be replaced in a + * later commit that adds VP9-specific controls for this functionality. + */ if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->alt_fb_idx; + ref_fb_idx = pbi->common.new_fb_idx; else { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Invalid reference frame"); @@ -200,12 +202,17 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, int *ref_fb_ptr = NULL; int free_fb; + /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + * encoder is using the frame buffers for. This is just a stub to keep the + * vpxenc --test-decode functionality working, and will be replaced in a + * later commit that adds VP9-specific controls for this functionality. + */ if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_ptr = &cm->lst_fb_idx; + ref_fb_ptr = &pbi->common.active_ref_idx[0]; else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_ptr = &cm->gld_fb_idx; + ref_fb_ptr = &pbi->common.active_ref_idx[1]; else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_ptr = &cm->alt_fb_idx; + ref_fb_ptr = &pbi->common.active_ref_idx[2]; else { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Invalid reference frame"); @@ -234,77 +241,25 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, } -static int get_free_fb(VP9_COMMON *cm) { - int i; - for (i = 0; i < NUM_YV12_BUFFERS; i++) - if (cm->fb_idx_ref_cnt[i] == 0) - break; - - assert(i < NUM_YV12_BUFFERS); - cm->fb_idx_ref_cnt[i] = 1; - return i; -} - -static void ref_cnt_fb(int *buf, int *idx, int new_idx) { - if (buf[*idx] > 0) - buf[*idx]--; - - *idx = new_idx; - - buf[new_idx]++; -} - -/* If any buffer copy / swapping is signalled it should be done here. */ -static int swap_frame_buffers(VP9_COMMON *cm) { - int err = 0; - - /* The alternate reference frame or golden frame can be updated - * using the new, last, or golden/alt ref frame. If it - * is updated using the newly decoded frame it is a refresh. - * An update using the last or golden/alt ref frame is a copy. - */ - if (cm->copy_buffer_to_arf) { - int new_fb = 0; +/* If any buffer updating is signalled it should be done here. */ +static void swap_frame_buffers(VP9D_COMP *pbi) { + int ref_index = 0, mask; - if (cm->copy_buffer_to_arf == 1) - new_fb = cm->lst_fb_idx; - else if (cm->copy_buffer_to_arf == 2) - new_fb = cm->gld_fb_idx; - else - err = -1; - - ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb); - } - - if (cm->copy_buffer_to_gf) { - int new_fb = 0; - - if (cm->copy_buffer_to_gf == 1) - new_fb = cm->lst_fb_idx; - else if (cm->copy_buffer_to_gf == 2) - new_fb = cm->alt_fb_idx; - else - err = -1; - - ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb); + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + ref_cnt_fb(pbi->common.fb_idx_ref_cnt, + &pbi->common.ref_frame_map[ref_index], + pbi->common.new_fb_idx); + } + ++ref_index; } - if (cm->refresh_golden_frame) - ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx); - - if (cm->refresh_alt_ref_frame) - ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx); + pbi->common.frame_to_show = &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + pbi->common.fb_idx_ref_cnt[pbi->common.new_fb_idx]--; - if (cm->refresh_last_frame) { - ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx); - - cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; - } else - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - return err; + /* Invalidate these references until the next frame starts. */ + for (ref_index = 0; ref_index < 3; ref_index++) + pbi->common.active_ref_idx[ref_index] = INT_MAX; } int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, @@ -332,8 +287,13 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, * We do not know if the missing frame(s) was supposed to update * any of the reference buffers, but we act conservative and * mark only the last buffer as corrupted. + * + * TODO(jkoleszar): Error concealment is undefined and non-normative + * at this point, but if it becomes so, [0] may not always be the correct + * thing to do here. */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; + if (cm->active_ref_idx[0] != INT_MAX) + cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1; } cm->new_fb_idx = get_free_fb(cm); @@ -344,8 +304,13 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, /* We do not know if the missing frame(s) was supposed to update * any of the reference buffers, but we act conservative and * mark only the last buffer as corrupted. + * + * TODO(jkoleszar): Error concealment is undefined and non-normative + * at this point, but if it becomes so, [0] may not always be the correct + * thing to do here. */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; + if (cm->active_ref_idx[0] != INT_MAX) + cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1; if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; @@ -365,11 +330,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, } { - if (swap_frame_buffers(cm)) { - pbi->common.error.error_code = VPX_CODEC_ERROR; - pbi->common.error.setjmp = 0; - return -1; - } + swap_frame_buffers(pbi); #if WRITE_RECON_BUFFER == 2 if (cm->show_frame) @@ -389,7 +350,8 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, #if WRITE_RECON_BUFFER == 1 if (cm->show_frame) - recon_write_yuv_frame("recon.yuv", cm->frame_to_show); + recon_write_yuv_frame("recon.yuv", cm->frame_to_show, + cm->Width, cm->Height); #endif vp9_clear_system_state(); diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 64975468d..0e6d059af 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -18,41 +18,6 @@ // #define DEC_DEBUG -typedef struct { - int ithread; - void *ptr1; - void *ptr2; -} DECODETHREAD_DATA; - -typedef struct { - MACROBLOCKD mbd; - int mb_row; - int current_mb_col; - short *coef_ptr; -} MB_ROW_DEC; - -typedef struct { - int const *scan; - int const *scan_8x8; - uint8_t const *ptr_block2leftabove; - vp9_tree_index const *vp9_coef_tree_ptr; - unsigned char *norm_ptr; - uint8_t *ptr_coef_bands_x; - uint8_t *ptr_coef_bands_x_8x8; - - ENTROPY_CONTEXT_PLANES *A; - ENTROPY_CONTEXT_PLANES *L; - - int16_t *qcoeff_start_ptr; - - vp9_prob const *coef_probs_4x4[BLOCK_TYPES_4X4]; - vp9_prob const *coef_probs_8x8[BLOCK_TYPES_8X8]; - vp9_prob const *coef_probs_16X16[BLOCK_TYPES_16X16]; - - uint8_t eob[25]; - -} DETOK; - typedef struct VP9Decompressor { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -68,18 +33,13 @@ typedef struct VP9Decompressor { int64_t last_time_stamp; int ready_for_new_data; - DETOK detoken; - - vp9_dequant_idct_add_fn_t idct_add; - vp9_dequant_dc_idct_add_fn_t dc_idct_add; - vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; - vp9_dequant_idct_add_y_block_fn_t idct_add_y_block; - vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block; - + int refresh_frame_flags; vp9_prob prob_skip_false; int decoded_key_frame; + int initial_width; + int initial_height; } VP9D_COMP; int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end); diff --git a/vp9/decoder/x86/vp9_dequantize_mmx.asm b/vp9/decoder/x86/vp9_dequantize_mmx.asm deleted file mode 100644 index 23080bfee..000000000 --- a/vp9/decoder/x86/vp9_dequantize_mmx.asm +++ /dev/null @@ -1,406 +0,0 @@ -; -; Copyright (c) 2012 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -align 16 -x_s1sqr2: times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: times 4 dw 0x4E7B -align 16 -pw_16: times 4 dw 16 - -SECTION .text - -INIT_MMX - - -;void dequantize_b_impl_mmx(short *sq, short *dq, short *q) -cglobal dequantize_b_impl_mmx, 3,3,0,sq,dq,arg3 - mova m1, [sqq] - pmullw m1, [arg3q+0] ; mm4 *= kernel 0 modifiers. - mova [dqq+ 0], m1 - - mova m1, [sqq+8] - pmullw m1, [arg3q+8] ; mm4 *= kernel 0 modifiers. - mova [dqq+ 8], m1 - - mova m1, [sqq+16] - pmullw m1, [arg3q+16] ; mm4 *= kernel 0 modifiers. - mova [dqq+16], m1 - - mova m1, [sqq+24] - pmullw m1, [arg3q+24] ; mm4 *= kernel 0 modifiers. - mova [dqq+24], m1 - RET - - -;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) -cglobal dequant_idct_add_mmx, 4,6,0,inp,dq,pred,dest,pit,stride - -%if ARCH_X86_64 - movsxd strideq, dword stridem - movsxd pitq, dword pitm -%else - mov strideq, stridem - mov pitq, pitm -%endif - - mova m0, [inpq+ 0] - pmullw m0, [dqq] - - mova m1, [inpq+ 8] - pmullw m1, [dqq+ 8] - - mova m2, [inpq+16] - pmullw m2, [dqq+16] - - mova m3, [inpq+24] - pmullw m3, [dqq+24] - - pxor m7, m7 - mova [inpq], m7 - mova [inpq+8], m7 - mova [inpq+16], m7 - mova [inpq+24], m7 - - - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - mova m3, m5 ; 33 23 13 03 - - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - paddw m0, [pw_16] - - paddw m2, [pw_16] - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - psraw m2, 5 - - psraw m0, 5 - psraw m4, 5 - - psraw m6, 5 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - pxor m7, m7 - - movh m4, [predq] - punpcklbw m4, m7 - paddsw m0, m4 - packuswb m0, m7 - movh [destq], m0 - - movh m4, [predq+pitq] - punpcklbw m4, m7 - paddsw m1, m4 - packuswb m1, m7 - movh [destq+strideq], m1 - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m2, m4 - packuswb m2, m7 - movh [destq+strideq*2], m2 - - add destq, strideq - add predq, pitq - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m5, m4 - packuswb m5, m7 - movh [destq+strideq*2], m5 - RET - - -;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc) -cglobal dequant_dc_idct_add_mmx, 4,7,0,inp,dq,pred,dest,pit,stride,Dc - -%if ARCH_X86_64 - movsxd strideq, dword stridem - movsxd pitq, dword pitm -%else - mov strideq, stridem - mov pitq, pitm -%endif - - mov Dcq, Dcm - mova m0, [inpq+ 0] - pmullw m0, [dqq+ 0] - - mova m1, [inpq+ 8] - pmullw m1, [dqq+ 8] - - mova m2, [inpq+16] - pmullw m2, [dqq+16] - - mova m3, [inpq+24] - pmullw m3, [dqq+24] - - pxor m7, m7 - mova [inpq+ 0], m7 - mova [inpq+ 8], m7 - mova [inpq+16], m7 - mova [inpq+24], m7 - - ; move lower word of Dc to lower word of m0 - psrlq m0, 16 - psllq m0, 16 - and Dcq, 0xFFFF ; If Dc < 0, we don't want the full dword precision. - movh m7, Dcq - por m0, m7 - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - mova m3, m5 ; 33 23 13 03 - - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - paddw m0, [pw_16] - - paddw m2, [pw_16] - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - psraw m2, 5 - - psraw m0, 5 - psraw m4, 5 - - psraw m6, 5 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - pxor m7, m7 - - movh m4, [predq] - punpcklbw m4, m7 - paddsw m0, m4 - packuswb m0, m7 - movh [destq], m0 - - movh m4, [predq+pitq] - punpcklbw m4, m7 - paddsw m1, m4 - packuswb m1, m7 - movh [destq+strideq], m1 - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m2, m4 - packuswb m2, m7 - movh [destq+strideq*2], m2 - - add destq, strideq - add predq, pitq - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m5, m4 - packuswb m5, m7 - movh [destq+strideq*2], m5 - RET - diff --git a/vp9/decoder/x86/vp9_idct_blk_mmx.c b/vp9/decoder/x86/vp9_idct_blk_mmx.c deleted file mode 100644 index 8279eaa4a..000000000 --- a/vp9/decoder/x86/vp9_idct_blk_mmx.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_dequantize.h" -#include "vp9/decoder/x86/vp9_idct_mmx.h" - -void vp9_dequant_dc_idct_add_y_block_mmx(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs, - const short *dc) { - int i; - - for (i = 0; i < 4; i++) { - if (eobs[0] > 1) - vp9_dequant_dc_idct_add_mmx(q, dq, pre, dst, 16, stride, dc[0]); - else - vp9_dc_only_idct_add_mmx(dc[0], pre, dst, 16, stride); - - if (eobs[1] > 1) - vp9_dequant_dc_idct_add_mmx(q + 16, dq, pre + 4, - dst + 4, 16, stride, dc[1]); - else - vp9_dc_only_idct_add_mmx(dc[1], pre + 4, dst + 4, 16, stride); - - if (eobs[2] > 1) - vp9_dequant_dc_idct_add_mmx(q + 32, dq, pre + 8, - dst + 8, 16, stride, dc[2]); - else - vp9_dc_only_idct_add_mmx(dc[2], pre + 8, dst + 8, 16, stride); - - if (eobs[3] > 1) - vp9_dequant_dc_idct_add_mmx(q + 48, dq, pre + 12, - dst + 12, 16, stride, dc[3]); - else - vp9_dc_only_idct_add_mmx(dc[3], pre + 12, dst + 12, 16, stride); - - q += 64; - dc += 4; - pre += 64; - dst += 4 * stride; - eobs += 4; - } -} - -void vp9_dequant_idct_add_y_block_mmx(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs) { - int i; - - for (i = 0; i < 4; i++) { - if (eobs[0] > 1) - vp9_dequant_idct_add_mmx(q, dq, pre, dst, 16, stride); - else { - vp9_dc_only_idct_add_mmx(q[0]*dq[0], pre, dst, 16, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp9_dequant_idct_add_mmx(q + 16, dq, pre + 4, dst + 4, 16, stride); - else { - vp9_dc_only_idct_add_mmx(q[16]*dq[0], pre + 4, dst + 4, 16, stride); - ((int *)(q + 16))[0] = 0; - } - - if (eobs[2] > 1) - vp9_dequant_idct_add_mmx(q + 32, dq, pre + 8, dst + 8, 16, stride); - else { - vp9_dc_only_idct_add_mmx(q[32]*dq[0], pre + 8, dst + 8, 16, stride); - ((int *)(q + 32))[0] = 0; - } - - if (eobs[3] > 1) - vp9_dequant_idct_add_mmx(q + 48, dq, pre + 12, dst + 12, 16, stride); - else { - vp9_dc_only_idct_add_mmx(q[48]*dq[0], pre + 12, dst + 12, 16, stride); - ((int *)(q + 48))[0] = 0; - } - - q += 64; - pre += 64; - dst += 4 * stride; - eobs += 4; - } -} - -void vp9_dequant_idct_add_uv_block_mmx(short *q, const short *dq, - unsigned char *pre, - unsigned char *dstu, - unsigned char *dstv, - int stride, unsigned short *eobs) { - int i; - - for (i = 0; i < 2; i++) { - if (eobs[0] > 1) - vp9_dequant_idct_add_mmx(q, dq, pre, dstu, 8, stride); - else { - vp9_dc_only_idct_add_mmx(q[0]*dq[0], pre, dstu, 8, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp9_dequant_idct_add_mmx(q + 16, dq, pre + 4, dstu + 4, 8, stride); - else { - vp9_dc_only_idct_add_mmx(q[16]*dq[0], pre + 4, dstu + 4, 8, stride); - ((int *)(q + 16))[0] = 0; - } - - q += 32; - pre += 32; - dstu += 4 * stride; - eobs += 2; - } - - for (i = 0; i < 2; i++) { - if (eobs[0] > 1) - vp9_dequant_idct_add_mmx(q, dq, pre, dstv, 8, stride); - else { - vp9_dc_only_idct_add_mmx(q[0]*dq[0], pre, dstv, 8, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp9_dequant_idct_add_mmx(q + 16, dq, pre + 4, dstv + 4, 8, stride); - else { - vp9_dc_only_idct_add_mmx(q[16]*dq[0], pre + 4, dstv + 4, 8, stride); - ((int *)(q + 16))[0] = 0; - } - - q += 32; - pre += 32; - dstv += 4 * stride; - eobs += 2; - } -} diff --git a/vp9/decoder/x86/vp9_x86_dsystemdependent.c b/vp9/decoder/x86/vp9_x86_dsystemdependent.c deleted file mode 100644 index 51ee8ec31..000000000 --- a/vp9/decoder/x86/vp9_x86_dsystemdependent.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_ports/x86.h" -#include "vp9/decoder/vp9_onyxd_int.h" - -#if HAVE_MMX -void vp9_dequantize_b_impl_mmx(short *sq, short *dq, short *q); - -void vp9_dequantize_b_mmx(BLOCKD *d) { - short *sq = (short *) d->qcoeff; - short *dq = (short *) d->dqcoeff; - short *q = (short *) d->dequant; - vp9_dequantize_b_impl_mmx(sq, dq, q); -} -#endif - - |