Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 154
-rw-r--r-- | vp9/encoder/vp9_block.h | 4
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 169
-rw-r--r-- | vp9/encoder/vp9_encodeintra.c | 11
-rw-r--r-- | vp9/encoder/vp9_encodeintra.h | 6
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 118
-rw-r--r-- | vp9/encoder/vp9_encodemv.c | 7
-rw-r--r-- | vp9/encoder/vp9_lookahead.c | 12
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 5
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 2
-rw-r--r-- | vp9/encoder/vp9_modecosts.c | 7
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 51
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 17
-rw-r--r-- | vp9/encoder/vp9_quantize.c | 41
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 610
-rw-r--r-- | vp9/encoder/vp9_sad_c.c | 62
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 13
-rw-r--r-- | vp9/encoder/vp9_tokenize.c | 14
-rw-r--r-- | vp9/encoder/vp9_tokenize.h | 4
-rw-r--r-- | vp9/encoder/vp9_variance.h | 2
-rw-r--r-- | vp9/encoder/x86/vp9_variance_sse2.c | 212
-rw-r--r-- | vp9/encoder/x86/vp9_variance_ssse3.c | 9
22 files changed, 825 insertions, 705 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index d8839cd14..5916bae2b 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -369,11 +369,6 @@ static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m); } - -static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_bmode_tree, p, vp9_bmode_encodings + m); -} - static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m); } @@ -411,7 +406,6 @@ static int prob_diff_update_savings_search(const unsigned int *ct, return bestsavings; } -#if CONFIG_MODELCOEFPROB static int prob_diff_update_savings_search_model(const unsigned int *ct, const vp9_prob *oldp, vp9_prob *bestp, @@ -420,7 +414,8 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct, int i, old_b, new_b, update_b, savings, bestsavings, step; int newp; vp9_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; - vp9_model_to_full_probs(oldp, b, r, oldplist); + vp9_model_to_full_probs(oldp, oldplist); + vpx_memcpy(newplist, oldp, sizeof(vp9_prob) * UNCONSTRAINED_NODES); for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i) old_b += cost_branch256(ct + 2 * i, oldplist[i]); old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]); @@ -433,7 +428,7 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct, for (; newp != oldp[PIVOT_NODE]; newp += step) { if (newp < 1 || newp > 255) continue; newplist[PIVOT_NODE] = newp; - vp9_get_model_distribution(newp, newplist, b, r); + vp9_model_to_full_probs(newplist, newplist); for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i) new_b += cost_branch256(ct + 2 * i, newplist[i]); new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]); @@ -448,7 +443,6 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct, *bestp = bestnewp; return bestsavings; } -#endif static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd, unsigned int *ct) { @@ -479,25 +473,18 @@ static void pack_mb_tokens(vp9_writer* const bc, int v = a->value; int n = a->len; int ncount = n; -#if CONFIG_MODELCOEFPROB vp9_prob probs[ENTROPY_NODES]; -#endif if (t == EOSB_TOKEN) { ++p; break; } -#if CONFIG_MODELCOEFPROB if (t >= TWO_TOKEN) { - vp9_model_to_full_probs(p->context_tree, - p->block_type, p->ref_type, probs); + vp9_model_to_full_probs(p->context_tree, probs); pp = probs; } else { pp = p->context_tree; } -#else - pp = p->context_tree; -#endif assert(pp != 0); /* skip one or two nodes */ @@ -729,26 +716,17 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, active_section = 6; #endif -#if CONFIG_AB4X4 if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob); -#else - if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8) - write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob); - else - write_ymode(bc, mode, pc->fc.ymode_prob); -#endif -#if CONFIG_AB4X4 if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) { -#else - if (mode == I4X4_PRED) { -#endif - int j = 0; - do { - write_bmode(bc, m->bmi[j].as_mode.first, - pc->fc.bmode_prob); - } while (++j < 4); + int idx, idy; + int bw = 1 << b_width_log2(mi->sb_type); + int bh = 1 << b_height_log2(mi->sb_type); + for (idy = 0; idy < 2; idy += bh) + for (idx = 0; idx < 2; idx += bw) + write_sb_ymode(bc, m->bmi[idy * 2 + idx].as_mode.first, + pc->fc.sb_ymode_prob); } 
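Note: the pack_inter_mode_mvs hunk above replaces the fixed four-iteration write_bmode loop with an idy/idx walk whose step sizes are the block's width and height in 4x4 units, so a 4x8 or 8x4 partition codes one mode per sub-block and a 4x4 partition codes four. A minimal standalone sketch of that iteration pattern; the function and names below are illustrative only, not part of libvpx:

#include <stdio.h>

/* bw and bh are the block's width and height measured in 4-pixel units
 * (each is 1 or 2 inside an 8x8 area). */
static void visit_sub_blocks(int bw, int bh) {
  int idx, idy;
  for (idy = 0; idy < 2; idy += bh)
    for (idx = 0; idx < 2; idx += bw)
      printf("code one mode for sub-block %d\n", idy * 2 + idx);
}

int main(void) {
  visit_sub_blocks(1, 1);  /* 4x4 partition: four modes             */
  visit_sub_blocks(2, 1);  /* 8x4 partition: two modes (top/bottom) */
  visit_sub_blocks(1, 2);  /* 4x8 partition: two modes (left/right) */
  return 0;
}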
write_uv_mode(bc, mi->uv_mode, pc->fc.uv_mode_prob[mode]); @@ -763,16 +741,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, // If segment skip is not enabled code the mode. if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { -#if CONFIG_AB4X4 if (mi->sb_type >= BLOCK_SIZE_SB8X8) write_sb_mv_ref(bc, mode, mv_ref_p); -#else - if (mi->sb_type > BLOCK_SIZE_SB8X8) { - write_sb_mv_ref(bc, mode, mv_ref_p); - } else { - write_mv_ref(bc, mode, mv_ref_p); - } -#endif vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); } @@ -819,9 +789,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl; int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl; int idx, idy; -#if !CONFIG_AB4X4 - bw = 1, bh = 1; -#endif for (idy = 0; idy < 2; idy += bh) { for (idx = 0; idx < 2; idx += bw) { j = idy * 2 + idx; @@ -861,20 +828,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } } -#if CONFIG_AB4X4 - if (((rf == INTRA_FRAME && mi->sb_type >= BLOCK_SIZE_SB8X8) || - (rf != INTRA_FRAME && mi->sb_type >= BLOCK_SIZE_SB8X8)) && - pc->txfm_mode == TX_MODE_SELECT && - !(skip_coeff || vp9_segfeature_active(xd, segment_id, - SEG_LVL_SKIP))) -#else - if (((rf == INTRA_FRAME && mode != I4X4_PRED) || - (rf != INTRA_FRAME && mode != SPLITMV)) && - pc->txfm_mode == TX_MODE_SELECT && - !(skip_coeff || vp9_segfeature_active(xd, segment_id, - SEG_LVL_SKIP))) -#endif - { + if (mi->sb_type >= BLOCK_SIZE_SB8X8 && pc->txfm_mode == TX_MODE_SELECT && + !(rf != INTRA_FRAME && + (skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { TX_SIZE sz = mi->txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); @@ -891,8 +847,8 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, vp9_writer *bc, int mi_row, int mi_col) { const VP9_COMMON *const c = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int mis = c->mode_info_stride; const int ym = m->mbmi.mode; + const int mis = c->mode_info_stride; const int segment_id = m->mbmi.segment_id; int skip_coeff; @@ -906,45 +862,32 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP)); } -#if CONFIG_AB4X4 - if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) - sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); -#else - if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8) - sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); - else - kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]); -#endif + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const B_PREDICTION_MODE A = above_block_mode(m, 0, mis); + const B_PREDICTION_MODE L = xd->left_available ? + left_block_mode(m, 0) : DC_PRED; + write_kf_bmode(bc, ym, c->kf_bmode_prob[A][L]); + } -#if CONFIG_AB4X4 if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) { -#else - if (ym == I4X4_PRED) { -#endif - int i = 0; - do { - const B_PREDICTION_MODE a = above_block_mode(m, i, mis); - const B_PREDICTION_MODE l = (xd->left_available || - (i & 1)) ? 
- left_block_mode(m, i) : B_DC_PRED; - const int bm = m->bmi[i].as_mode.first; - -/*#ifdef ENTROPY_STATS - ++intra_mode_stats [A] [L] [bm]; -#endif*/ - write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]); - } while (++i < 4); + int idx, idy; + int bw = 1 << b_width_log2(m->mbmi.sb_type); + int bh = 1 << b_height_log2(m->mbmi.sb_type); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int i = idy * 2 + idx; + const B_PREDICTION_MODE A = above_block_mode(m, i, mis); + const B_PREDICTION_MODE L = (xd->left_available || idx) ? + left_block_mode(m, i) : DC_PRED; + write_kf_bmode(bc, m->bmi[i].as_mode.first, + c->kf_bmode_prob[A][L]); + } + } } write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); -#if CONFIG_AB4X4 - if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT && - !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { -#else - if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT && - !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { -#endif + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT) { TX_SIZE sz = m->mbmi.txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); @@ -962,11 +905,9 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; -#if CONFIG_AB4X4 if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) if (xd->ab_index > 0) return; -#endif xd->mode_info_context = m; set_mi_row_col(&cpi->common, xd, mi_row, 1 << mi_height_log2(m->mbmi.sb_type), @@ -1019,17 +960,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, else assert(0); -#if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) if (xd->ab_index > 0) return; -#endif -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8) { -#else - if (bsize > BLOCK_SIZE_SB8X8) { -#endif int pl; xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); xd->above_seg_context = cm->above_seg_context + mi_col; @@ -1071,13 +1006,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, } // update partition context -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8 && (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { -#else - if (bsize > BLOCK_SIZE_SB8X8 && - (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) { -#endif set_partition_seg_context(cm, xd, mi_row, mi_col); update_partition_context(xd, subsize, bsize); } @@ -1101,7 +1031,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context)); for (mi_col = c->cur_tile_mi_col_start; mi_col < c->cur_tile_mi_col_end; - mi_col += 8, m += 8) + mi_col += 64 / MI_SIZE, m += 64 / MI_SIZE) write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, BLOCK_SIZE_SB64X64); } @@ -1219,22 +1149,14 @@ static void update_coef_probs_common( vp9_coeff_stats *tree_update_hist, #endif vp9_coeff_probs *new_frame_coef_probs, -#if CONFIG_MODELCOEFPROB vp9_coeff_probs_model *old_frame_coef_probs, -#else - vp9_coeff_probs *old_frame_coef_probs, -#endif vp9_coeff_stats *frame_branch_ct, TX_SIZE tx_size) { int i, j, k, l, t; int update[2] = {0, 0}; int savings; -#if CONFIG_MODELCOEFPROB const int entropy_nodes_update = UNCONSTRAINED_NODES; -#else - const int entropy_nodes_update = ENTROPY_NODES; -#endif // vp9_prob bestupd = find_coef_update_prob(cpi); const int tstart = 0; @@ -1254,13 +1176,11 @@ static void update_coef_probs_common( 
if (l >= 3 && k == 0) continue; -#if CONFIG_MODELCOEFPROB if (t == PIVOT_NODE) s = prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); else -#endif s = prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], oldp, &newp, upd); if (s > 0 && newp != oldp) @@ -1298,13 +1218,11 @@ static void update_coef_probs_common( if (l >= 3 && k == 0) continue; -#if CONFIG_MODELCOEFPROB if (t == PIVOT_NODE) s = prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); else -#endif s = prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index d3851b428..84e1a1fdb 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -140,11 +140,9 @@ struct macroblock { // TODO(jingning): Need to refactor the structure arrays that buffers the // coding mode decisions of each partition type. -#if CONFIG_AB4X4 PICK_MODE_CONTEXT ab4x4_context[4][4][4]; PICK_MODE_CONTEXT sb8x4_context[4][4][4]; PICK_MODE_CONTEXT sb4x8_context[4][4][4]; -#endif PICK_MODE_CONTEXT sb8x8_context[4][4][4]; PICK_MODE_CONTEXT sb8x16_context[4][4][2]; PICK_MODE_CONTEXT sb16x8_context[4][4][2]; @@ -158,9 +156,7 @@ struct macroblock { PICK_MODE_CONTEXT sb64_context; int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; -#if CONFIG_AB4X4 BLOCK_SIZE_TYPE b_partitioning[4][4][4]; -#endif BLOCK_SIZE_TYPE mb_partitioning[4][4]; BLOCK_SIZE_TYPE sb_partitioning[4]; BLOCK_SIZE_TYPE sb64_partitioning; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f3a03f3c8..6c129ebbf 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -503,21 +503,19 @@ static unsigned find_seg_id(uint8_t *buf, BLOCK_SIZE_TYPE bsize, void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, int mb_row, int mb_col) { - setup_pred_plane(&x->plane[0].src, - src->y_buffer, src->y_stride, - mb_row, mb_col, NULL, - x->e_mbd.plane[0].subsampling_x, - x->e_mbd.plane[0].subsampling_y); - setup_pred_plane(&x->plane[1].src, - src->u_buffer, src->uv_stride, - mb_row, mb_col, NULL, - x->e_mbd.plane[1].subsampling_x, - x->e_mbd.plane[1].subsampling_y); - setup_pred_plane(&x->plane[2].src, - src->v_buffer, src->uv_stride, - mb_row, mb_col, NULL, - x->e_mbd.plane[2].subsampling_x, - x->e_mbd.plane[2].subsampling_y); + uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) { + setup_pred_plane(&x->plane[i].src, + buffers[i], strides[i], + mb_row, mb_col, NULL, + x->e_mbd.plane[i].subsampling_x, + x->e_mbd.plane[i].subsampling_y); + } } static void set_offsets(VP9_COMP *cpi, @@ -621,11 +619,9 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; -#if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) if (xd->ab_index != 0) return; -#endif set_offsets(cpi, mi_row, mi_col, bsize); xd->mode_info_context->mbmi.sb_type = bsize; @@ -710,14 +706,12 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index]; case BLOCK_SIZE_SB8X8: return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index]; -#if CONFIG_AB4X4 case BLOCK_SIZE_SB8X4: return 
&x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index]; case BLOCK_SIZE_SB4X8: return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index]; case BLOCK_SIZE_AB4X4: return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index]; -#endif default: assert(0); return NULL; @@ -734,10 +728,8 @@ static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x, return &x->sb_partitioning[xd->sb_index]; case BLOCK_SIZE_MB16X16: return &x->mb_partitioning[xd->sb_index][xd->mb_index]; -#if CONFIG_AB4X4 case BLOCK_SIZE_SB8X8: return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index]; -#endif default: assert(0); return NULL; @@ -787,11 +779,9 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, if (sub_index != -1) *(get_sb_index(xd, bsize)) = sub_index; -#if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) if (xd->ab_index > 0) return; -#endif set_offsets(cpi, mi_row, mi_col, bsize); update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); @@ -818,13 +808,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; -#if CONFIG_AB4X4 c1 = BLOCK_SIZE_AB4X4; - if (bsize >= BLOCK_SIZE_SB8X8) -#else - if (bsize > BLOCK_SIZE_SB8X8) -#endif - { + if (bsize >= BLOCK_SIZE_SB8X8) { set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); c1 = *(get_sb_partitioning(x, bsize)); @@ -833,13 +818,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, bwl = b_width_log2(c1), bhl = b_height_log2(c1); if (bsl == bwl && bsl == bhl) { -#if CONFIG_AB4X4 if (output_enabled && bsize >= BLOCK_SIZE_SB8X8) cpi->partition_count[pl][PARTITION_NONE]++; -#else - if (output_enabled && bsize > BLOCK_SIZE_SB8X8) - cpi->partition_count[pl][PARTITION_NONE]++; -#endif encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1); } else if (bsl == bhl && bsl > bwl) { if (output_enabled) @@ -870,13 +850,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, } } -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8 && (bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) { -#else - if (bsize > BLOCK_SIZE_SB8X8 && - (bsize == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) { -#endif set_partition_seg_context(cm, xd, mi_row, mi_col); update_partition_context(xd, c1, bsize); } @@ -902,14 +877,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, BLOCK_SIZE_TYPE subsize; int srate = INT_MAX, sdist = INT_MAX; -#if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) if (xd->ab_index != 0) { *rate = 0; *dist = 0; return; } -#endif assert(mi_height_log2(bsize) == mi_width_log2(bsize)); // buffer the above/left context information of the block in search. 
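Note: the rd_pick_partition hunks that follow drop the CONFIG_AB4X4 guards so the recursive search always descends to 8x8 before handing sub-8x8 decisions to pick_sb_modes, keeping whichever partition choice gives the lowest rate-distortion cost. A self-contained sketch of that quad-tree idea, restricted to NONE and SPLIT and with a stubbed block cost; everything below is illustrative, and evaluate_block merely stands in for pick_sb_modes:

#include <stdio.h>

typedef struct { long rate; long dist; } rd_stats;

/* Stand-in for pick_sb_modes(): flat rate, distortion growing with block
 * area, just so the example runs. Real costs come from coding the block. */
static void evaluate_block(int row, int col, int size_log2, rd_stats *out) {
  (void)row; (void)col;
  out->rate = 10;
  out->dist = 1L << (2 * size_log2);
}

static long rd_cost(const rd_stats *s, long lambda) {
  return s->rate * lambda + s->dist;
}

/* size_log2 is log2 of the block width in pixels (6 = 64x64, 3 = 8x8).
 * HORZ/VERT partitions are omitted to keep the sketch short. */
static long search_partition(int row, int col, int size_log2, long lambda) {
  rd_stats none;
  long best, split;
  int i;

  evaluate_block(row, col, size_log2, &none);
  best = rd_cost(&none, lambda);          /* PARTITION_NONE */

  if (size_log2 > 3) {                    /* recurse down to 8x8 */
    const int half = 1 << (size_log2 - 1);
    split = 0;
    for (i = 0; i < 4; i++)               /* PARTITION_SPLIT: four quarters */
      split += search_partition(row + (i >> 1) * half,
                                col + (i & 1) * half,
                                size_log2 - 1, lambda);
    if (split < best)
      best = split;
  }
  return best;
}

int main(void) {
  printf("best rd for a 64x64 block: %ld\n", search_partition(0, 0, 6, 100));
  return 0;
}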
@@ -927,11 +900,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, sizeof(PARTITION_CONTEXT) * ms); // PARTITION_SPLIT -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8) { -#else - if (bsize >= BLOCK_SIZE_MB16X16) { -#endif int r4 = 0, d4 = 0; subsize = get_subsize(bsize, PARTITION_SPLIT); *(get_sb_partitioning(x, bsize)) = subsize; @@ -953,12 +922,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); -#if CONFIG_AB4X4 if (r4 < INT_MAX) r4 += x->partition_cost[pl][PARTITION_SPLIT]; -#else - r4 += x->partition_cost[pl][PARTITION_SPLIT]; -#endif assert(r4 >= 0); assert(d4 >= 0); srate = r4; @@ -968,11 +933,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, // PARTITION_HORZ if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) && -#if CONFIG_AB4X4 (bsize >= BLOCK_SIZE_SB8X8)) { -#else - (bsize >= BLOCK_SIZE_MB16X16)) { -#endif int r2, d2; int mb_skip = 0; subsize = get_subsize(bsize, PARTITION_HORZ); @@ -995,12 +956,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); -#if CONFIG_AB4X4 if (r2 < INT_MAX) r2 += x->partition_cost[pl][PARTITION_HORZ]; -#else - r2 += x->partition_cost[pl][PARTITION_HORZ]; -#endif if ((RDCOST(x->rdmult, x->rddiv, r2, d2) < RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) { srate = r2; @@ -1012,11 +969,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, // PARTITION_VERT if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) && -#if CONFIG_AB4X4 (bsize >= BLOCK_SIZE_SB8X8)) { -#else - (bsize >= BLOCK_SIZE_MB16X16)) { -#endif int r2, d2; int mb_skip = 0; subsize = get_subsize(bsize, PARTITION_VERT); @@ -1038,12 +991,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); -#if CONFIG_AB4X4 if (r2 < INT_MAX) r2 += x->partition_cost[pl][PARTITION_VERT]; -#else - r2 += x->partition_cost[pl][PARTITION_VERT]; -#endif if ((RDCOST(x->rdmult, x->rddiv, r2, d2) < RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) { srate = r2; @@ -1058,11 +1007,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int r, d; pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize, get_block_context(x, bsize)); -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8) { -#else - if (bsize >= BLOCK_SIZE_MB16X16) { -#endif set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); r += x->partition_cost[pl][PARTITION_NONE]; @@ -1072,11 +1017,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, RDCOST(x->rdmult, x->rddiv, srate, sdist)) { srate = r; sdist = d; -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8) -#else - if (bsize >= BLOCK_SIZE_MB16X16) -#endif *(get_sb_partitioning(x, bsize)) = bsize; } } @@ -1260,6 +1201,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { vpx_memset(cpi->txfm_count_16x16p, 0, sizeof(cpi->txfm_count_16x16p)); vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p)); vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff)); + vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes)); + { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -1514,7 +1457,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { } else txfm_type = ALLOW_8X8; #else - txfm_type = 
cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >= + txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? ALLOW_32X32 : TX_MODE_SELECT; #endif @@ -1603,22 +1546,22 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode; -#if CONFIG_AB4X4 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { -#else - if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8) { -#endif ++cpi->sb_ymode_count[m]; } else { ++cpi->ymode_count[m]; } ++cpi->y_uv_mode_count[m][uvm]; if (m == I4X4_PRED) { - int b = 0; - do { - int m = xd->mode_info_context->bmi[b].as_mode.first; - ++cpi->bmode_count[m]; - } while (++b < 4); + int idx, idy; + int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type); + int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode.first; + ++cpi->sb_ymode_count[m]; + } + } } } @@ -1693,22 +1636,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_update_zbin_extra(cpi, x); } -#if CONFIG_AB4X4 - if (mbmi->ref_frame == INTRA_FRAME && - bsize < BLOCK_SIZE_SB8X8) { -#else - if (mbmi->mode == I4X4_PRED) { - assert(bsize == BLOCK_SIZE_SB8X8 && mbmi->txfm_size == TX_4X4); -#endif - vp9_encode_intra4x4mby(x, BLOCK_SIZE_SB8X8); - vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_SB8X8); - vp9_encode_sbuv(cm, x, BLOCK_SIZE_SB8X8); - - if (output_enabled) - sum_intra_stats(cpi, x); - } else if (mbmi->ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sby_s(xd, bsize); - vp9_build_intra_predictors_sbuv_s(xd, bsize); + if (mbmi->ref_frame == INTRA_FRAME) { + vp9_encode_intra_block_y(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? + BLOCK_SIZE_SB8X8 : bsize); + vp9_encode_intra_block_uv(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? + BLOCK_SIZE_SB8X8 : bsize); if (output_enabled) sum_intra_stats(cpi, x); } else { @@ -1730,14 +1662,9 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, : bsize); } -#if CONFIG_AB4X4 - if (mbmi->ref_frame == INTRA_FRAME && - bsize < BLOCK_SIZE_SB8X8) { -#else - if (mbmi->mode == I4X4_PRED) { - assert(bsize == BLOCK_SIZE_SB8X8); -#endif - vp9_tokenize_sb(cpi, xd, t, !output_enabled, BLOCK_SIZE_SB8X8); + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + vp9_tokenize_sb(cpi, xd, t, !output_enabled, + (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); } else if (!x->skip) { vp9_encode_sb(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); vp9_tokenize_sb(cpi, xd, t, !output_enabled, @@ -1764,8 +1691,9 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !(mbmi->mb_skip_coeff || - vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { + mbmi->sb_type >= BLOCK_SIZE_SB8X8 && + !(mbmi->ref_frame != INTRA_FRAME && (mbmi->mb_skip_coeff || + vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { if (bsize >= BLOCK_SIZE_SB32X32) { cpi->txfm_count_32x32p[mbmi->txfm_size]++; } else if (bsize >= BLOCK_SIZE_MB16X16) { @@ -1776,18 +1704,19 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, } else { int x, y; TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
TX_32X32 : cm->txfm_mode; - - if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) - sz = TX_16X16; - if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) - sz = TX_8X8; -#if CONFIG_AB4X4 - if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8) -#else - if (sz == TX_8X8 && (mbmi->mode == SPLITMV || - mbmi->mode == I4X4_PRED)) -#endif + // The new intra coding scheme requires no change of transform size + if (mi->mbmi.ref_frame != INTRA_FRAME) { + if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) + sz = TX_16X16; + if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) + sz = TX_8X8; + if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8) + sz = TX_4X4; + } else if (bsize >= BLOCK_SIZE_SB8X8) { + sz = mbmi->txfm_size; + } else { sz = TX_4X4; + } for (y = 0; y < bh; y++) { for (x = 0; x < bw; x++) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index f8cf50f84..91866b28f 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -80,15 +80,6 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib, } } -void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) { - int i; - int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - int bc = 1 << (bwl + bhl); - - for (i = 0; i < bc; i++) - encode_intra4x4block(mb, i, bsize); -} - void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; @@ -102,3 +93,5 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16); vp9_encode_sbuv(cm, x, BLOCK_SIZE_MB16X16); } + + diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index c26200494..7da164c6a 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -16,5 +16,9 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bs); +void vp9_encode_intra_block_y(VP9_COMMON *const cm, MACROBLOCK *mb, + BLOCK_SIZE_TYPE bs); +void vp9_encode_intra_block_uv(VP9_COMMON *const cm, MACROBLOCK *mb, + BLOCK_SIZE_TYPE bs); + #endif // VP9_ENCODER_VP9_ENCODEINTRA_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 84b350792..3f2061c64 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -44,7 +44,6 @@ static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) { const uint8_t *src = x->plane[plane].src.buf; const int src_stride = x->plane[plane].src.stride; - assert(plane < 3); vp9_subtract_block(bh, bw, x->plane[plane].src_diff, bw, src, src_stride, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride); @@ -168,7 +167,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, break; } case TX_32X32: - scan = vp9_default_zig_zag1d_32x32; + scan = vp9_default_scan_32x32; default_eob = 1024; band_translate = vp9_coefband_trans_8x8plus; break; @@ -605,3 +604,118 @@ void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, foreach_transformed_block(xd, bsize, encode_block, &arg); } + +static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK* const x = args->x; + MACROBLOCKD* const xd = &x->e_mbd; + const TX_SIZE tx_size = (TX_SIZE)(ss_txfrm_size / 2); + const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x); + const int 
raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + uint8_t* const src = + raster_block_offset_uint8(xd, bsize, plane, raster_block, + x->plane[plane].src.buf, + x->plane[plane].src.stride); + uint8_t* const dst = + raster_block_offset_uint8(xd, bsize, plane, raster_block, + xd->plane[plane].dst.buf, + xd->plane[plane].dst.stride); + int16_t* const src_diff = + raster_block_offset_int16(xd, bsize, plane, + raster_block, x->plane[plane].src_diff); + + const int txfm_b_size = 4 << tx_size; + int ib = raster_block; + int tx_ib = ib >> tx_size; + int plane_b_size; + + TX_TYPE tx_type; + int mode, b_mode; + + mode = plane == 0? xd->mode_info_context->mbmi.mode: + xd->mode_info_context->mbmi.uv_mode; + if (bsize <= BLOCK_SIZE_SB8X8 && mode == I4X4_PRED && plane == 0) + b_mode = xd->mode_info_context->bmi[ib].as_mode.first; + else + b_mode = mode; + + assert(b_mode >= B_DC_PRED && b_mode <= B_TM_PRED); + + plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode, + dst, xd->plane[plane].dst.stride); + vp9_subtract_block(txfm_b_size, txfm_b_size, + src_diff, bw, + src, x->plane[plane].src.stride, + dst, xd->plane[plane].dst.stride); + + xform_quant(plane, block, bsize, ss_txfrm_size, arg); + + /* + if (x->optimize) + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); + */ + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_idct32x32_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) { + vp9_short_idct16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride); + } else { + vp9_short_iht16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride, + tx_type); + } + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) { + vp9_short_idct8x8_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride); + } else { + vp9_short_iht8x8_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride, + tx_type); + } + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) { + // this is like vp9_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. 
+ vp9_inverse_transform_b_4x4_add(xd, xd->plane[plane].eobs[block], + BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), dst, + xd->plane[plane].dst.stride); + } else { + vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + dst, xd->plane[plane].dst.stride, tx_type); + } + break; + } +} + +void vp9_encode_intra_block_y(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + + foreach_transformed_block_in_plane(xd, bsize, 0, + encode_block_intra, &arg); +} +void vp9_encode_intra_block_uv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + + foreach_transformed_block_uv(xd, bsize, encode_block_intra, &arg); +} + diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index e2cd8838c..1bb7fa88d 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -573,16 +573,9 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl; int idx, idy; -#if CONFIG_AB4X4 if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { -#else - if (mbmi->mode == SPLITMV) { -#endif int i; PARTITION_INFO *pi = x->partition_info; -#if !CONFIG_AB4X4 - bw = 1, bh = 1; -#endif for (idy = 0; idy < 2; idy += bh) { for (idx = 0; idx < 2; idx += bw) { i = idy * 2 + idx; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index 708fe4549..b07d92a44 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -84,20 +84,27 @@ bail: return NULL; } +#define USE_PARTIAL_COPY 0 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, unsigned int flags, unsigned char *active_map) { struct lookahead_entry *buf; +#if USE_PARTIAL_COPY int row, col, active_end; int mb_rows = (src->y_height + 15) >> 4; int mb_cols = (src->y_width + 15) >> 4; +#endif if (ctx->sz + 1 > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); +#if USE_PARTIAL_COPY + // TODO(jkoleszar): This is disabled for now, as + // vp9_copy_and_extend_frame_with_rect is not subsampling/alpha aware. + // Only do this partial copy if the following conditions are all met: // 1. Lookahead queue has has size of 1. // 2. Active map is provided. @@ -140,6 +147,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, } else { vp9_copy_and_extend_frame(src, &buf->img); } +#else + // Partial copy not implemented yet + vp9_copy_and_extend_frame(src, &buf->img); +#endif + buf->ts_start = ts_start; buf->ts_end = ts_end; buf->flags = flags; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 75e6e6757..2e99736ce 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -414,7 +414,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, return besterr; } -#if CONFIG_COMP_INTER_JOINT_SEARCH #undef DIST /* returns subpixel variance error function */ #define DIST(r, c) \ @@ -606,7 +605,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, return besterr; } -#endif // CONFIG_COMP_INTER_JOINT_SEARCH + #undef MVC #undef PRE @@ -2327,7 +2326,6 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, return INT_MAX; } -#if CONFIG_COMP_INTER_JOINT_SEARCH /* This function is called when we do joint motion search in comp_inter_inter * mode. 
*/ @@ -2429,4 +2427,3 @@ int vp9_refining_search_8p_c(MACROBLOCK *x, return INT_MAX; } } -#endif // CONFIG_COMP_INTER_JOINT_SEARCH diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 33e688b97..28b2efd28 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -75,7 +75,6 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, int *mvjcost, int *mvcost[2], int_mv *center_mv); -#if CONFIG_COMP_INTER_JOINT_SEARCH int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, @@ -91,5 +90,4 @@ int vp9_refining_search_8p_c(MACROBLOCK *x, int *mvjcost, int *mvcost[2], int_mv *center_mv, const uint8_t *second_pred, int w, int h); -#endif // CONFIG_COMP_INTER_JOINT_SEARCH #endif // VP9_ENCODER_VP9_MCOMP_H_ diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c index e26daf0c9..e58ff40d9 100644 --- a/vp9/encoder/vp9_modecosts.c +++ b/vp9/encoder/vp9_modecosts.c @@ -33,10 +33,11 @@ void vp9_init_mode_costs(VP9_COMP *c) { x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree); // TODO(rbultje) separate tables for superblock costing? - vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree); + vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.sb_ymode_prob, + vp9_sb_ymode_tree); vp9_cost_tokens(c->mb.mbmode_cost[0], - x->kf_ymode_prob[c->common.kf_ymode_probs_index], - vp9_kf_ymode_tree); + x->sb_kf_ymode_prob[c->common.kf_ymode_probs_index], + vp9_sb_ymode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index e5c4761cc..27eeb5c82 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -775,6 +775,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->optimize_coefficients = !cpi->oxcf.lossless; sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->comp_inter_joint_serach = 1; #if CONFIG_MULTIPLE_ARF // Switch segmentation off. 
sf->static_segmentation = 0; @@ -785,7 +786,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->static_segmentation = 0; #endif #endif - sf->splitmode_breakout = 0; sf->mb16_breakout = 0; switch (mode) { @@ -804,13 +804,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->static_segmentation = 0; #endif #endif - sf->splitmode_breakout = 1; sf->mb16_breakout = 0; if (speed > 0) { /* Disable coefficient optimization above speed 0 */ sf->optimize_coefficients = 0; sf->no_skip_block4x4_search = 0; + sf->comp_inter_joint_serach = 0; sf->first_step = 1; @@ -1636,12 +1636,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4, vp9_sub_pixel_avg_variance8x4, NULL, NULL, - NULL, NULL, NULL, + NULL, NULL, vp9_sad8x4x8, vp9_sad8x4x4d) BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8, vp9_sub_pixel_avg_variance4x8, NULL, NULL, - NULL, NULL, NULL, + NULL, NULL, vp9_sad4x8x8, vp9_sad4x8x4d) BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4, @@ -2083,6 +2083,18 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { fwrite(src, s->uv_width, 1, yuv_rec_file); src += s->uv_stride; } while (--h); + +#if CONFIG_ALPHA + if (s->alpha_buffer) { + src = s->alpha_buffer; + h = s->alpha_height; + do { + fwrite(src, s->alpha_width, 1, yuv_rec_file); + src += s->alpha_stride; + } while (--h); + } +#endif + fflush(yuv_rec_file); } #endif @@ -2095,11 +2107,15 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, const int out_h = dst_fb->y_crop_height; int x, y, i; - uint8_t *srcs[3] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer}; - int src_strides[3] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride}; + uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer, + src_fb->alpha_buffer}; + int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride, + src_fb->alpha_stride}; - uint8_t *dsts[3] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer}; - int dst_strides[3] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride}; + uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer, + dst_fb->alpha_buffer}; + int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride, + dst_fb->alpha_stride}; for (y = 0; y < out_h; y += 16) { for (x = 0; x < out_w; x += 16) { @@ -2791,6 +2807,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } #endif loop_count = 0; + vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes)); if (cm->frame_type != KEY_FRAME) { /* TODO: Decide this more intelligently */ @@ -2919,11 +2936,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, #endif // transform / motion compensation build reconstruction frame -#if CONFIG_MODELCOEFPROB if (cm->frame_type == KEY_FRAME) { vp9_default_coef_probs(cm); } -#endif vp9_encode_frame(cpi); @@ -3153,6 +3168,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Pick the loop filter level for the frame. 
loopfilter_frame(cpi, cm); +#if WRITE_RECON_BUFFER + if (cm->show_frame) + write_cx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 2000); + else + write_cx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 3000); +#endif + // build the bitstream cpi->dummy_packing = 0; vp9_pack_bitstream(cpi, dest, size); @@ -3172,7 +3196,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, release_scaled_references(cpi); update_reference_frames(cpi); -#if CONFIG_MODELCOEFPROB vp9_full_to_model_counts(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4); vp9_full_to_model_counts(cpi->common.fc.coef_counts_8x8, @@ -3181,12 +3204,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->coef_counts_16x16); vp9_full_to_model_counts(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32); -#else - vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4); - vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); - vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); - vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32); -#endif if (!cpi->common.error_resilient_mode && !cpi->common.frame_parallel_decoding_mode) { vp9_adapt_coef_probs(&cpi->common); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 57d19ca63..e3e95eda9 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -76,17 +76,10 @@ typedef struct { // 0 = I4X4_PRED, ZERO_MV, MV, SPLIT signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; -#if CONFIG_MODELCOEFPROB vp9_coeff_probs_model coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs_model coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs_model coef_probs_16x16[BLOCK_TYPES]; vp9_coeff_probs_model coef_probs_32x32[BLOCK_TYPES]; -#else - vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; -#endif vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ @@ -231,9 +224,9 @@ typedef struct { int optimize_coefficients; int no_skip_block4x4_search; int search_best_filter; - int splitmode_breakout; int mb16_breakout; int static_segmentation; + int comp_inter_joint_serach; } SPEED_FEATURES; enum BlockSize { @@ -265,6 +258,14 @@ typedef struct VP9_COMP { DECLARE_ALIGNED(16, short, uv_zbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, uv_round[QINDEX_RANGE][16]); +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, short, a_quant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, unsigned char, a_quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, a_zbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, a_round[QINDEX_RANGE][16]); + + DECLARE_ALIGNED(16, short, zrun_zbin_boost_a[QINDEX_RANGE][16]); +#endif DECLARE_ALIGNED(16, short, zrun_zbin_boost_y[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]); diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index aea350bc4..53d8be775 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -93,7 +93,7 @@ void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs, scan = get_scan_16x16(tx_type); break; default: - scan = vp9_default_zig_zag1d_32x32; + scan = vp9_default_scan_32x32; break; } @@ -148,6 +148,9 @@ void vp9_init_quantizer(VP9_COMP *cpi) { int i; int quant_val; int quant_uv_val; +#if CONFIG_ALPHA + int quant_alpha_val; +#endif int q; static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 
10, 12, @@ -168,7 +171,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->common.y_dequant[q][0] = quant_val; cpi->zrun_zbin_boost_y[q][0] = (quant_val * zbin_boost[0]) >> 7; - quant_val = vp9_dc_quant(q, cpi->common.uv_dc_delta_q); invert_quant(cpi->uv_quant[q] + 0, cpi->uv_quant_shift[q] + 0, quant_val); cpi->uv_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); @@ -176,13 +178,26 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->common.uv_dequant[q][0] = quant_val; cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7; +#if CONFIG_ALPHA + quant_val = vp9_dc_quant(q, cpi->common.a_dc_delta_q); + invert_quant(cpi->a_quant[q] + 0, cpi->a_quant_shift[q] + 0, quant_val); + cpi->a_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); + cpi->a_round[q][0] = (qrounding_factor * quant_val) >> 7; + cpi->common.a_dequant[q][0] = quant_val; + cpi->zrun_zbin_boost_a[q][0] = (quant_val * zbin_boost[0]) >> 7; +#endif + quant_val = vp9_ac_quant(q, 0); cpi->common.y_dequant[q][1] = quant_val; quant_uv_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q); cpi->common.uv_dequant[q][1] = quant_uv_val; +#if CONFIG_ALPHA + quant_alpha_val = vp9_ac_quant(q, cpi->common.a_ac_delta_q); + cpi->common.a_dequant[q][1] = quant_alpha_val; +#endif // all the 4x4 ac values =; for (i = 1; i < 16; i++) { - int rc = vp9_default_zig_zag1d_4x4[i]; + int rc = vp9_default_scan_4x4[i]; invert_quant(cpi->y_quant[q] + rc, cpi->y_quant_shift[q] + rc, quant_val); cpi->y_zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); @@ -196,6 +211,16 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->uv_round[q][rc] = (qrounding_factor * quant_uv_val) >> 7; cpi->zrun_zbin_boost_uv[q][i] = ROUND_POWER_OF_TWO(quant_uv_val * zbin_boost[i], 7); + +#if CONFIG_ALPHA + invert_quant(cpi->a_quant[q] + rc, cpi->a_quant_shift[q] + rc, + quant_alpha_val); + cpi->a_zbin[q][rc] = + ROUND_POWER_OF_TWO(qzbin_factor * quant_alpha_val, 7); + cpi->a_round[q][rc] = (qrounding_factor * quant_alpha_val) >> 7; + cpi->zrun_zbin_boost_a[q][i] = + ROUND_POWER_OF_TWO(quant_alpha_val * zbin_boost[i], 7); +#endif } } } @@ -233,6 +258,16 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex]; } +#if CONFIG_ALPHA + x->plane[3].quant = cpi->a_quant[qindex]; + x->plane[3].quant_shift = cpi->a_quant_shift[qindex]; + x->plane[3].zbin = cpi->a_zbin[qindex]; + x->plane[3].round = cpi->a_round[qindex]; + x->plane[3].zrun_zbin_boost = cpi->zrun_zbin_boost_a[qindex]; + x->plane[3].zbin_extra = (int16_t)zbin_extra; + x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex]; +#endif + x->skip_block = vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); /* save this macroblock QIndex for vp9_update_zbin_extra() */ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 8c1ef4915..15ed8318c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -106,11 +106,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { }; static void fill_token_costs(vp9_coeff_count *c, -#if CONFIG_MODELCOEFPROB vp9_coeff_probs_model *p, -#else - vp9_coeff_probs *p, -#endif TX_SIZE tx_size) { int i, j, k, l; @@ -118,15 +114,10 @@ static void fill_token_costs(vp9_coeff_count *c, for (j = 0; j < REF_TYPES; j++) for (k = 0; k < COEF_BANDS; k++) for (l = 0; l < PREV_COEF_CONTEXTS; l++) { -#if CONFIG_MODELCOEFPROB vp9_prob probs[ENTROPY_NODES]; - vp9_model_to_full_probs(p[i][j][k][l], i, j, probs); + vp9_model_to_full_probs(p[i][j][k][l], probs); vp9_cost_tokens_skip((int 
*)c[i][j][k][l], probs, vp9_coef_tree); -#else - vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l], - vp9_coef_tree); -#endif } } @@ -280,11 +271,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, TX_TYPE tx_type = DCT_DCT; const int segment_id = xd->mode_info_context->mbmi.segment_id; -#if CONFIG_MODELCOEFPROB vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#else - vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][ENTROPY_NODES]; -#endif int seg_eob, default_eob; uint8_t token_cache[1024]; const uint8_t * band_translate; @@ -304,12 +291,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, get_tx_type_4x4(xd, block) : DCT_DCT; above_ec = A[0] != 0; left_ec = L[0] != 0; -#if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref], - type, ref, coef_probs); -#else - coef_probs = cm->fc.coef_probs_4x4[type][ref]; -#endif + coef_probs); seg_eob = 16; scan = get_scan_4x4(tx_type); band_translate = vp9_coefband_trans_4x4; @@ -324,12 +307,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, above_ec = (A[0] + A[1]) != 0; left_ec = (L[0] + L[1]) != 0; scan = get_scan_8x8(tx_type); -#if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref], - type, ref, coef_probs); -#else - coef_probs = cm->fc.coef_probs_8x8[type][ref]; -#endif + coef_probs); seg_eob = 64; band_translate = vp9_coefband_trans_8x8plus; break; @@ -341,12 +320,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; scan = get_scan_16x16(tx_type); -#if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref], - type, ref, coef_probs); -#else - coef_probs = cm->fc.coef_probs_16x16[type][ref]; -#endif + coef_probs); seg_eob = 256; above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; @@ -354,13 +329,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, break; } case TX_32X32: - scan = vp9_default_zig_zag1d_32x32; -#if CONFIG_MODELCOEFPROB + scan = vp9_default_scan_32x32; vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref], - type, ref, coef_probs); -#else - coef_probs = cm->fc.coef_probs_32x32[type][ref]; -#endif + coef_probs); seg_eob = 1024; above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; @@ -609,78 +580,92 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, - int *bestdistortion) { - B_PREDICTION_MODE mode; + int *bestdistortion, + BLOCK_SIZE_TYPE bsize) { + MB_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; int64_t best_rd = INT64_MAX; int rate = 0; int distortion; VP9_COMMON *const cm = &cpi->common; const int src_stride = x->plane[0].src.stride; - uint8_t* const src = - raster_block_offset_uint8(xd, - BLOCK_SIZE_SB8X8, - 0, ib, - x->plane[0].src.buf, src_stride); - int16_t* const src_diff = - raster_block_offset_int16(xd, - BLOCK_SIZE_SB8X8, - 0, ib, - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); - uint8_t* const dst = - raster_block_offset_uint8(xd, - BLOCK_SIZE_SB8X8, - 0, ib, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - ENTROPY_CONTEXT ta = *a, tempa = *a; - ENTROPY_CONTEXT tl = *l, templ = *l; + uint8_t *src, *dst; + int16_t 
*src_diff, *coeff; + + ENTROPY_CONTEXT ta[2], tempa[2]; + ENTROPY_CONTEXT tl[2], templ[2]; TX_TYPE tx_type = DCT_DCT; TX_TYPE best_tx_type = DCT_DCT; - /* - * The predictor buffer is a 2d buffer with a stride of 16. Create - * a temp buffer that meets the stride requirements, but we are only - * interested in the left 4x4 block - * */ - DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); + int idx, idy, block; + DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]); assert(ib < 4); + vpx_memcpy(ta, a, sizeof(ta)); + vpx_memcpy(tl, l, sizeof(tl)); xd->mode_info_context->mbmi.txfm_size = TX_4X4; - for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { + + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; - int ratey; + int ratey = 0; xd->mode_info_context->bmi[ib].as_mode.first = mode; - rate = bmode_costs[mode]; + if (cm->frame_type == KEY_FRAME) + rate = bmode_costs[mode]; + else + rate = x->mbmode_cost[cm->frame_type][mode]; + distortion = 0; - vp9_intra4x4_predict(xd, ib, - BLOCK_SIZE_SB8X8, - mode, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 8, - src, src_stride, - dst, xd->plane[0].dst.stride); + vpx_memcpy(tempa, ta, sizeof(ta)); + vpx_memcpy(templ, tl, sizeof(tl)); - xd->mode_info_context->bmi[ib].as_mode.first = mode; - tx_type = get_tx_type_4x4(xd, ib); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 8, tx_type); - x->quantize_b_4x4(x, ib, tx_type, 16); - } else { - x->fwd_txm4x4(src_diff, coeff, 16); - x->quantize_b_4x4(x, ib, tx_type, 16); - } + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + xd->mode_info_context->bmi[block].as_mode.first = mode; + src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + x->plane[0].src.buf, src_stride); + src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block, + x->plane[0].src_diff); + coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16); + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode, + dst, xd->plane[0].dst.stride); + vp9_subtract_block(4, 4, src_diff, 8, + src, src_stride, + dst, xd->plane[0].dst.stride); + + tx_type = get_tx_type_4x4(xd, block); + if (tx_type != DCT_DCT) { + vp9_short_fht4x4(src_diff, coeff, 8, tx_type); + x->quantize_b_4x4(x, block, tx_type, 16); + } else { + x->fwd_txm4x4(src_diff, coeff, 16); + x->quantize_b_4x4(x, block, tx_type, 16); + } - tempa = ta; - templ = tl; + ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, + tempa + idx, templ + idy, TX_4X4, 16); + distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, + block, 16), 16) >> 2; - ratey = cost_coeffs(cm, x, 0, ib, - PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16); - rate += ratey; - distortion = vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - 16) >> 2; + vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode, + dst, xd->plane[0].dst.stride); + + if (best_tx_type != DCT_DCT) + vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + dst, xd->plane[0].dst.stride, best_tx_type); + else + xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + dst, xd->plane[0].dst.stride); + } + } + rate += ratey; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { @@ -690,25 +675,37 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, 
best_rd = this_rd; *best_mode = mode; best_tx_type = tx_type; - *a = tempa; - *l = templ; - vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); + vpx_memcpy(a, tempa, sizeof(tempa)); + vpx_memcpy(l, templ, sizeof(templ)); + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + vpx_memcpy(best_dqcoeff[idy * 2 + idx], + BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + sizeof(best_dqcoeff[0])); + } + } } } - xd->mode_info_context->bmi[ib].as_mode.first = - (B_PREDICTION_MODE)(*best_mode); - - vp9_intra4x4_predict(xd, ib, - BLOCK_SIZE_SB8X8, - *best_mode, - dst, xd->plane[0].dst.stride); - - // inverse transform - if (best_tx_type != DCT_DCT) { - vp9_short_iht4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride, - best_tx_type); - } else { - xd->inv_txm4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride); + + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + xd->mode_info_context->bmi[block].as_mode.first = *best_mode; + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + + vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode, + dst, xd->plane[0].dst.stride); + // inverse transform + if (best_tx_type != DCT_DCT) + vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst, + xd->plane[0].dst.stride, best_tx_type); + else + xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst, + xd->plane[0].dst.stride); + } } return best_rd; @@ -717,17 +714,17 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion, int64_t best_rd) { - int i; + int i, j; MACROBLOCKD *const xd = &mb->e_mbd; -#if CONFIG_AB4X4 + BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); + int idx, idy; int cost = 0; -#else - int cost = mb->mbmode_cost[xd->frame_type][I4X4_PRED]; -#endif int distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; - ENTROPY_CONTEXT t_above[2], t_left[2]; + ENTROPY_CONTEXT t_above[4], t_left[4]; int *bmode_costs; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); @@ -736,31 +733,39 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, xd->mode_info_context->mbmi.mode = I4X4_PRED; bmode_costs = mb->inter_bmode_costs; - for (i = 0; i < 4; i++) { - const int x_idx = i & 1, y_idx = i >> 1; - MODE_INFO *const mic = xd->mode_info_context; - const int mis = xd->mode_info_stride; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + MODE_INFO *const mic = xd->mode_info_context; + const int mis = xd->mode_info_stride; + B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); + int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); + int UNINITIALIZED_IS_SAFE(d); + i = idy * 2 + idx; - if (xd->frame_type == KEY_FRAME) { - const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mic, i); + if (xd->frame_type == KEY_FRAME) { + const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis); + const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
+ left_block_mode(mic, i) : DC_PRED; - bmode_costs = mb->bmode_costs[A][L]; - } + bmode_costs = mb->bmode_costs[A][L]; + } - total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, - t_above + x_idx, t_left + y_idx, - &r, &ry, &d); - cost += r; - distortion += d; - tot_rate_y += ry; + total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, + t_above + idx, t_left + idy, + &r, &ry, &d, bsize); + cost += r; + distortion += d; + tot_rate_y += ry; - mic->bmi[i].as_mode.first = best_mode; + mic->bmi[i].as_mode.first = best_mode; + for (j = 1; j < bh; ++j) + mic->bmi[i + j * 2].as_mode.first = best_mode; + for (j = 1; j < bw; ++j) + mic->bmi[i + j].as_mode.first = best_mode; - if (total_rd >= best_rd) - break; + if (total_rd >= best_rd) + break; + } } if (total_rd >= best_rd) @@ -780,18 +785,17 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + MACROBLOCKD *xd = &x->e_mbd; int this_rate, this_rate_tokenonly; int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); int i; -#if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) { x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; return best_rd; } -#endif for (i = 0; i < NB_TXFM_MODES; i++) txfm_cache[i] = INT64_MAX; @@ -799,13 +803,20 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_txfm_cache[NB_TXFM_MODES]; + MODE_INFO *const mic = xd->mode_info_context; + const int mis = xd->mode_info_stride; + const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? + left_block_mode(mic, 0) : DC_PRED; + + int *bmode_costs = x->bmode_costs[A][L]; x->e_mbd.mode_info_context->mbmi.mode = mode; vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, bsize, local_txfm_cache); - this_rate = this_rate_tokenonly + x->mbmode_cost[x->e_mbd.frame_type][mode]; + this_rate = this_rate_tokenonly + bmode_costs[mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { @@ -940,11 +951,9 @@ static int labels2mode(MACROBLOCK *x, MB_MODE_INFO * mbmi = &mic->mbmi; const int mis = xd->mode_info_stride; int i, cost = 0, thismvcost = 0; -#if CONFIG_AB4X4 int idx, idy; int bw = 1 << b_width_log2(mbmi->sb_type); int bh = 1 << b_height_log2(mbmi->sb_type); -#endif /* We have to be careful retrieving previously-encoded motion vectors. 
Ones from this macroblock have to be pulled from the BLOCKD array @@ -1028,7 +1037,6 @@ static int labels2mode(MACROBLOCK *x, x->partition_info->bmi[i].mv.as_int = this_mv->as_int; if (mbmi->second_ref_frame > 0) x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; -#if CONFIG_AB4X4 for (idy = 0; idy < bh; ++idy) { for (idx = 0; idx < bw; ++idx) { vpx_memcpy(&mic->bmi[i + idy * 2 + idx], @@ -1038,7 +1046,6 @@ static int labels2mode(MACROBLOCK *x, sizeof(x->partition_info->bmi[i])); } } -#endif } cost += thismvcost; @@ -1059,9 +1066,6 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, int bwl = b_width_log2(bsize), bw = 1 << bwl; int bhl = b_height_log2(bsize), bh = 1 << bhl; int idx, idy; -#if !CONFIG_AB4X4 - bw = 1, bh = 1; -#endif *labelyrate = 0; *distortion = 0; @@ -1229,18 +1233,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, ENTROPY_CONTEXT t_above[4], t_left[4]; ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; -#if !CONFIG_AB4X4 - bh = 1, bw = 1; -#endif - vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); -#if CONFIG_AB4X4 v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)]; -#else - v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4]; -#endif // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on @@ -1249,19 +1245,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, label_mv_thresh = 1 * bsi->mvthresh / label_count; // Segmentation method overheads -#if !CONFIG_AB4X4 - rate += vp9_cost_mv_ref(cpi, SPLITMV, - mbmi->mb_mode_context[mbmi->ref_frame]); - this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); - br += rate; -#endif other_segment_rd = this_segment_rd; for (idy = 0; idy < 2; idy += bh) { for (idx = 0; idx < 2; idx += bw) { // TODO(jingning,rbultje): rewrite the rate-distortion optimization - // loop for 4x4/4x8/8x4 block coding -#if CONFIG_AB4X4 + // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop +#if CONFIG_AB4X4 || 1 int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; @@ -1912,6 +1902,10 @@ static void setup_pred_block(const MACROBLOCKD *xd, dst[1].buf = src->u_buffer; dst[2].buf = src->v_buffer; dst[1].stride = dst[2].stride = src->uv_stride; +#if CONFIG_ALPHA + dst[3].buf = src->alpha_buffer; + dst[3].stride = src->alpha_stride; +#endif // TODO(jkoleszar): Make scale factors per-plane data for (i = 0; i < MAX_MB_PLANE; i++) { @@ -2108,157 +2102,145 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; if (is_comp_pred) { -#if CONFIG_COMP_INTER_JOINT_SEARCH - const int b_sz[BLOCK_SIZE_TYPES][2] = { - {4, 4}, - {8, 8}, - {8, 16}, - {16, 8}, - {16, 16}, - {16, 32}, - {32, 16}, - {32, 32}, - {32, 64}, - {64, 32}, - {64, 64} - }; - - int ite; - // Prediction buffer from second frame. - uint8_t *second_pred = vpx_memalign(16, b_sz[bsize][0] * - b_sz[bsize][1] * sizeof(uint8_t)); - - // Do joint motion search in compound mode to get more accurate mv. 
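Stepping back for a moment to the labels2mode() and rd_pick_intra4x4mby_modes() hunks earlier in this file: both now follow the same pattern, where a mode (or motion vector) chosen for a 4x8, 8x4 or 4x4 partition is copied into every 4x4 bmi entry that the partition covers, because an 8x8 block keeps its sub-block state as a 2x2 grid of 4x4 units. The following is a minimal illustrative sketch of that replication only, not code from the patch; BMI_SKETCH is a hypothetical stand-in for the real bmi/partition_info entries.

    /*
     * Illustrative only: an 8x8 block stores per-4x4 state in a 2x2 grid, so
     * the unit at (idx, idy) relative to the first covered unit i sits at
     * index i + idy * 2 + idx.
     */
    typedef struct { int mode; } BMI_SKETCH;

    static void replicate_sub8x8(BMI_SKETCH bmi[4], int i, int bw, int bh,
                                 BMI_SKETCH chosen) {
      int idx, idy;
      for (idy = 0; idy < bh; ++idy)
        for (idx = 0; idx < bw; ++idx)
          bmi[i + idy * 2 + idx] = chosen;  /* same choice for each 4x4 unit */
    }

With bw = bh = 1 (a 4x4 partition) this is a single write; an 8x4 partition (bw = 2, bh = 1) fills units i and i + 1, and a 4x8 partition fills i and i + 2, matching the explicit j loops in rd_pick_intra4x4mby_modes() above.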
- struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; - struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; - struct buf_2d scaled_first_yv12; - int last_besterr[2] = {INT_MAX, INT_MAX}; - - if (scaled_ref_frame[0]) { - int i; - - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // motion search code to be used without additional modifications. - for (i = 0; i < MAX_MB_PLANE; i++) - backup_yv12[i] = xd->plane[i].pre[0]; - - setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, - NULL, NULL); - } - - if (scaled_ref_frame[1]) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) - backup_second_yv12[i] = xd->plane[i].pre[1]; - - setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col, - NULL, NULL); - } - xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], - mi_row, mi_col); - xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], - mi_row, mi_col); - - scaled_first_yv12 = xd->plane[0].pre[0]; - - // Initialize mv using single prediction mode result. - frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int; - frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int; - - // Allow joint search multiple times iteratively for each ref frame, and - // break out the search loop if it couldn't find better mv. - for (ite = 0; ite < 4; ite++) { - struct buf_2d ref_yv12[2] = {xd->plane[0].pre[0], - xd->plane[0].pre[1]}; - int bestsme = INT_MAX; - int sadpb = x->sadperbit16; - int_mv tmp_mv; - int search_range = 3; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - int id = ite % 2; - - // Get pred block from second frame. - vp9_build_inter_predictor(ref_yv12[!id].buf, - ref_yv12[!id].stride, - second_pred, b_sz[bsize][0], - &frame_mv[NEWMV][refs[!id]], - &xd->scale_factor[!id], - b_sz[bsize][0], b_sz[bsize][1], 0, - &xd->subpix); - - // Compound motion search on first ref frame. - if (id) - xd->plane[0].pre[0] = ref_yv12[id]; - vp9_clamp_mv_min_max(x, &ref_mv[id]); - - // Use mv result from single mode as mvp. - tmp_mv.as_int = frame_mv[NEWMV][refs[id]].as_int; - - tmp_mv.as_mv.col >>= 3; - tmp_mv.as_mv.row >>= 3; - - // Small-range full-pixel motion search - bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, - search_range, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &ref_mv[id], second_pred, - b_sz[bsize][0], b_sz[bsize][1]); + if (cpi->sf.comp_inter_joint_serach) { + int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); + int ite; + // Prediction buffer from second frame. + uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + + // Do joint motion search in compound mode to get more accurate mv. + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d scaled_first_yv12; + int last_besterr[2] = {INT_MAX, INT_MAX}; + + if (scaled_ref_frame[0]) { + int i; + + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[0]; + + setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, + NULL, NULL); + } - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; + if (scaled_ref_frame[1]) { + int i; - if (bestsme < INT_MAX) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; + for (i = 0; i < MAX_MB_PLANE; i++) + backup_second_yv12[i] = xd->plane[i].pre[1]; - bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, - &ref_mv[id], - x->errorperbit, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, - b_sz[bsize][0], - b_sz[bsize][1]); + setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col, + NULL, NULL); } + xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], + mi_row, mi_col); + xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], + mi_row, mi_col); + + scaled_first_yv12 = xd->plane[0].pre[0]; + + // Initialize mv using single prediction mode result. + frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int; + frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int; + + // Allow joint search multiple times iteratively for each ref frame + // and break out the search loop if it couldn't find better mv. + for (ite = 0; ite < 4; ite++) { + struct buf_2d ref_yv12[2]; + int bestsme = INT_MAX; + int sadpb = x->sadperbit16; + int_mv tmp_mv; + int search_range = 3; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + int id = ite % 2; + + // Initialized here because of compiler problem in Visual Studio. + ref_yv12[0] = xd->plane[0].pre[0]; + ref_yv12[1] = xd->plane[0].pre[1]; + + // Get pred block from second frame. + vp9_build_inter_predictor(ref_yv12[!id].buf, + ref_yv12[!id].stride, + second_pred, pw, + &frame_mv[NEWMV][refs[!id]], + &xd->scale_factor[!id], + pw, ph, 0, + &xd->subpix); + + // Compound motion search on first ref frame. + if (id) + xd->plane[0].pre[0] = ref_yv12[id]; + vp9_clamp_mv_min_max(x, &ref_mv[id]); + + // Use mv result from single mode as mvp. + tmp_mv.as_int = frame_mv[NEWMV][refs[id]].as_int; + + tmp_mv.as_mv.col >>= 3; + tmp_mv.as_mv.row >>= 3; + + // Small-range full-pixel motion search + bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, + search_range, + &cpi->fn_ptr[block_size], + x->nmvjointcost, x->mvcost, + &ref_mv[id], second_pred, + pw, ph); + + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + + if (bestsme < INT_MAX) { + int dis; /* TODO: use dis in distortion calculation later. 
*/ + unsigned int sse; + + bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, + &ref_mv[id], + x->errorperbit, + &cpi->fn_ptr[block_size], + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph); + } - if (id) - xd->plane[0].pre[0] = scaled_first_yv12; + if (id) + xd->plane[0].pre[0] = scaled_first_yv12; - if (bestsme < last_besterr[id]) { + if (bestsme < last_besterr[id]) { frame_mv[NEWMV][refs[id]].as_int = - xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int; - last_besterr[id] = bestsme; - } else { - break; + xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int; + last_besterr[id] = bestsme; + } else { + break; + } } - } - // restore the predictor - if (scaled_ref_frame[0]) { - int i; + // restore the predictor + if (scaled_ref_frame[0]) { + int i; - for (i = 0; i < MAX_MB_PLANE; i++) - xd->plane[i].pre[0] = backup_yv12[i]; - } + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } - if (scaled_ref_frame[1]) { - int i; + if (scaled_ref_frame[1]) { + int i; - for (i = 0; i < MAX_MB_PLANE; i++) - xd->plane[i].pre[1] = backup_second_yv12[i]; - } + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = backup_second_yv12[i]; + } - vpx_free(second_pred); -#endif // CONFIG_COMP_INTER_JOINT_SEARCH + vpx_free(second_pred); + } if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) @@ -2577,11 +2559,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &dist_uv, &uv_skip, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); -#if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) -#else - if (bsize == BLOCK_SIZE_SB8X8) -#endif err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y, &rate4x4_y_tokenonly, &dist4x4_y, err); @@ -2593,11 +2571,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); xd->mode_info_context->mbmi.mode = mode; xd->mode_info_context->mbmi.txfm_size = txfm_size; -#if CONFIG_AB4X4 } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) { -#else - } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) { -#endif *returnrate = rate4x4_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist4x4_y + (dist_uv >> 2); @@ -2762,17 +2736,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, txfm_cache[i] = INT64_MAX; // Test best rd so far against threshold for trying this mode. 
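The compound-prediction changes above move the iterative joint motion search from a compile-time CONFIG_COMP_INTER_JOINT_SEARCH block to a runtime check on cpi->sf.comp_inter_joint_serach, but the algorithm itself is unchanged: the two reference MVs are refined alternately, each pass building the prediction from the other (fixed) reference and stopping as soon as an iteration fails to improve. Below is a self-contained sketch of that alternation only; joint_err_fn is a placeholder for the compound SAD/sub-pixel error that vp9_refining_search_8p_c() and vp9_find_best_sub_pixel_comp() compute in the real code, and the small 3x3 window is purely illustrative.

    #include <limits.h>

    typedef struct { int row, col; } MV;

    /* Error of the compound (two-reference) prediction for a candidate MV
     * pair; in the patch this role is played by the refining/sub-pixel
     * search functions named above. */
    typedef int (*joint_err_fn)(const MV *mv0, const MV *mv1, void *ctx);

    static void joint_search_sketch(MV mv[2], joint_err_fn err, void *ctx) {
      int best_err[2] = { INT_MAX, INT_MAX };
      int ite;
      for (ite = 0; ite < 4; ++ite) {
        const int id = ite & 1;            /* reference whose MV is refined */
        MV best_mv = mv[id];
        int improved = 0, dr, dc;
        for (dr = -1; dr <= 1; ++dr) {
          for (dc = -1; dc <= 1; ++dc) {
            const MV save = mv[id];
            MV trial;
            int e;
            trial.row = save.row + dr;
            trial.col = save.col + dc;
            mv[id] = trial;
            e = err(&mv[0], &mv[1], ctx);  /* cost with the other MV fixed */
            mv[id] = save;
            if (e < best_err[id]) {
              best_err[id] = e;
              best_mv = trial;
              improved = 1;
            }
          }
        }
        mv[id] = best_mv;
        if (!improved)
          break;                           /* no further gain: stop early */
      }
    }

The real loop likewise alternates on id = ite % 2 and breaks when bestsme no longer beats last_besterr[id] for the reference being refined.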
-#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8 && (best_rd < cpi->rd_threshes[mode_index] || cpi->rd_threshes[mode_index] == INT_MAX)) continue; -#else - if (best_rd <= cpi->rd_threshes[mode_index] || - cpi->rd_threshes[mode_index] == INT_MAX) { - continue; - } -#endif x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; @@ -2783,11 +2750,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } -#if CONFIG_AB4X4 if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) { -#else - if (cpi->speed > 0) { -#endif if (!(ref_frame_mask & (1 << ref_frame))) { continue; } @@ -2833,18 +2796,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cm->mcomp_filter_type; vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); -#if CONFIG_AB4X4 if (bsize >= BLOCK_SIZE_SB8X8 && (this_mode == I4X4_PRED || this_mode == SPLITMV)) continue; if (bsize < BLOCK_SIZE_SB8X8 && !(this_mode == I4X4_PRED || this_mode == SPLITMV)) continue; -#else - if (bsize != BLOCK_SIZE_SB8X8 && - (this_mode == I4X4_PRED || this_mode == SPLITMV)) - continue; -#endif if (comp_pred) { if (ref_frame == ALTREF_FRAME) { @@ -2919,11 +2876,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += dist_uv[TX_4X4]; distortion_uv = dist_uv[TX_4X4]; mbmi->uv_mode = mode_uv[TX_4X4]; -#if CONFIG_AB4X4 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); for (i = 0; i < NB_TXFM_MODES; ++i) txfm_cache[i] = txfm_cache[ONLY_4X4]; -#endif } else if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; vp9_build_intra_predictors_sby_s(xd, bsize); @@ -3057,11 +3012,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_uv; skippable = skippable && uv_skippable; -#if CONFIG_AB4X4 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); for (i = 0; i < NB_TXFM_MODES; ++i) txfm_cache[i] = txfm_cache[ONLY_4X4]; -#endif if (!mode_excluded) { if (is_comp_pred) @@ -3117,11 +3070,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Is Mb level skip allowed (i.e. not coded at segment level). 
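Several hunks in this file rank candidates with RDCOST(x->rdmult, x->rddiv, rate, distortion), for example the txfm_cache[ONLY_4X4] assignments above. For orientation only: the cost is a weighted sum of rate and distortion, and the macro in the encoder headers is approximately the following (the exact rounding constant and normalising shift should be read as an assumption about this revision, not a quote from it).

    #include <stdint.h>

    /* Approximate shape of RDCOST(): rate is weighted by the Q-dependent
     * per-frame multiplier rdmult with a rounding bias and a fixed >> 8
     * normalisation; distortion is scaled by the divisor-as-shift rddiv. */
    static int64_t rdcost_sketch(int rdmult, int rddiv, int rate, int64_t dist) {
      return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
    }

Lower is better, which is why the skip path above backs the coefficient rate out of rate2 before the mode is re-costed against best_rd.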
mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); -#if CONFIG_AB4X4 if (skippable && bsize >= BLOCK_SIZE_SB8X8) { -#else - if (skippable) { -#endif // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // for best_yrd calculation @@ -3302,13 +3251,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } -#if CONFIG_AB4X4 if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) { *returnrate = INT_MAX; *returndistortion = INT_MAX; return best_rd; } -#endif assert((cm->mcomp_filter_type == SWITCHABLE) || (cm->mcomp_filter_type == best_mbmode.interp_filter) || @@ -3341,10 +3288,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0) && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME) -#if CONFIG_AB4X4 - && bsize >= BLOCK_SIZE_SB8X8 -#endif - ) { + && bsize >= BLOCK_SIZE_SB8X8) { mbmi->mode = ZEROMV; mbmi->ref_frame = ALTREF_FRAME; mbmi->second_ref_frame = NONE; diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index 994828f20..6b1ba4964 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@ -593,6 +593,37 @@ void vp9_sad8x4x4d_c(const uint8_t *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } +void vp9_sad8x4x8_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + uint32_t *sad_array) { + sad_array[0] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); +} + void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], @@ -608,6 +639,37 @@ void vp9_sad4x8x4d_c(const uint8_t *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } +void vp9_sad4x8x8_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + uint32_t *sad_array) { + sad_array[0] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); +} + void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 30143d77d..47792fcc2 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -205,9 +205,11 @@ static void 
temporal_filter_iterate_c(VP9_COMP *cpi, DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8); // Save input state - uint8_t *y_buffer = mbd->plane[0].pre[0].buf; - uint8_t *u_buffer = mbd->plane[1].pre[0].buf; - uint8_t *v_buffer = mbd->plane[2].pre[0].buf; + uint8_t* input_buffer[MAX_MB_PLANE]; + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) + input_buffer[i] = mbd->plane[i].pre[0].buf; for (mb_row = 0; mb_row < mb_rows; mb_row++) { #if ALT_REF_MC_ENABLED @@ -352,9 +354,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, } // Restore input state - mbd->plane[0].pre[0].buf = y_buffer; - mbd->plane[1].pre[0].buf = u_buffer; - mbd->plane[2].pre[0].buf = v_buffer; + for (i = 0; i < MAX_MB_PLANE; i++) + mbd->plane[i].pre[0].buf = input_buffer[i]; } void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 9ed16ffc3..08efc84d4 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -119,12 +119,8 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, TOKENEXTRA *t = *tp; /* store tokens starting here */ const int eob = xd->plane[plane].eobs[block]; const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); -#if CONFIG_AB4X4 const BLOCK_SIZE_TYPE sb_type = (mbmi->sb_type < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : mbmi->sb_type; -#else - const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; -#endif const int bwl = b_width_log2(sb_type); const int off = block >> (2 * tx_size); const int mod = bwl - tx_size - xd->plane[plane].subsampling_x; @@ -136,11 +132,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, const int segment_id = mbmi->segment_id; const int *scan, *nb; vp9_coeff_count *counts; -#if CONFIG_MODELCOEFPROB vp9_coeff_probs_model *coef_probs; -#else - vp9_coeff_probs *coef_probs; -#endif const int ref = mbmi->ref_frame != INTRA_FRAME; ENTROPY_CONTEXT above_ec, left_ec; uint8_t token_cache[1024]; @@ -194,7 +186,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; seg_eob = 1024; - scan = vp9_default_zig_zag1d_32x32; + scan = vp9_default_scan_32x32; counts = cpi->coef_counts_32x32; coef_probs = cpi->common.fc.coef_probs_32x32; band_translate = vp9_coefband_trans_8x8plus; @@ -228,10 +220,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, t->token = token; t->context_tree = coef_probs[type][ref][band][pt]; -#if CONFIG_MODELCOEFPROB - t->block_type = type; - t->ref_type = ref; -#endif t->skip_eob_node = (c > 0) && (token_cache[scan[c - 1]] == 0); assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0); diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 2a56da8d1..08236c429 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -26,10 +26,6 @@ typedef struct { int16_t extra; uint8_t token; uint8_t skip_eob_node; -#if CONFIG_MODELCOEFPROB - uint8_t block_type; - uint8_t ref_type; -#endif } TOKENEXTRA; typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 306476b01..aaa43ef82 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -86,7 +86,6 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -// #if CONFIG_COMP_INTER_JOINT_SEARCH 
static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int weight, int height, uint8_t *ref, int ref_stride) { int i, j; @@ -102,5 +101,4 @@ static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int weight, ref += ref_stride; } } -// #endif // CONFIG_COMP_INTER_JOINT_SEARCH #endif // VP9_ENCODER_VP9_VARIANCE_H_ diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index fc363b6b0..67ca9257c 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -139,7 +139,37 @@ void vp9_half_vert_variance16x_h_sse2 DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]); -unsigned int vp9_variance4x4_wmt( +typedef unsigned int (*get_var_sse2) ( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *SSE, + int *Sum +); + +static void variance_sse2(const unsigned char *src_ptr, int source_stride, + const unsigned char *ref_ptr, int recon_stride, + int w, int h, unsigned int *sse, int *sum, + get_var_sse2 var_fn, int block_size) { + unsigned int sse0; + int sum0; + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + var_fn(src_ptr + source_stride * i + j, source_stride, + ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + +unsigned int vp9_variance4x4_sse2( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, @@ -148,13 +178,41 @@ unsigned int vp9_variance4x4_wmt( unsigned int var; int avg; - vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, + &var, &avg, vp9_get4x4var_mmx, 4); *sse = var; return (var - (((unsigned int)avg * avg) >> 4)); +} + +unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, + &var, &avg, vp9_get4x4var_mmx, 4); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); +} + +unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, + &var, &avg, vp9_get4x4var_mmx, 4); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); } -unsigned int vp9_variance8x8_wmt +unsigned int vp9_variance8x8_sse2 ( const unsigned char *src_ptr, int source_stride, @@ -164,83 +222,157 @@ unsigned int vp9_variance8x8_wmt unsigned int var; int avg; - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, + &var, &avg, vp9_get8x8var_sse2, 8); *sse = var; return (var - (((unsigned int)avg * avg) >> 6)); - } - -unsigned int vp9_variance16x16_wmt +unsigned int vp9_variance16x8_sse2 ( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { - unsigned int sse0; - int sum0; - + unsigned int var; + int avg; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - *sse = sse0; - return (sse0 - (((unsigned int)sum0 * sum0) >> 8)); + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, + &var, &avg, 
vp9_get8x8var_sse2, 8); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 7)); } -unsigned int vp9_mse16x16_wmt( +unsigned int vp9_variance8x16_sse2 +( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { + unsigned int var; + int avg; - unsigned int sse0; - int sum0; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - *sse = sse0; - return sse0; - + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, + &var, &avg, vp9_get8x8var_sse2, 8); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 7)); } - -unsigned int vp9_variance16x8_wmt +unsigned int vp9_variance16x16_sse2 ( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); + unsigned int var; + int avg; - var = sse0 + sse1; - avg = sum0 + sum1; + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, + &var, &avg, vp9_get16x16var_sse2, 16); *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - + return (var - (((unsigned int)avg * avg) >> 8)); } -unsigned int vp9_variance8x16_wmt -( +unsigned int vp9_mse16x16_wmt( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); + unsigned int sse0; + int sum0; + vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, + &sum0); + *sse = sse0; + return sse0; +} - var = sse0 + sse1; - avg = sum0 + sum1; +unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, + &var, &avg, vp9_get16x16var_sse2, 16); *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); + return (var - (((int64_t)avg * avg) >> 10)); +} + +unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} +unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} + +unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 12)); +} + +unsigned int vp9_variance64x32_sse2(const uint8_t 
*src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 11)); +} + +unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 11)); } unsigned int vp9_sub_pixel_variance4x4_wmt diff --git a/vp9/encoder/x86/vp9_variance_ssse3.c b/vp9/encoder/x86/vp9_variance_ssse3.c index f95a5423c..882acad78 100644 --- a/vp9/encoder/x86/vp9_variance_ssse3.c +++ b/vp9/encoder/x86/vp9_variance_ssse3.c @@ -15,15 +15,6 @@ #define HALFNDX 8 -extern unsigned int vp9_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); extern void vp9_half_horiz_vert_variance16x_h_sse2 ( const unsigned char *ref_ptr, |
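A closing note on the vp9_variance_sse2.c rewrite above: every new wrapper routes through the variance_sse2() helper, which tiles the block with an existing 8x8 or 16x16 SSE2 kernel, accumulates SSE and Sum, and then applies the identity var = SSE - Sum^2 / N with N = width * height; the per-size shifts in the return statements (>> 5 for 8x4, >> 8 for 16x16, >> 12 for 64x64, and so on) are simply log2(N). The sketch below shows only that final step and is not part of the patch.

    #include <stdint.h>

    /* var = SSE - Sum^2 / N, with the division done as a shift by log2(N);
     * assumes w and h are powers of two, as all VP9 block sizes are. */
    static unsigned int variance_from_sse_sum(unsigned int sse, int sum,
                                              int w, int h) {
      int shift = 0, n = w * h;
      while (n > 1) {
        n >>= 1;
        ++shift;
      }
      return sse - (unsigned int)(((int64_t)sum * sum) >> shift);
    }

Using a 64-bit intermediate for sum * sum matters once blocks reach 32x32, where the squared sum can exceed 32 bits, which is why the larger sizes above cast to int64_t while the sub-16x16 ones keep the unsigned int product.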