diff options
-rw-r--r-- | third_party/googletest/README.libvpx (renamed from third_party/googletest/README.webm) | 0 | ||||
-rw-r--r-- | third_party/libwebm/README.libvpx (renamed from third_party/libwebm/README.webm) | 0 | ||||
-rw-r--r-- | third_party/libyuv/README.libvpx (renamed from third_party/libyuv/README.webm) | 0 | ||||
-rw-r--r-- | third_party/nestegg/README.libvpx (renamed from third_party/nestegg/README.webm) | 0 | ||||
-rw-r--r-- | third_party/x86inc/README.libvpx (renamed from third_party/x86inc/README.webm) | 0 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 21 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 83 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 126 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 67 | ||||
-rw-r--r-- | vp9/encoder/vp9_picklpf.c | 19 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 48 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 5 |
13 files changed, 152 insertions, 219 deletions
diff --git a/third_party/googletest/README.webm b/third_party/googletest/README.libvpx index 6fdeb8731..6fdeb8731 100644 --- a/third_party/googletest/README.webm +++ b/third_party/googletest/README.libvpx diff --git a/third_party/libwebm/README.webm b/third_party/libwebm/README.libvpx index 2c7570d6d..2c7570d6d 100644 --- a/third_party/libwebm/README.webm +++ b/third_party/libwebm/README.libvpx diff --git a/third_party/libyuv/README.webm b/third_party/libyuv/README.libvpx index d3495caa1..d3495caa1 100644 --- a/third_party/libyuv/README.webm +++ b/third_party/libyuv/README.libvpx diff --git a/third_party/nestegg/README.webm b/third_party/nestegg/README.libvpx index 8e3760bd7..8e3760bd7 100644 --- a/third_party/nestegg/README.webm +++ b/third_party/nestegg/README.libvpx diff --git a/third_party/x86inc/README.webm b/third_party/x86inc/README.libvpx index 02cd9ab4e..02cd9ab4e 100644 --- a/third_party/x86inc/README.webm +++ b/third_party/x86inc/README.libvpx diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 83c6a33c7..795765d15 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1456,13 +1456,14 @@ static void set_source_var_based_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *x = &cpi->mb; + MACROBLOCK *const x = &cpi->mb; const int mis = cm->mi_stride; - int row8x8_remaining = tile->mi_row_end - mi_row; - int col8x8_remaining = tile->mi_col_end - mi_col; - int r, c; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); // In-image SB64 @@ -1541,16 +1542,13 @@ static void set_source_var_based_partition(VP9_COMP *cpi, BLOCK_SIZE bsize = BLOCK_16X16; int bh = num_8x8_blocks_high_lookup[bsize]; int bw = 
num_8x8_blocks_wide_lookup[bsize]; - + int r, c; for (r = 0; r < MI_BLOCK_SIZE; r += bh) { for (c = 0; c < MI_BLOCK_SIZE; c += bw) { - int index = r * mis + c; - // Find a partition size that fits - bsize = find_partition_size(bsize, - (row8x8_remaining - r), - (col8x8_remaining - c), &bh, &bw); + const int index = r * mis + c; mi_8x8[index] = mi_upper_left + index; - mi_8x8[index]->mbmi.sb_type = bsize; + mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize, + row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); } } } @@ -3193,7 +3191,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, 1, &dummy_rate, &dummy_dist); break; case SOURCE_VAR_BASED_PARTITION: - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 2bea93f0f..b0c014eef 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -63,24 +63,17 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { } #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) -typedef struct vp9_token_state vp9_token_state; -struct vp9_token_state { +typedef struct vp9_token_state { int rate; int error; int next; signed char token; short qc; -}; +} vp9_token_state; // TODO(jimbankoski): experiment to find optimal RD numbers. 
-#define Y1_RD_MULT 4 -#define UV_RD_MULT 2 - -static const int plane_rd_mult[4] = { - Y1_RD_MULT, - UV_RD_MULT, -}; +static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 }; #define UPDATE_RD_COST()\ {\ @@ -108,56 +101,53 @@ static int trellis_get_coeff_context(const int16_t *scan, static int optimize_b(MACROBLOCK *mb, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; - struct macroblock_plane *p = &mb->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; + struct macroblock_plane *const p = &mb->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; - const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); - int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int eob = p->eobs[block], final_eob, sz = 0; - const int i0 = 0; - int rc, x, next, i; - int64_t rdmult, rddiv, rd_cost0, rd_cost1; - int rate0, rate1, error0, error1, t0, t1; - int best, band, pt; - PLANE_TYPE type = pd->plane_type; - int err_mult = plane_rd_mult[type]; + uint8_t token_cache[1024]; + const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); + int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + const int eob = p->eobs[block]; + const PLANE_TYPE type = pd->plane_type; const int default_eob = 16 << (tx_size << 1); const int mul = 1 + (tx_size == TX_32X32); - uint8_t token_cache[1024]; const int16_t *dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); - const scan_order *so = get_scan(xd, tx_size, type, block); - const int16_t *scan = so->scan; - const int16_t *nb = so->neighbors; + const scan_order *const so = get_scan(xd, tx_size, type, block); + const int16_t *const scan = so->scan; + const int16_t *const 
nb = so->neighbors; + int next = eob, sz = 0; + int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv; + int64_t rd_cost0, rd_cost1; + int rate0, rate1, error0, error1, t0, t1; + int best, band, pt, i, final_eob; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ - rdmult = mb->rdmult * err_mult; - if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi)) + if (!ref) rdmult = (rdmult * 9) >> 4; - rddiv = mb->rddiv; + /* Initialize the sentinel node of the trellis. */ tokens[eob][0].rate = 0; tokens[eob][0].error = 0; tokens[eob][0].next = default_eob; tokens[eob][0].token = EOB_TOKEN; tokens[eob][0].qc = 0; - *(tokens[eob] + 1) = *(tokens[eob] + 0); - next = eob; + tokens[eob][1] = tokens[eob][0]; + for (i = 0; i < eob; i++) - token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ - qcoeff[scan[i]]].token]; + token_cache[scan[i]] = + vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token]; - for (i = eob; i-- > i0;) { + for (i = eob; i-- > 0;) { int base_bits, d2, dx; - - rc = scan[i]; - x = qcoeff[rc]; + const int rc = scan[i]; + int x = qcoeff[rc]; /* Only add a trellis state for non-zero coefficients. */ if (x) { int shortcut = 0; @@ -179,7 +169,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, UPDATE_RD_COST(); /* And pick the best. */ best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); + base_bits = vp9_dct_value_cost_ptr[x]; dx = mul * (dqcoeff[rc] - coeff[rc]); d2 = dx * dx; tokens[i][0].rate = base_bits + (best ? 
rate1 : rate0); @@ -193,9 +183,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && - (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + - dequant_ptr[rc != 0])) + if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && + (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + + dequant_ptr[rc != 0])) shortcut = 1; else shortcut = 0; @@ -232,7 +222,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, UPDATE_RD_COST(); /* And pick the best. */ best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); + base_bits = vp9_dct_value_cost_ptr[x]; if (shortcut) { dx -= (dequant_ptr[rc != 0] + sz) ^ sz; @@ -281,15 +271,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; - final_eob = i0 - 1; + final_eob = -1; vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2))); vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); for (i = next; i < eob; i = next) { - x = tokens[i][best].qc; + const int x = tokens[i][best].qc; + const int rc = scan[i]; if (x) { final_eob = i; } - rc = scan[i]; + qcoeff[rc] = x; dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index f5511ffa3..8a7901172 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -66,7 +66,7 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) { } static INLINE int mv_cost(const MV *mv, - const int *joint_cost, int *comp_cost[2]) { + const int *joint_cost, int *const comp_cost[2]) { return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + comp_cost[1][mv->col]; } @@ -90,14 +90,13 @@ static int mv_err_cost(const MV *mv, const MV *ref, return 0; } -static int mvsad_err_cost(const MV *mv, const MV *ref, - const int *mvjsadcost, int 
*mvsadcost[2], +static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, int error_per_bit) { - if (mvsadcost) { + if (x->nmvsadcost) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * - error_per_bit, 8); + return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost, + x->nmvsadcost) * error_per_bit, 8); } return 0; } @@ -478,8 +477,7 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { {\ if (thissad < bestsad) {\ if (use_mvcost) \ - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \ - mvjsadcost, mvsadcost, sad_per_bit);\ + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\ if (thissad < bestsad) {\ bestsad = thissad;\ best_site = i;\ @@ -519,9 +517,6 @@ static int vp9_pattern_search(const MACROBLOCK *x, int k = -1; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_init_s = search_param_to_steps[search_param]; - const int *const mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - // adjust ref_mv to make sure it is within MV range clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->row; @@ -530,8 +525,8 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv, + sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -891,10 +886,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, unsigned int thissad; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - - const int *mvjsadcost = x->nmvjointsadcost; - int 
*mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - int tr, tc; int best_tr = 0; int best_tc = 0; @@ -916,8 +907,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); start_row = MAX(-range, x->mv_row_min - ref_row); start_col = MAX(-range, x->mv_col_min - ref_col); @@ -938,8 +928,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, if (sad_array[i] < bestsad) { const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_tr = tr; @@ -955,8 +944,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, if (thissad < bestsad) { const MV this_mv = {ref_row + tr, ref_col + tc + i}; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -988,8 +976,6 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const uint8_t *best_address, *in_what_ref; int best_sad = INT_MAX; int best_site = 0; @@ -1005,7 +991,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // Check the starting position best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride, 0x7fffffff) + - 
mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1018,8 +1004,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_address + ss[i].offset, in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; best_site = i; @@ -1044,8 +1029,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_address + ss[best_site].offset, in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; best_mv->row += ss[best_site].mv.row; @@ -1095,10 +1079,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->row; ref_col = ref_mv->col; @@ -1112,8 +1092,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1146,9 +1125,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (sad_array[t] < bestsad) { const MV this_mv = {best_mv->row + ss[i].mv.row, best_mv->col + ss[i].mv.col}; - sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, + sad_per_bit); if 
(sad_array[t] < bestsad) { bestsad = sad_array[t]; best_site = i; @@ -1168,9 +1146,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, in_what_stride, bestsad); if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; @@ -1289,12 +1265,10 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { @@ -1302,9 +1276,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV mv = {r, c}; const int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + - mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); - + mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; *best_mv = mv; @@ -1338,8 +1310,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_col + distance, x->mv_col_max); unsigned int sad_array[3]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; // Work out the mid point for the search const uint8_t *bestaddress = 
&in_what[ref_row * in_what_stride + ref_col]; @@ -1350,8 +1320,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); for (r = row_min; r < row_max; r++) { const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; @@ -1368,9 +1337,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1388,9 +1355,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1430,9 +1395,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, unsigned int sad_array[3]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - // Work out the mid point for the search const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; @@ -1442,8 +1404,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, // Baseline value at the center bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); for (r = row_min; r < row_max; r++) { const uint8_t *check_here 
= &in_what[r * in_what_stride + col_min]; @@ -1460,9 +1421,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1485,9 +1444,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1506,9 +1463,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1533,13 +1488,10 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; i++) { @@ -1552,8 +1504,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); if (sad < best_sad) { - sad += 
mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, - error_per_bit); + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; @@ -1582,12 +1533,10 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; i++) { @@ -1612,9 +1561,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, if (sads[j] < best_sad) { const MV mv = {ref_mv->row + neighbors[j].row, ref_mv->col + neighbors[j].col}; - sads[j] += mvsad_err_cost(&mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - + sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sads[j] < best_sad) { best_sad = sads[j]; best_site = j; @@ -1631,9 +1578,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; @@ -1669,12 +1614,10 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], 
x->nmvsadcost[1]}; unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; ++i) { @@ -1689,8 +1632,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, get_buf_from_mv(in_what, &mv), in_what->stride, second_pred, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 721ce48bb..1d70538c6 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -61,11 +61,6 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. -// Max rate target for 1080P and below encodes under normal circumstances -// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB -#define MAX_MB_RATE 250 -#define MAXRATE_1080P 2025000 - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -629,49 +624,8 @@ static void update_frame_size(VP9_COMP *cpi) { } void vp9_new_framerate(VP9_COMP *cpi, double framerate) { - VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - VP9_CONFIG *const oxcf = &cpi->oxcf; - int vbr_max_bits; - - oxcf->framerate = framerate < 0.1 ? 30 : framerate; - rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / - oxcf->framerate); - rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * - oxcf->two_pass_vbrmin_section / 100); - - rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); - - // A maximum bitrate for a frame is defined. 
- // The baseline for this aligns with HW implementations that - // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits - // per 16x16 MB (averaged over a frame). However this limit is extended if - // a very high rate is given on the command line or the the rate cannnot - // be acheived because of a user specificed max q (e.g. when the user - // specifies lossless encode. - // - vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth * - oxcf->two_pass_vbrmax_section) / 100); - rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), - vbr_max_bits); - - // Set Maximum gf/arf interval - rc->max_gf_interval = 16; - - // Extended interval for genuinely static scenes - rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - - // Special conditions when alt ref frame enabled in lagged compress mode - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (rc->max_gf_interval > oxcf->lag_in_frames - 1) - rc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; + cpi->oxcf.framerate = framerate < 0.1 ? 
30 : framerate; + vp9_rc_update_framerate(cpi); } int64_t vp9_rescale(int64_t val, int64_t num, int denom) { @@ -861,10 +815,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cm->display_width = cpi->oxcf.width; cm->display_height = cpi->oxcf.height; - // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) - cpi->oxcf.sharpness = MIN(7, cpi->oxcf.sharpness); - - cpi->common.lf.sharpness_level = cpi->oxcf.sharpness; + cm->lf.sharpness_level = cpi->oxcf.sharpness; if (cpi->initial_width) { // Increasing the size of the frame beyond the first seen frame, or some @@ -884,10 +835,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cpi->speed = abs(cpi->oxcf.cpu_used); - // Limit on lag buffers as these are not currently dynamically allocated. - if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) - cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - #if CONFIG_MULTIPLE_ARF vp9_zero(cpi->alt_ref_source); #else @@ -1788,7 +1735,9 @@ static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb, dsts[i], out_h_uv, out_w_uv, dst_strides[i]); } } - vp8_yv12_extend_frame_borders(dst_fb); + // TODO(hkuang): Call C version explicitly + // as neon version only expand border size 32. + vp8_yv12_extend_frame_borders_c(dst_fb); } static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, @@ -1829,7 +1778,9 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, } } - vp8_yv12_extend_frame_borders(dst_fb); + // TODO(hkuang): Call C version explicitly + // as neon version only expand border size 32. 
+ vp8_yv12_extend_frame_borders_c(dst_fb); } static int find_fp_qindex() { diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 3ac85228b..e003a0f42 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -23,7 +23,7 @@ #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -static int get_max_filter_level(VP9_COMP *cpi) { +static int get_max_filter_level(const VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 : MAX_LOOP_FILTER; } @@ -43,15 +43,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, return filt_err; } -static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int partial_frame) { - VP9_COMMON *const cm = &cpi->common; - struct loopfilter *const lf = &cm->lf; +static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, + int partial_frame) { + const VP9_COMMON *const cm = &cpi->common; + const struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; const int max_filter_level = get_max_filter_level(cpi); - int best_err; - int filt_best; int filt_direction = 0; + int best_err, filt_best; + // Start the search at the previous frame filter level unless it is now out of // range. 
int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); @@ -128,7 +128,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } } - lf->filter_level = filt_best; + return filt_best; } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, @@ -150,6 +150,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); + lf->filter_level = search_filter_level(sd, cpi, + method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index b1ef08291..bd6a78c2d 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -27,6 +27,11 @@ #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ratectrl.h" +// Max rate target for 1080P and below encodes under normal circumstances +// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB +#define MAX_MB_RATE 250 +#define MAXRATE_1080P 2025000 + #define DEFAULT_KF_BOOST 2000 #define DEFAULT_GF_BOOST 2000 @@ -1399,3 +1404,46 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } + +void vp9_rc_update_framerate(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + RATE_CONTROL *const rc = &cpi->rc; + int vbr_max_bits; + + rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / oxcf->framerate); + rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + + rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); + + // A maximum bitrate for a frame is defined. + // The baseline for this aligns with HW implementations that + // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits + // per 16x16 MB (averaged over a frame). 
However this limit is extended if + // a very high rate is given on the command line or the rate cannot + // be achieved because of a user specified max q (e.g. when the user + // specifies lossless encode). + vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), + vbr_max_bits); + + // Set Maximum gf/arf interval + rc->max_gf_interval = 16; + + // Extended interval for genuinely static scenes + rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + // Special conditions when alt ref frame enabled in lagged compress mode + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->max_gf_interval > oxcf->lag_in_frames - 1) + rc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; +} diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 0b12a4c58..cf6526b8b 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -174,6 +174,8 @@ int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, int qindex, double rate_target_ratio); +void vp9_rc_update_framerate(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index b2ecd6c92..7ef21fa5d 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3910,10 +3910,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jingning, jkoleszar): scaling reference frame not supported for // sub8x8 blocks. 
- if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) + if (ref_frame > INTRA_FRAME && + vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) continue; - if (second_ref_frame > NONE && + if (second_ref_frame > INTRA_FRAME && vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; |