diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_mvref_common.h | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_reader.h | 10 | ||||
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 27 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.c | 33 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 51 | ||||
-rw-r--r-- | vp9/encoder/vp9_write_bit_buffer.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_write_bit_buffer.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_writer.c | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_writer.h | 11 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 4 |
12 files changed, 99 insertions, 52 deletions
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h index 7bce3fa37..a937b7823 100644 --- a/vp9/common/vp9_mvref_common.h +++ b/vp9/common/vp9_mvref_common.h @@ -125,7 +125,7 @@ static const int idx_n_column_to_subblock[4][2] = { // clamp_mv_ref #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units -static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { +static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, xd->mb_to_right_edge + MV_BORDER, xd->mb_to_top_edge - MV_BORDER, diff --git a/vp9/decoder/vp9_reader.h b/vp9/decoder/vp9_reader.h index 32e200e2b..2d9eccfbf 100644 --- a/vp9/decoder/vp9_reader.h +++ b/vp9/decoder/vp9_reader.h @@ -52,7 +52,7 @@ int vp9_reader_has_error(vp9_reader *r); const uint8_t *vp9_reader_find_end(vp9_reader *r); -static int vp9_read(vp9_reader *r, int prob) { +static INLINE int vp9_read(vp9_reader *r, int prob) { unsigned int bit = 0; BD_VALUE value; BD_VALUE bigsplit; @@ -89,11 +89,11 @@ static int vp9_read(vp9_reader *r, int prob) { return bit; } -static int vp9_read_bit(vp9_reader *r) { +static INLINE int vp9_read_bit(vp9_reader *r) { return vp9_read(r, 128); // vp9_prob_half } -static int vp9_read_literal(vp9_reader *r, int bits) { +static INLINE int vp9_read_literal(vp9_reader *r, int bits) { int literal = 0, bit; for (bit = bits - 1; bit >= 0; bit--) @@ -102,8 +102,8 @@ static int vp9_read_literal(vp9_reader *r, int bits) { return literal; } -static int vp9_read_tree(vp9_reader *r, const vp9_tree_index *tree, - const vp9_prob *probs) { +static INLINE int vp9_read_tree(vp9_reader *r, const vp9_tree_index *tree, + const vp9_prob *probs) { vp9_tree_index i = 0; while ((i = tree[i + vp9_read(r, probs[i >> 1])]) > 0) diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index d638a2146..b0ff0fa81 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1216,7 +1216,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { saved_wb = wb; vp9_wb_write_literal(&wb, 0, 16); // don't know in advance first part. size - uncompressed_hdr_size = vp9_rb_bytes_written(&wb); + uncompressed_hdr_size = vp9_wb_bytes_written(&wb); data += uncompressed_hdr_size; vp9_clear_system_state(); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 2c7739115..bd3b0fdc8 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -41,6 +41,7 @@ struct macroblock_plane { int16_t *zbin; int16_t *round; + int64_t quant_thred[2]; // Zbin Over Quant value int16_t zbin_extra; }; @@ -117,6 +118,8 @@ struct macroblock { // skip forward transform and quantization int skip_txfm[MAX_MB_PLANE]; + int64_t bsse[MAX_MB_PLANE]; + // Used to store sub partition's choices. MV pred_mv[MAX_REF_FRAMES]; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 9e57d6abe..6115f5a0f 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -673,7 +673,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int i, j; const int width = num_4x4_blocks_wide_lookup[bsize]; const int height = num_4x4_blocks_high_lookup[bsize]; - const BLOCK_SIZE bsize_tx = txsize_to_bsize[mbmi->tx_size]; int rate2 = 0; int64_t dist2 = 0; @@ -683,28 +682,36 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + const BLOCK_SIZE bsize_tx = txsize_to_bsize[tmp_tx_size]; const int step = 1 << tmp_tx_size; - for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { - if (cpi->sf.reuse_inter_pred_sby) { - pd->dst.buf = tmp[0].data; - pd->dst.stride = bw; - } + if (cpi->sf.reuse_inter_pred_sby) { + pd->dst.buf = tmp[0].data; + pd->dst.stride = bw; + } + for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { + uint8_t *const src_buf_base = p->src.buf; + uint8_t *const dst_buf_base = pd->dst.buf; for (j = 0; j < height; j += step) { for (i = 0; i < width; i += step) { + p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; + pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)]; + // Use source buffer as an approximation for the fully reconstructed + // buffer vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize), tmp_tx_size, this_mode, - &p->src.buf[4 * (j * dst_stride + i)], - src_stride, - &pd->dst.buf[4 * (j * dst_stride + i)], - dst_stride, i, j, 0); + p->src.buf, src_stride, + pd->dst.buf, dst_stride, + i, j, 0); model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y); rate2 += rate; dist2 += dist; ++block_idx; } } + p->src.buf = src_buf_base; + pd->dst.buf = dst_buf_base; rate = rate2; dist = dist2; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index e153b2077..eababdbca 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -23,15 +23,14 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - int eob = -1; + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int tmp, eob = -1; if (!skip_block) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 16; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr; @@ -45,15 +44,15 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - int eob = -1; + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int tmp, eob = -1; if (!skip_block) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; @@ -354,6 +353,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; + x->plane[0].quant_thred[0] = cm->y_dequant[qindex][0] * + cm->y_dequant[qindex][0]; + x->plane[0].quant_thred[1] = cm->y_dequant[qindex][1] * + cm->y_dequant[qindex][1]; x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; @@ -365,6 +368,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; + x->plane[i].quant_thred[0] = cm->y_dequant[qindex][0] * + cm->y_dequant[qindex][0]; + x->plane[i].quant_thred[1] = cm->y_dequant[qindex][1] * + cm->y_dequant[qindex][1]; x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5caafd370..cc55dd78f 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -171,15 +171,27 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int64_t dist_sum = 0; const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; + const int shift = 8; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); + const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, + &sse); + if (!x->select_tx_size) { + if (sse < p->quant_thred[0] >> shift) + x->skip_txfm[i] = 1; + else if (var < p->quant_thred[1] >> shift) + x->skip_txfm[i] = 2; + else + x->skip_txfm[i] = 0; + } + + x->bsse[i] = sse; if (i == 0) x->pred_sse[ref] = sse; @@ -357,12 +369,32 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (args->skip) return; - if (!is_inter_block(mbmi)) + if (!is_inter_block(mbmi)) { vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip); - else - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); + } else { + if (x->skip_txfm[plane] == 0) { + // full forward transform and quantization + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); + } else if (x->skip_txfm[plane] == 2) { + // compute DC coefficient + int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); + int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); + vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + args->sse = x->bsse[plane] << 4; + args->dist = args->sse; + if (!x->plane[plane].eobs[block]) + args->dist = args->sse - ((coeff[0] * coeff[0] - + (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2); + } else { + // skip forward transform + x->plane[plane].eobs[block] = 0; + args->sse = x->bsse[plane] << 4; + args->dist = args->sse; + } + } - dist_block(plane, block, tx_size, args); rate_block(plane, block, plane_bsize, tx_size, args); rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); @@ -2102,6 +2134,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; INTERP_FILTER best_filter = SWITCHABLE; + int skip_txfm[MAX_MB_PLANE] = {0}; + int64_t bsse[MAX_MB_PLANE] = {0}; int bsl = mi_width_log2_lookup[bsize]; int pred_filter_search = cpi->sf.cb_pred_filter_search ? @@ -2264,6 +2298,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_filter = mbmi->interp_filter; if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; + vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); + vpx_memcpy(bsse, x->bsse, sizeof(bsse)); } if ((cm->interp_filter == SWITCHABLE && newbest) || @@ -2316,6 +2352,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, disable_skip); } + vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); + vpx_memcpy(x->bsse, bsse, sizeof(bsse)); + if (!x->skip) { int skippable_y, skippable_uv; int64_t sseuv = INT64_MAX; diff --git a/vp9/encoder/vp9_write_bit_buffer.c b/vp9/encoder/vp9_write_bit_buffer.c index 962d0ca56..6d55e84e8 100644 --- a/vp9/encoder/vp9_write_bit_buffer.c +++ b/vp9/encoder/vp9_write_bit_buffer.c @@ -8,9 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <limits.h> #include "vp9/encoder/vp9_write_bit_buffer.h" -size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { +size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb) { return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); } diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h index 073608d7f..59f9bbe30 100644 --- a/vp9/encoder/vp9_write_bit_buffer.h +++ b/vp9/encoder/vp9_write_bit_buffer.h @@ -11,8 +11,6 @@ #ifndef VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_ #define VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_ -#include <limits.h> - #include "vpx/vpx_integer.h" #ifdef __cplusplus @@ -24,7 +22,7 @@ struct vp9_write_bit_buffer { size_t bit_offset; }; -size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb); +size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb); void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit); diff --git a/vp9/encoder/vp9_writer.c b/vp9/encoder/vp9_writer.c index 8398fc07a..ff461f218 100644 --- a/vp9/encoder/vp9_writer.c +++ b/vp9/encoder/vp9_writer.c @@ -15,7 +15,6 @@ void vp9_start_encode(vp9_writer *br, uint8_t *source) { br->lowvalue = 0; br->range = 255; - br->value = 0; br->count = -24; br->buffer = source; br->pos = 0; diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h index 7f4fa1ef2..9d161f95c 100644 --- a/vp9/encoder/vp9_writer.h +++ b/vp9/encoder/vp9_writer.h @@ -22,20 +22,15 @@ extern "C" { typedef struct { unsigned int lowvalue; unsigned int range; - unsigned int value; int count; unsigned int pos; uint8_t *buffer; - - // Variables used to track bit costs without outputing to the bitstream - unsigned int measure_cost; - uint64_t bit_counter; } vp9_writer; void vp9_start_encode(vp9_writer *bc, uint8_t *buffer); void vp9_stop_encode(vp9_writer *bc); -static void vp9_write(vp9_writer *br, int bit, int probability) { +static INLINE void vp9_write(vp9_writer *br, int bit, int probability) { unsigned int split; int count = br->count; unsigned int range = br->range; @@ -83,11 +78,11 @@ static void vp9_write(vp9_writer *br, int bit, int probability) { br->range = range; } -static void vp9_write_bit(vp9_writer *w, int bit) { +static INLINE void vp9_write_bit(vp9_writer *w, int bit) { vp9_write(w, bit, 128); // vp9_prob_half } -static void vp9_write_literal(vp9_writer *w, int data, int bits) { +static INLINE void vp9_write_literal(vp9_writer *w, int data, int bits) { int bit; for (bit = bits - 1; bit >= 0; bit--) diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 35ee1aee6..bf8eec717 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -808,7 +808,7 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { } // vp9 uses 10,000,000 ticks/second as time stamp -#define TICKS_PER_SEC 10000000 +#define TICKS_PER_SEC 10000000LL static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase, int64_t n) { @@ -1325,9 +1325,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { 9999, // kf_max_dist VPX_SS_DEFAULT_LAYERS, // ss_number_layers -#if CONFIG_SPATIAL_SVC {0}, -#endif {0}, // ss_target_bitrate 1, // ts_number_layers {0}, // ts_target_bitrate |