diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 78 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 780 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 13 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 322 | ||||
-rw-r--r-- | vp9/encoder/vp9_lookahead.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_lookahead.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 152 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 38 | ||||
-rw-r--r-- | vp9/encoder/vp9_tokenize.c | 7 |
10 files changed, 515 insertions, 894 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 20154d813..ac29a8e0c 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -905,6 +905,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, case PARTITION_SPLIT: for (n = 0; n < 4; n++) { int j = n >> 1, i = n & 0x01; + *(get_sb_index(xd, subsize)) = n; write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end, mi_row + j * bs, mi_col + i * bs, subsize); } @@ -914,12 +915,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, } // update partition context - if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16)) - return; - - xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); - xd->above_seg_context = cm->above_seg_context + mi_col; - update_partition_context(xd, subsize, bsize); + if (bsize > BLOCK_SIZE_SB8X8 && + (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + update_partition_context(xd, subsize, bsize); + } } static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, @@ -1242,16 +1242,6 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { FILE *vpxlogc = 0; #endif -static void put_delta_q(vp9_writer *bc, int delta_q) { - if (delta_q != 0) { - vp9_write_bit(bc, 1); - vp9_write_literal(bc, abs(delta_q), 4); - vp9_write_bit(bc, delta_q < 0); - } else { - vp9_write_bit(bc, 0); - } -} - static void decide_kf_ymode_entropy(VP9_COMP *cpi) { int mode_cost[MB_MODE_COUNT]; int bestcost = INT_MAX; @@ -1298,9 +1288,21 @@ static void segment_reference_frames(VP9_COMP *cpi) { } } -static void encode_loopfilter(MACROBLOCKD *xd, vp9_writer *w) { +static void encode_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_writer *w) { int i; + // Encode the loop filter level and type + vp9_write_literal(w, pc->filter_level, 6); + vp9_write_literal(w, pc->sharpness_level, 3); +#if CONFIG_LOOP_DERING + if (pc->dering_enabled) { + vp9_write_bit(w, 1); + vp9_write_literal(w, pc->dering_enabled - 1, 4); + } else { + vp9_write_bit(w, 0); + } +#endif + // Write out loop filter deltas applied at the MB level based on mode or // ref frame (if they are enabled). vp9_write_bit(w, xd->mode_ref_lf_delta_enabled); @@ -1354,6 +1356,24 @@ static void encode_loopfilter(MACROBLOCKD *xd, vp9_writer *w) { } } +static void put_delta_q(vp9_writer *bc, int delta_q) { + if (delta_q != 0) { + vp9_write_bit(bc, 1); + vp9_write_literal(bc, abs(delta_q), 4); + vp9_write_bit(bc, delta_q < 0); + } else { + vp9_write_bit(bc, 0); + } +} + +static void encode_quantization(VP9_COMMON *pc, vp9_writer *w) { + vp9_write_literal(w, pc->base_qindex, QINDEX_BITS); + put_delta_q(w, pc->y_dc_delta_q); + put_delta_q(w, pc->uv_dc_delta_q); + put_delta_q(w, pc->uv_ac_delta_q); +} + + static void encode_segmentation(VP9_COMP *cpi, vp9_writer *w) { int i, j; VP9_COMMON *const pc = &cpi->common; @@ -1495,27 +1515,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { // lossless mode: note this needs to be before loopfilter vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless); - // Encode the loop filter level and type - vp9_write_literal(&header_bc, pc->filter_level, 6); - vp9_write_literal(&header_bc, pc->sharpness_level, 3); -#if CONFIG_LOOP_DERING - if (pc->dering_enabled) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, pc->dering_enabled - 1, 4); - } else { - vp9_write_bit(&header_bc, 0); - } -#endif - - encode_loopfilter(xd, &header_bc); - - // Frame Q baseline quantizer index - vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS); + encode_loopfilter(pc, xd, &header_bc); - // Transmit Dc, Second order and Uv quantizer delta information - put_delta_q(&header_bc, pc->y_dc_delta_q); - put_delta_q(&header_bc, pc->uv_dc_delta_q); - put_delta_q(&header_bc, pc->uv_ac_delta_q); + encode_quantization(pc, &header_bc); // When there is a key frame all reference buffers are updated using the new key frame if (pc->frame_type != KEY_FRAME) { @@ -1805,6 +1807,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { int scaling = (pc->width != pc->display_width || pc->height != pc->display_height); int v = (oh.first_partition_length_in_bytes << 8) | + (pc->subsampling_y << 7) | + (pc->subsampling_x << 6) | (scaling << 5) | (oh.show_frame << 4) | (oh.version << 1) | diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 6bc42c7ff..44261481c 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -138,8 +138,8 @@ struct macroblock { int optimize; - // Structure to hold context for each of the 4 MBs within a SB: - // when encoded as 4 independent MBs: + // TODO(jingning): Need to refactor the structure arrays that buffers the + // coding mode decisions of each partition type. PICK_MODE_CONTEXT sb8_context[4][4][4]; PICK_MODE_CONTEXT sb8x16_context[4][4][2]; PICK_MODE_CONTEXT sb16x8_context[4][4][2]; @@ -153,6 +153,10 @@ struct macroblock { PICK_MODE_CONTEXT sb64_context; int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; + BLOCK_SIZE_TYPE mb_partitioning[4][4]; + BLOCK_SIZE_TYPE sb_partitioning[4]; + BLOCK_SIZE_TYPE sb64_partitioning; + void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 49e8ccefa..2edeb7807 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -539,15 +539,6 @@ void vp9_setup_src_planes(MACROBLOCK *x, x->e_mbd.plane[2].subsampling_y); } -static INLINE void set_partition_seg_context(VP9_COMP *cpi, - int mi_row, int mi_col) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - - xd->above_seg_context = cm->above_seg_context + mi_col; - xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); -} - static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) { MACROBLOCK *const x = &cpi->mb; @@ -571,7 +562,7 @@ static void set_offsets(VP9_COMP *cpi, } // partition contexts - set_partition_seg_context(cpi, mi_row, mi_col); + set_partition_seg_context(cm, xd, mi_row, mi_col); // Activity map pointer x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; @@ -731,6 +722,9 @@ static void set_block_index(MACROBLOCKD *xd, int idx, } } +// TODO(jingning): the variables used here are little complicated. need further +// refactoring on organizing the the temporary buffers, when recursive +// partition down to 4x4 block size is enabled. static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD *const xd = &x->e_mbd; @@ -762,6 +756,52 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, } } +static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *xd = &x->e_mbd; + switch (bsize) { + case BLOCK_SIZE_SB64X64: + return &x->sb64_partitioning; + case BLOCK_SIZE_SB32X32: + return &x->sb_partitioning[xd->sb_index]; + case BLOCK_SIZE_MB16X16: + return &x->mb_partitioning[xd->sb_index][xd->mb_index]; + default: + assert(0); + return NULL; + } +} + +static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, + ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], + PARTITION_CONTEXT sa[8], + PARTITION_CONTEXT sl[8], + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int p; + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int mwl = mi_width_log2(bsize), mw = 1 << mwl; + int mhl = mi_height_log2(bsize), mh = 1 << mhl; + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->above_context[p] + + ((mi_col * 2) >> xd->plane[p].subsampling_x), + a + bw * p, + sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x); + vpx_memcpy(cm->left_context[p] + + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), + l + bh * p, + sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y); + } + vpx_memcpy(cm->above_seg_context + mi_col, sa, + sizeof(PARTITION_CONTEXT) * mw); + vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(PARTITION_CONTEXT) * mh); +} + static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE_TYPE bsize, int sub_index) { @@ -788,27 +828,28 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, - BLOCK_SIZE_TYPE level, - BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4], - BLOCK_SIZE_TYPE c3[4][4] - ) { + BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - const int bsl = mi_width_log2(level), bs = 1 << (bsl - 1); - const int bwl = mi_width_log2(c1), bhl = mi_height_log2(c1); + BLOCK_SIZE_TYPE c1 = BLOCK_SIZE_SB8X8; + const int bsl = mi_width_log2(bsize), bs = 1 << (bsl - 1); + int bwl, bhl; int UNINITIALIZED_IS_SAFE(pl); if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (level > BLOCK_SIZE_SB8X8) { - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, level); + if (bsize > BLOCK_SIZE_SB8X8) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + c1 = *(get_sb_partitioning(x, bsize)); } + bwl = mi_width_log2(c1), bhl = mi_height_log2(c1); + if (bsl == bwl && bsl == bhl) { - if (output_enabled && level > BLOCK_SIZE_SB8X8) + if (output_enabled && bsize > BLOCK_SIZE_SB8X8) cpi->partition_count[pl][PARTITION_NONE]++; encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1); } else if (bsl == bhl && bsl > bwl) { @@ -826,12 +867,12 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int i; assert(bwl < bsl && bhl < bsl); - if (level == BLOCK_SIZE_SB64X64) { + if (bsize == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; - } else if (level == BLOCK_SIZE_SB32X32) { + } else if (bsize == BLOCK_SIZE_SB32X32) { subsize = BLOCK_SIZE_MB16X16; } else { - assert(level == BLOCK_SIZE_MB16X16); + assert(bsize == BLOCK_SIZE_MB16X16); subsize = BLOCK_SIZE_SB8X8; } @@ -843,554 +884,200 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, set_block_index(xd, i, subsize); encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs, - output_enabled, subsize, - c2 ? c2[i] : c1, c3 ? c3[i] : NULL, NULL); + output_enabled, subsize); } } - if (level > BLOCK_SIZE_SB8X8 && - (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) { - set_partition_seg_context(cpi, mi_row, mi_col); - update_partition_context(xd, c1, level); + if (bsize > BLOCK_SIZE_SB8X8 && + (bsize == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + update_partition_context(xd, c1, bsize); } } -static void encode_sb_row(VP9_COMP *cpi, - int mi_row, - TOKENEXTRA **tp, - int *totalrate) { + +// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are +// unlikely to be selected depending on previously rate-distortion optimization +// results, for encoding speed-up. +static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize, + int *rate, int *dist) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - int mi_col, pl; - - // Initialize the left context for the new SB row - vpx_memset(&cm->left_context, 0, sizeof(cm->left_context)); - vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context)); - - // Code each SB in the row - for (mi_col = cm->cur_tile_mi_col_start; - mi_col < cm->cur_tile_mi_col_end; mi_col += 8) { - int i, p; - BLOCK_SIZE_TYPE mb_partitioning[4][4]; - BLOCK_SIZE_TYPE sb_partitioning[4]; - BLOCK_SIZE_TYPE sb64_partitioning = BLOCK_SIZE_SB32X32; - int sb64_rate = 0, sb64_dist = 0; - int sb64_skip = 0; - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; - PARTITION_CONTEXT seg_l[64 / MI_SIZE], seg_a[64 / MI_SIZE]; - TOKENEXTRA *tp_orig = *tp; - - for (p = 0; p < MAX_MB_PLANE; p++) { - memcpy(a + 16 * p, cm->above_context[p] + - (mi_col * 2 >> xd->plane[p].subsampling_x), - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); - memcpy(l + 16 * p, cm->left_context[p], - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); - } - vpx_memcpy(&seg_a, cm->above_seg_context + mi_col, sizeof(seg_a)); - vpx_memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l)); + int bsl = b_width_log2(bsize), bs = 1 << bsl; + int msl = mi_height_log2(bsize), ms = 1 << msl; + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; + PARTITION_CONTEXT sl[8], sa[8]; + TOKENEXTRA *tp_orig = *tp; + int i, p, pl; + BLOCK_SIZE_TYPE subsize; + int srate = INT_MAX, sdist = INT_MAX; + + assert(mi_height_log2(bsize) == mi_width_log2(bsize)); + + // buffer the above/left context information of the block in search. + for (p = 0; p < MAX_MB_PLANE; ++p) { + vpx_memcpy(a + bs * p, cm->above_context[p] + + (mi_col * 2 >> xd->plane[p].subsampling_x), + sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_x); + vpx_memcpy(l + bs * p, cm->left_context[p] + + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), + sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_y); + } + vpx_memcpy(sa, cm->above_seg_context + mi_col, + sizeof(PARTITION_CONTEXT) * ms); + vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK), + sizeof(PARTITION_CONTEXT) * ms); + + // PARTITION_SPLIT + if (bsize >= BLOCK_SIZE_MB16X16) { + int r4 = 0, d4 = 0; + subsize = get_subsize(bsize, PARTITION_SPLIT); + *(get_sb_partitioning(x, bsize)) = subsize; + + for (i = 0; i < 4; ++i) { + int x_idx = (i & 1) * (ms >> 1); + int y_idx = (i >> 1) * (ms >> 1); + int r, d; - // FIXME(rbultje): this function should probably be rewritten to be - // recursive at some point in the future. - for (i = 0; i < 4; i++) { - const int x_idx = (i & 1) << 2; - const int y_idx = (i & 2) << 1; - int sb32_rate = 0, sb32_dist = 0; - int splitmodes_used = 0; - int sb32_skip = 0; - int j; - ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE]; - PARTITION_CONTEXT sl32[32 / MI_SIZE], sa32[32 / MI_SIZE]; - - sb_partitioning[i] = BLOCK_SIZE_MB16X16; - if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) + if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; - xd->sb_index = i; - - /* Function should not modify L & A contexts; save and restore on exit */ - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(l2 + 8 * p, - cm->left_context[p] + - (y_idx * 2 >> xd->plane[p].subsampling_y), - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); - vpx_memcpy(a2 + 8 * p, - cm->above_context[p] + - ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); - } - vpx_memcpy(&sa32, cm->above_seg_context + mi_col + x_idx, sizeof(sa32)); - vpx_memcpy(&sl32, cm->left_seg_context + y_idx, sizeof(sl32)); - - /* Encode MBs in raster order within the SB */ - for (j = 0; j < 4; j++) { - const int x_idx_m = x_idx + ((j & 1) << 1); - const int y_idx_m = y_idx + ((j >> 1) << 1); - int r, d; - int r2, d2, mb16_rate = 0, mb16_dist = 0, k; - ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE]; - PARTITION_CONTEXT sl16[16 / MI_SIZE], sa16[16 / MI_SIZE]; - - mb_partitioning[i][j] = BLOCK_SIZE_SB8X8; - - if (mi_row + y_idx_m >= cm->mi_rows || - mi_col + x_idx_m >= cm->mi_cols) { - // MB lies outside frame, move on - continue; - } - - // Index of the MB in the SB 0..3 - xd->mb_index = j; - - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(l3 + 4 * p, - cm->left_context[p] + - (y_idx_m * 2 >> xd->plane[p].subsampling_y), - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); - vpx_memcpy(a3 + 4 * p, - cm->above_context[p] + - ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); - } - vpx_memcpy(&sa16, cm->above_seg_context + mi_col + x_idx_m, - sizeof(sa16)); - vpx_memcpy(&sl16, cm->left_seg_context + y_idx_m, sizeof(sl16)); - - for (k = 0; k < 4; k++) { - xd->b_index = k; - - // try 8x8 coding - pick_sb_modes(cpi, mi_row + y_idx_m + (k >> 1), - mi_col + x_idx_m + (k & 1), - tp, &r, &d, BLOCK_SIZE_SB8X8, - &x->sb8_context[xd->sb_index][xd->mb_index] - [xd->b_index]); - mb16_rate += r; - mb16_dist += d; - update_state(cpi, &x->sb8_context[xd->sb_index][xd->mb_index] - [xd->b_index], - BLOCK_SIZE_SB8X8, 0); - encode_superblock(cpi, tp, - 0, mi_row + y_idx_m + (k >> 1), - mi_col + x_idx_m + (k & 1), - BLOCK_SIZE_SB8X8); - } - set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); - pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); - mb16_rate += x->partition_cost[pl][PARTITION_SPLIT]; - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(cm->left_context[p] + - (y_idx_m * 2 >> xd->plane[p].subsampling_y), - l3 + 4 * p, - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); - vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), - a3 + 4 * p, - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); - } - vpx_memcpy(cm->above_seg_context + mi_col + x_idx_m, - sa16, sizeof(sa16)); - vpx_memcpy(cm->left_seg_context + y_idx_m, sl16, sizeof(sl16)); - - // try 8x16 coding - r2 = 0; - d2 = 0; - xd->b_index = 0; - pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, - tp, &r, &d, BLOCK_SIZE_SB8X16, - &x->sb8x16_context[xd->sb_index][xd->mb_index] - [xd->b_index]); - r2 += r; - d2 += d; - update_state(cpi, &x->sb8x16_context[xd->sb_index][xd->mb_index] - [xd->b_index], - BLOCK_SIZE_SB8X16, 0); - encode_superblock(cpi, tp, - 0, mi_row + y_idx_m, mi_col + x_idx_m, - BLOCK_SIZE_SB8X16); - xd->b_index = 1; - pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m + 1, - tp, &r, &d, BLOCK_SIZE_SB8X16, - &x->sb8x16_context[xd->sb_index][xd->mb_index] - [xd->b_index]); - r2 += r; - d2 += d; - set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); - pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); - r2 += x->partition_cost[pl][PARTITION_VERT]; - if (RDCOST(x->rdmult, x->rddiv, r2, d2) < - RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { - mb16_rate = r2; - mb16_dist = d2; - mb_partitioning[i][j] = BLOCK_SIZE_SB8X16; - } - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(cm->left_context[p] + - (y_idx_m * 2 >> xd->plane[p].subsampling_y), - l3 + 4 * p, - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); - vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), - a3 + 4 * p, - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); - } - - // try 16x8 coding - r2 = 0; - d2 = 0; - xd->b_index = 0; - pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, - tp, &r, &d, BLOCK_SIZE_SB16X8, - &x->sb16x8_context[xd->sb_index][xd->mb_index] - [xd->b_index]); - r2 += r; - d2 += d; - update_state(cpi, &x->sb16x8_context[xd->sb_index][xd->mb_index] - [xd->b_index], - BLOCK_SIZE_SB16X8, 0); - encode_superblock(cpi, tp, - 0, mi_row + y_idx_m, mi_col + x_idx_m, - BLOCK_SIZE_SB16X8); - xd->b_index = 1; - pick_sb_modes(cpi, mi_row + y_idx_m + 1, mi_col + x_idx_m, - tp, &r, &d, BLOCK_SIZE_SB16X8, - &x->sb16x8_context[xd->sb_index][xd->mb_index] - [xd->b_index]); - r2 += r; - d2 += d; - set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); - pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); - r2 += x->partition_cost[pl][PARTITION_HORZ]; - if (RDCOST(x->rdmult, x->rddiv, r2, d2) < - RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { - mb16_rate = r2; - mb16_dist = d2; - mb_partitioning[i][j] = BLOCK_SIZE_SB16X8; - } - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(cm->left_context[p] + - (y_idx_m * 2 >> xd->plane[p].subsampling_y), - l3 + 4 * p, - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); - vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), - a3 + 4 * p, - sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); - } - - // try as 16x16 - pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, - tp, &r, &d, BLOCK_SIZE_MB16X16, - &x->mb_context[xd->sb_index][xd->mb_index]); - set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); - pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); - r += x->partition_cost[pl][PARTITION_NONE]; - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { - mb16_rate = r; - mb16_dist = d; - mb_partitioning[i][j] = BLOCK_SIZE_MB16X16; - } - sb32_rate += mb16_rate; - sb32_dist += mb16_dist; - - // Dummy encode, do not do the tokenization - encode_sb(cpi, tp, mi_row + y_idx_m, mi_col + x_idx_m, 0, - BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL); - } - - /* Restore L & A coding context to those in place on entry */ - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(cm->left_context[p] + - (y_idx * 2 >> xd->plane[p].subsampling_y), - l2 + 8 * p, - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); - vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), - a2 + 8 * p, - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); - } - // restore partition information context - vpx_memcpy(cm->above_seg_context + mi_col + x_idx, sa32, sizeof(sa32)); - vpx_memcpy(cm->left_seg_context + y_idx, sl32, sizeof(sl32)); - - set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx); - pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32); - sb32_rate += x->partition_cost[pl][PARTITION_SPLIT]; - - if (cpi->sf.splitmode_breakout) { - sb32_skip = splitmodes_used; - sb64_skip += splitmodes_used; - } - - // check 32x16 - if (mi_col + x_idx + 4 <= cm->mi_cols) { - int r, d; - - xd->mb_index = 0; - pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, - tp, &r, &d, BLOCK_SIZE_SB32X16, - &x->sb32x16_context[xd->sb_index][xd->mb_index]); - if (mi_row + y_idx + 2 < cm->mi_rows) { - int r2, d2; - - update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index], - BLOCK_SIZE_SB32X16, 0); - encode_superblock(cpi, tp, - 0, mi_row + y_idx, mi_col + x_idx, - BLOCK_SIZE_SB32X16); - xd->mb_index = 1; - pick_sb_modes(cpi, mi_row + y_idx + 2, - mi_col + x_idx, tp, &r2, &d2, BLOCK_SIZE_SB32X16, - &x->sb32x16_context[xd->sb_index][xd->mb_index]); - r += r2; - d += d2; - } - - set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx); - pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32); - r += x->partition_cost[pl][PARTITION_HORZ]; - - /* is this better than MB coding? */ - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { - sb32_rate = r; - sb32_dist = d; - sb_partitioning[i] = BLOCK_SIZE_SB32X16; - } - - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(cm->left_context[p] + - (y_idx * 2 >> xd->plane[p].subsampling_y), - l2 + 8 * p, - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); - vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), - a2 + 8 * p, - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); - } - } - - // check 16x32 - if (mi_row + y_idx + 4 <= cm->mi_rows) { - int r, d; - - xd->mb_index = 0; - pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, - tp, &r, &d, BLOCK_SIZE_SB16X32, - &x->sb16x32_context[xd->sb_index][xd->mb_index]); - if (mi_col + x_idx + 2 < cm->mi_cols) { - int r2, d2; - - update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index], - BLOCK_SIZE_SB16X32, 0); - encode_superblock(cpi, tp, - 0, mi_row + y_idx, mi_col + x_idx, - BLOCK_SIZE_SB16X32); - xd->mb_index = 1; - pick_sb_modes(cpi, mi_row + y_idx, - mi_col + x_idx + 2, - tp, &r2, &d2, BLOCK_SIZE_SB16X32, - &x->sb16x32_context[xd->sb_index][xd->mb_index]); - r += r2; - d += d2; - } - - set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx); - pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32); - r += x->partition_cost[pl][PARTITION_VERT]; - - /* is this better than MB coding? */ - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { - sb32_rate = r; - sb32_dist = d; - sb_partitioning[i] = BLOCK_SIZE_SB16X32; - } - - for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy(cm->left_context[p] + - (y_idx * 2 >> xd->plane[p].subsampling_y), - l2 + 8 * p, - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); - vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), - a2 + 8 * p, - sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); - } - } - - if (!sb32_skip && - mi_col + x_idx + 4 <= cm->mi_cols && - mi_row + y_idx + 4 <= cm->mi_rows) { - int r, d; - - /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ - pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, - tp, &r, &d, BLOCK_SIZE_SB32X32, - &x->sb32_context[xd->sb_index]); - - set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx); - pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32); - r += x->partition_cost[pl][PARTITION_NONE]; - - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { - sb32_rate = r; - sb32_dist = d; - sb_partitioning[i] = BLOCK_SIZE_SB32X32; - } - } - - // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled). - if (cpi->sf.mb16_breakout && sb_partitioning[i] != BLOCK_SIZE_SB32X32) { - ++sb64_skip; - } - - sb64_rate += sb32_rate; - sb64_dist += sb32_dist; - - /* Encode SB using best computed mode(s) */ - // FIXME(rbultje): there really shouldn't be any need to encode_mb/sb - // for each level that we go up, we can just keep tokens and recon - // pixels of the lower level; also, inverting SB/MB order (big->small - // instead of small->big) means we can use as threshold for small, which - // may enable breakouts if RD is not good enough (i.e. faster) - encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0, - BLOCK_SIZE_SB32X32, sb_partitioning[i], mb_partitioning[i], - NULL); + *(get_sb_index(xd, subsize)) = i; + rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, + &r, &d); + r4 += r; + d4 += d; } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r4 += x->partition_cost[pl][PARTITION_SPLIT]; - for (p = 0; p < MAX_MB_PLANE; p++) { - memcpy(cm->above_context[p] + - (mi_col * 2 >> xd->plane[p].subsampling_x), - a + 16 * p, - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); - memcpy(cm->left_context[p], l + 16 * p, - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); - } - memcpy(cm->above_seg_context + mi_col, &seg_a, sizeof(seg_a)); - memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l)); - - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64); - sb64_rate += x->partition_cost[pl][PARTITION_SPLIT]; + srate = r4; + sdist = d4; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } - // check 64x32 - if (mi_col + 8 <= cm->mi_cols && !(cm->mb_rows & 1)) { + // PARTITION_HORZ + if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) && + (bsize >= BLOCK_SIZE_MB16X16)) { + int r2, d2; + int mb_skip = 0; + subsize = get_subsize(bsize, PARTITION_HORZ); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, + get_block_context(x, subsize)); + + if (mi_row + ms <= cm->mi_rows) { int r, d; - - xd->sb_index = 0; - pick_sb_modes(cpi, mi_row, mi_col, - tp, &r, &d, BLOCK_SIZE_SB64X32, - &x->sb64x32_context[xd->sb_index]); - if (mi_row + 4 != cm->mi_rows) { - int r2, d2; - - update_state(cpi, &x->sb64x32_context[xd->sb_index], - BLOCK_SIZE_SB64X32, 0); - encode_superblock(cpi, tp, - 0, mi_row, mi_col, BLOCK_SIZE_SB64X32); - xd->sb_index = 1; - pick_sb_modes(cpi, mi_row + 4, mi_col, - tp, &r2, &d2, BLOCK_SIZE_SB64X32, - &x->sb64x32_context[xd->sb_index]); - r += r2; - d += d2; - } - - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64); - r += x->partition_cost[pl][PARTITION_HORZ]; - - /* is this better than MB coding? */ - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { - sb64_rate = r; - sb64_dist = d; - sb64_partitioning = BLOCK_SIZE_SB64X32; - } - - for (p = 0; p < MAX_MB_PLANE; p++) { - memcpy(cm->above_context[p] + - (mi_col * 2 >> xd->plane[p].subsampling_x), - a + 16 * p, - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); - memcpy(cm->left_context[p], l + 16 * p, - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); - } + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize, + get_block_context(x, subsize)); + r2 += r; + d2 += d; + } else { + if (mi_row + (ms >> 1) != cm->mi_rows) + mb_skip = 1; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r2 += x->partition_cost[pl][PARTITION_HORZ]; + + if ((RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) { + srate = r2; + sdist = d2; + *(get_sb_partitioning(x, bsize)) = subsize; } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } - // check 32x64 - if (mi_row + 8 <= cm->mi_rows && !(cm->mb_cols & 1)) { + // PARTITION_VERT + if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) && + (bsize >= BLOCK_SIZE_MB16X16)) { + int r2, d2; + int mb_skip = 0; + subsize = get_subsize(bsize, PARTITION_VERT); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, + get_block_context(x, subsize)); + if (mi_col + ms <= cm->mi_cols) { int r, d; + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize, + get_block_context(x, subsize)); + r2 += r; + d2 += d; + } else { + if (mi_col + (ms >> 1) != cm->mi_cols) + mb_skip = 1; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r2 += x->partition_cost[pl][PARTITION_VERT]; + + if ((RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) { + srate = r2; + sdist = d2; + *(get_sb_partitioning(x, bsize)) = subsize; + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } - xd->sb_index = 0; - pick_sb_modes(cpi, mi_row, mi_col, - tp, &r, &d, BLOCK_SIZE_SB32X64, - &x->sb32x64_context[xd->sb_index]); - if (mi_col + 4 != cm->mi_cols) { - int r2, d2; - - update_state(cpi, &x->sb32x64_context[xd->sb_index], - BLOCK_SIZE_SB32X64, 0); - encode_superblock(cpi, tp, - 0, mi_row, mi_col, BLOCK_SIZE_SB32X64); - xd->sb_index = 1; - pick_sb_modes(cpi, mi_row, mi_col + 4, - tp, &r2, &d2, BLOCK_SIZE_SB32X64, - &x->sb32x64_context[xd->sb_index]); - r += r2; - d += d2; - } - - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64); - r += x->partition_cost[pl][PARTITION_VERT]; - - /* is this better than MB coding? */ - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { - sb64_rate = r; - sb64_dist = d; - sb64_partitioning = BLOCK_SIZE_SB32X64; - } + // PARTITION_NONE + if (mi_row + ms <= cm->mi_rows && mi_col + ms <= cm->mi_cols) { + int r, d; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize, + get_block_context(x, bsize)); + if (bsize >= BLOCK_SIZE_MB16X16) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r += x->partition_cost[pl][PARTITION_NONE]; + } - for (p = 0; p < MAX_MB_PLANE; p++) { - memcpy(cm->above_context[p] + - (mi_col * 2 >> xd->plane[p].subsampling_x), - a + 16 * p, - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); - memcpy(cm->left_context[p], l + 16 * p, - sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); - } + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r; + sdist = d; + if (bsize >= BLOCK_SIZE_MB16X16) + *(get_sb_partitioning(x, bsize)) = bsize; } + } - if (!sb64_skip && - mi_col + 8 <= cm->mi_cols && - mi_row + 8 <= cm->mi_rows) { - int r, d; + assert(srate < INT_MAX && sdist < INT_MAX); + *rate = srate; + *dist = sdist; - pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, - BLOCK_SIZE_SB64X64, &x->sb64_context); + encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize); - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64); - r += x->partition_cost[pl][PARTITION_NONE]; + if (bsize == BLOCK_SIZE_SB64X64) + assert(tp_orig < *tp); + else + assert(tp_orig == *tp); +} - if (RDCOST(x->rdmult, x->rddiv, r, d) < - RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { - sb64_rate = r; - sb64_dist = d; - sb64_partitioning = BLOCK_SIZE_SB64X64; - } - } +static void encode_sb_row(VP9_COMP *cpi, int mi_row, + TOKENEXTRA **tp, int *totalrate) { + VP9_COMMON *const cm = &cpi->common; + int mi_col; - assert(tp_orig == *tp); - encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64, - sb64_partitioning, sb_partitioning, mb_partitioning); - assert(tp_orig < *tp); + // Initialize the left context for the new SB row + vpx_memset(&cm->left_context, 0, sizeof(cm->left_context)); + vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context)); + + // Code each SB in the row + for (mi_col = cm->cur_tile_mi_col_start; + mi_col < cm->cur_tile_mi_col_end; mi_col += 8) { + int dummy_rate, dummy_dist; + rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, + &dummy_rate, &dummy_dist); } } @@ -1423,7 +1110,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_build_block_offsets(x); - vp9_setup_block_dptrs(&x->e_mbd); + vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; @@ -1559,9 +1246,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_get_tile_col_offsets(cm, tile_col); for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end; - mi_row += 8) { + mi_row += 8) encode_sb_row(cpi, mi_row, &tp, &totalrate); - } cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 4665fccd0..e4002d689 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -139,6 +139,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, const int ib = txfrm_block_to_raster_block(xd, bsize, plane, block, 2 * tx_size); const int16_t *dequant_ptr = xd->plane[plane].dequant; + const uint8_t * band_translate; assert((!type && !plane) || (type && plane)); dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16); @@ -149,23 +150,27 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT; default_eob = 16; scan = get_scan_4x4(tx_type); + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT; scan = get_scan_8x8(tx_type); default_eob = 64; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT; scan = get_scan_16x16(tx_type); default_eob = 256; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: scan = vp9_default_zig_zag1d_32x32; default_eob = 1024; + band_translate = vp9_coefband_trans_8x8plus; break; } assert(eob <= default_eob); @@ -204,7 +209,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, t0 = (vp9_dct_value_tokens_ptr + x)->token; /* Consider both possible successor states. */ if (next < default_eob) { - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, pad, default_eob); rate0 += @@ -254,7 +259,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; } if (next < default_eob) { - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); if (t0 != DCT_EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, pad, default_eob); @@ -291,7 +296,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, * add a new trellis node, but we do need to update the costs. */ else { - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ @@ -310,7 +315,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, } /* Now pick the best path through the whole trellis. */ - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); pt = combine_entropy_contexts(*a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 436c8d4e0..ddcf849ce 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -47,7 +47,7 @@ #define KF_MB_INTRA_MIN 150 #define GF_MB_INTRA_MIN 100 -#define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001) +#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) #define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0 #define POW2 (double)cpi->oxcf.two_pass_vbrbias/100.0 @@ -78,8 +78,8 @@ static int select_cq_level(int qindex) { // Resets the first pass file to the given position using a relative seek from the current position -static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *Position) { - cpi->twopass.stats_in = Position; +static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *position) { + cpi->twopass.stats_in = position; } static int lookup_next_frame_stats(VP9_COMP *cpi, FIRSTPASS_STATS *next_frame) { @@ -252,17 +252,11 @@ static void avg_stats(FIRSTPASS_STATS *section) { // Calculate a modified Error used in distributing bits between easier and harder frames static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - double av_err = (cpi->twopass.total_stats.ssim_weighted_pred_err / - cpi->twopass.total_stats.count); - double this_err = this_frame->ssim_weighted_pred_err; - double modified_err; - - if (this_err > av_err) - modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW1); - else - modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW2); - - return modified_err; + const FIRSTPASS_STATS *const stats = &cpi->twopass.total_stats; + const double av_err = stats->ssim_weighted_pred_err / stats->count; + const double this_err = this_frame->ssim_weighted_pred_err; + return av_err * pow(this_err / DOUBLE_DIVIDE_CHECK(av_err), + this_err > av_err ? POW1 : POW2); } static const double weight_table[256] = { @@ -328,20 +322,14 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) { static int frame_max_bits(VP9_COMP *cpi) { // Max allocation for a single frame based on the max section guidelines // passed in and how many bits are left. - int max_bits; - // For VBR base this on the bits and frames left plus the // two_pass_vbrmax_section rate passed in by the user. - max_bits = (int) (((double) cpi->twopass.bits_left - / (cpi->twopass.total_stats.count - (double) cpi->common - .current_video_frame)) - * ((double) cpi->oxcf.two_pass_vbrmax_section / 100.0)); + const double max_bits = (1.0 * cpi->twopass.bits_left / + (cpi->twopass.total_stats.count - cpi->common.current_video_frame)) * + (cpi->oxcf.two_pass_vbrmax_section / 100.0); // Trap case where we are out of bits. - if (max_bits < 0) - max_bits = 0; - - return max_bits; + return MAX((int)max_bits, 0); } void vp9_init_first_pass(VP9_COMP *cpi) { @@ -489,7 +477,7 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_build_block_offsets(x); - vp9_setup_block_dptrs(&x->e_mbd); + vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); vp9_frame_init_quantizer(cpi); @@ -854,26 +842,18 @@ static double calc_correction_factor(double err_per_mb, double err_divisor, double pt_low, double pt_high, - int Q) { - double power_term; - double error_term = err_per_mb / err_divisor; - double correction_factor; + int q) { + const double error_term = err_per_mb / err_divisor; // Adjustment based on actual quantizer to power term. - power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low; - power_term = (power_term > pt_high) ? pt_high : power_term; + const double power_term = MIN(vp9_convert_qindex_to_q(q) * 0.01 + pt_low, + pt_high); // Calculate correction factor if (power_term < 1.0) assert(error_term >= 0.0); - correction_factor = pow(error_term, power_term); - // Clip range - correction_factor = - (correction_factor < 0.05) - ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor; - - return correction_factor; + return fclamp(pow(error_term, power_term), 0.05, 5.0); } // Given a current maxQ value sets a range for future values. @@ -882,10 +862,8 @@ static double calc_correction_factor(double err_per_mb, // (now uses the actual quantizer) but has not been tuned. static void adjust_maxq_qrange(VP9_COMP *cpi) { int i; - double q; - // Set the max corresponding to cpi->avg_q * 2.0 - q = cpi->avg_q * 2.0; + double q = cpi->avg_q * 2.0; cpi->twopass.maxq_max_limit = cpi->worst_quality; for (i = cpi->best_quality; i <= cpi->worst_quality; i++) { cpi->twopass.maxq_max_limit = i; @@ -906,12 +884,11 @@ static void adjust_maxq_qrange(VP9_COMP *cpi) { static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh) { - int Q; + int q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; - double section_err = (fpstats->coded_error / fpstats->count); - double sr_err_diff; + double section_err = fpstats->coded_error / fpstats->count; double sr_correction; double err_per_mb = section_err / num_mbs; double err_correction_factor; @@ -920,92 +897,74 @@ static int estimate_max_q(VP9_COMP *cpi, if (section_target_bandwitdh <= 0) return cpi->twopass.maxq_max_limit; // Highest value allowed - target_norm_bits_per_mb = - (section_target_bandwitdh < (1 << 20)) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) + ? (512 * section_target_bandwitdh) / num_mbs + : 512 * (section_target_bandwitdh / num_mbs); // Look at the drop in prediction quality between the last frame // and the GF buffer (which contained an older frame). if (fpstats->sr_coded_error > fpstats->coded_error) { - sr_err_diff = - (fpstats->sr_coded_error - fpstats->coded_error) / - (fpstats->count * cpi->common.MBs); - sr_correction = (sr_err_diff / 32.0); - sr_correction = pow(sr_correction, 0.25); - if (sr_correction < 0.75) - sr_correction = 0.75; - else if (sr_correction > 1.25) - sr_correction = 1.25; + double sr_err_diff = (fpstats->sr_coded_error - fpstats->coded_error) / + (fpstats->count * cpi->common.MBs); + sr_correction = fclamp(pow(sr_err_diff / 32.0, 0.25), 0.75, 1.25); } else { sr_correction = 0.75; } // Calculate a corrective factor based on a rolling ratio of bits spent // vs target bits - if ((cpi->rolling_target_bits > 0) && - (cpi->active_worst_quality < cpi->worst_quality)) { - double rolling_ratio; - - rolling_ratio = (double)cpi->rolling_actual_bits / - (double)cpi->rolling_target_bits; + if (cpi->rolling_target_bits > 0 && + cpi->active_worst_quality < cpi->worst_quality) { + double rolling_ratio = (double)cpi->rolling_actual_bits / + (double)cpi->rolling_target_bits; if (rolling_ratio < 0.95) cpi->twopass.est_max_qcorrection_factor -= 0.005; else if (rolling_ratio > 1.05) cpi->twopass.est_max_qcorrection_factor += 0.005; - cpi->twopass.est_max_qcorrection_factor = - (cpi->twopass.est_max_qcorrection_factor < 0.1) - ? 0.1 - : (cpi->twopass.est_max_qcorrection_factor > 10.0) - ? 10.0 : cpi->twopass.est_max_qcorrection_factor; + cpi->twopass.est_max_qcorrection_factor = fclamp( + cpi->twopass.est_max_qcorrection_factor, 0.1, 10.0); } // Corrections for higher compression speed settings // (reduced compression expected) - if (cpi->compressor_speed == 1) { - if (cpi->oxcf.cpu_used <= 5) - speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); - else - speed_correction = 1.25; - } + if (cpi->compressor_speed == 1) + speed_correction = cpi->oxcf.cpu_used <= 5 ? + 1.04 + (cpi->oxcf.cpu_used * 0.04) : + 1.25; // Try and pick a max Q that will be high enough to encode the // content at the given rate. - for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) { + for (q = cpi->twopass.maxq_min_limit; q < cpi->twopass.maxq_max_limit; q++) { int bits_per_mb_at_this_q; - err_correction_factor = - calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.4, 0.90, Q) * - sr_correction * speed_correction * - cpi->twopass.est_max_qcorrection_factor; + err_correction_factor = calc_correction_factor(err_per_mb, + ERR_DIVISOR, 0.4, 0.90, q) * + sr_correction * speed_correction * + cpi->twopass.est_max_qcorrection_factor; - - bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor); + bits_per_mb_at_this_q = vp9_bits_per_mb(INTER_FRAME, q, + err_correction_factor); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } // Restriction on active max q for constrained quality mode. - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality)) { - Q = cpi->cq_target_quality; - } + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && + q < cpi->cq_target_quality) + q = cpi->cq_target_quality; // Adjust maxq_min_limit and maxq_max_limit limits based on // average q observed in clip for non kf/gf/arf frames // Give average a chance to settle though. // PGW TODO.. This code is broken for the extended Q range - if ((cpi->ni_frames > - ((int)cpi->twopass.total_stats.count >> 8)) && - (cpi->ni_frames > 25)) { + if (cpi->ni_frames > ((int)cpi->twopass.total_stats.count >> 8) && + cpi->ni_frames > 25) adjust_maxq_qrange(cpi); - } - return Q; + return q; } // For cq mode estimate a cq level that matches the observed @@ -1013,7 +972,7 @@ static int estimate_max_q(VP9_COMP *cpi, static int estimate_cq(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh) { - int Q; + int q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; @@ -1064,29 +1023,29 @@ static int estimate_cq(VP9_COMP *cpi, clip_iifactor = 0.80; // Try and pick a Q that can encode the content at the given rate. - for (Q = 0; Q < MAXQ; Q++) { + for (q = 0; q < MAXQ; q++) { int bits_per_mb_at_this_q; // Error per MB based correction factor err_correction_factor = - calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) * + calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) * sr_correction * speed_correction * clip_iifactor; bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor); + vp9_bits_per_mb(INTER_FRAME, q, err_correction_factor); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } // Clip value to range "best allowed to (worst allowed - 1)" - Q = select_cq_level(Q); - if (Q >= cpi->worst_quality) - Q = cpi->worst_quality - 1; - if (Q < cpi->best_quality) - Q = cpi->best_quality; + q = select_cq_level(q); + if (q >= cpi->worst_quality) + q = cpi->worst_quality - 1; + if (q < cpi->best_quality) + q = cpi->best_quality; - return Q; + return q; } @@ -1117,9 +1076,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // encoded in the second pass is a guess. However the sum duration is not. // Its calculated based on the actual durations of all frames from the first // pass. - vp9_new_frame_rate(cpi, - 10000000.0 * cpi->twopass.total_stats.count / - cpi->twopass.total_stats.duration); + vp9_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / + cpi->twopass.total_stats.duration); cpi->output_frame_rate = cpi->oxcf.frame_rate; cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * @@ -1191,9 +1149,8 @@ static double get_prediction_decay_rate(VP9_COMP *cpi, // Look at the observed drop in prediction quality between the last frame // and the GF buffer (which contains an older frame). - mb_sr_err_diff = - (next_frame->sr_coded_error - next_frame->coded_error) / - (cpi->common.MBs); + mb_sr_err_diff = (next_frame->sr_coded_error - next_frame->coded_error) / + cpi->common.MBs; if (mb_sr_err_diff <= 512.0) { second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0); second_ref_decay = pow(second_ref_decay, 0.5); @@ -1225,9 +1182,9 @@ static int detect_transition_to_still( // Break clause to detect very still sections after motion // For example a static image after a fade or other transition // instead of a clean scene cut. - if ((frame_interval > MIN_GF_INTERVAL) && - (loop_decay_rate >= 0.999) && - (last_decay_rate < 0.9)) { + if (frame_interval > MIN_GF_INTERVAL && + loop_decay_rate >= 0.999 && + last_decay_rate < 0.9) { int j; FIRSTPASS_STATS *position = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_next_frame; @@ -1271,10 +1228,9 @@ static int detect_flash(VP9_COMP *cpi, int offset) { // are reasonably well predicted by an earlier (pre flash) frame. // The recovery after a flash is indicated by a high pcnt_second_ref // comapred to pcnt_inter. - if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) && - (next_frame.pcnt_second_ref >= 0.5)) { + if (next_frame.pcnt_second_ref > next_frame.pcnt_inter && + next_frame.pcnt_second_ref >= 0.5) flash_detected = 1; - } } return flash_detected; @@ -1356,13 +1312,9 @@ static double calc_frame_boost( return frame_boost; } -static int calc_arf_boost( - VP9_COMP *cpi, - int offset, - int f_frames, - int b_frames, - int *f_boost, - int *b_boost) { +static int calc_arf_boost(VP9_COMP *cpi, int offset, + int f_frames, int b_frames, + int *f_boost, int *b_boost) { FIRSTPASS_STATS this_frame; int i; @@ -1392,8 +1344,7 @@ static int calc_arf_boost( // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator = - decay_accumulator * get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } @@ -1429,10 +1380,9 @@ static int calc_arf_boost( // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator = - decay_accumulator * get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR : decay_accumulator; + ? MIN_DECAY_FACTOR : decay_accumulator; } boost_score += (decay_accumulator * @@ -1871,26 +1821,20 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); ++i) { - int boost; int allocation_chunks; - int Q = - (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; + int q = cpi->oxcf.fixed_q < 0 ? cpi->last_q[INTER_FRAME] + : cpi->oxcf.fixed_q; int gf_bits; - boost = (cpi->gfu_boost * vp9_gfboost_qadjust(Q)) / 100; + int boost = (cpi->gfu_boost * vp9_gfboost_qadjust(q)) / 100; // Set max and minimum boost and hence minimum allocation - if (boost > ((cpi->baseline_gf_interval + 1) * 200)) - boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (boost < 125) - boost = 125; + boost = clamp(boost, 125, (cpi->baseline_gf_interval + 1) * 200); if (cpi->source_alt_ref_pending && i == 0) - allocation_chunks = - ((cpi->baseline_gf_interval + 1) * 100) + boost; + allocation_chunks = ((cpi->baseline_gf_interval + 1) * 100) + boost; else - allocation_chunks = - (cpi->baseline_gf_interval * 100) + (boost - 100); + allocation_chunks = (cpi->baseline_gf_interval * 100) + (boost - 100); // Prevent overflow if (boost > 1023) { @@ -1901,41 +1845,34 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits to be spent on the gf or arf based on // the boost number - gf_bits = (int)((double)boost * - (cpi->twopass.gf_group_bits / - (double)allocation_chunks)); + gf_bits = (int)((double)boost * (cpi->twopass.gf_group_bits / + (double)allocation_chunks)); // If the frame that is to be boosted is simpler than the average for // the gf/arf group then use an alternative calculation // based on the error score of the frame itself if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) { - double alt_gf_grp_bits; - int alt_gf_bits; - - alt_gf_grp_bits = + double alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits * (mod_frame_err * (double)cpi->baseline_gf_interval) / DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left); - alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / + int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / (double)allocation_chunks)); - if (gf_bits > alt_gf_bits) { + if (gf_bits > alt_gf_bits) gf_bits = alt_gf_bits; - } } // Else if it is harder than other frames in the group make sure it at // least receives an allocation in keeping with its relative error // score, otherwise it may be worse off than an "un-boosted" frame else { - int alt_gf_bits = - (int)((double)cpi->twopass.kf_group_bits * - mod_frame_err / - DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left)); + int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits * + mod_frame_err / + DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left)); - if (alt_gf_bits > gf_bits) { + if (alt_gf_bits > gf_bits) gf_bits = alt_gf_bits; - } } // Dont allow a negative value for gf_bits @@ -1983,14 +1920,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the // calculation of alt_extra_bits. if (cpi->baseline_gf_interval >= 3) { - int boost = (cpi->source_alt_ref_pending) - ? b_boost : cpi->gfu_boost; + const int boost = cpi->source_alt_ref_pending ? b_boost : cpi->gfu_boost; if (boost >= 150) { - int pct_extra; int alt_extra_bits; - - pct_extra = (boost - 100) / 50; + int pct_extra = (boost - 100) / 50; pct_extra = (pct_extra > 20) ? 20 : pct_extra; alt_extra_bits = (int)((cpi->twopass.gf_group_bits * pct_extra) / 100); @@ -2071,33 +2005,21 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Make a damped adjustment to the active max q. static int adjust_active_maxq(int old_maxqi, int new_maxqi) { int i; - int ret_val = new_maxqi; - double old_q; - double new_q; - double target_q; - - old_q = vp9_convert_qindex_to_q(old_maxqi); - new_q = vp9_convert_qindex_to_q(new_maxqi); - - target_q = ((old_q * 7.0) + new_q) / 8.0; + const double old_q = vp9_convert_qindex_to_q(old_maxqi); + const double new_q = vp9_convert_qindex_to_q(new_maxqi); + const double target_q = ((old_q * 7.0) + new_q) / 8.0; if (target_q > old_q) { - for (i = old_maxqi; i <= new_maxqi; i++) { - if (vp9_convert_qindex_to_q(i) >= target_q) { - ret_val = i; - break; - } - } + for (i = old_maxqi; i <= new_maxqi; i++) + if (vp9_convert_qindex_to_q(i) >= target_q) + return i; } else { - for (i = old_maxqi; i >= new_maxqi; i--) { - if (vp9_convert_qindex_to_q(i) <= target_q) { - ret_val = i; - break; - } - } + for (i = old_maxqi; i >= new_maxqi; i--) + if (vp9_convert_qindex_to_q(i) <= target_q) + return i; } - return ret_val; + return new_maxqi; } void vp9_second_pass(VP9_COMP *cpi) { @@ -2111,9 +2033,8 @@ void vp9_second_pass(VP9_COMP *cpi) { double this_frame_intra_error; double this_frame_coded_error; - if (!cpi->twopass.stats_in) { + if (!cpi->twopass.stats_in) return; - } vp9_clear_system_state(); @@ -2123,12 +2044,8 @@ void vp9_second_pass(VP9_COMP *cpi) { // Set a cq_level in constrained quality mode. if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - int est_cq; - - est_cq = - estimate_cq(cpi, - &cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left)); + int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats, + (int)(cpi->twopass.bits_left / frames_left)); cpi->cq_target_quality = cpi->oxcf.cq_level; if (est_cq > cpi->cq_target_quality) @@ -2139,14 +2056,12 @@ void vp9_second_pass(VP9_COMP *cpi) { cpi->twopass.maxq_max_limit = cpi->worst_quality; cpi->twopass.maxq_min_limit = cpi->best_quality; - tmp_q = estimate_max_q( - cpi, - &cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left)); + tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats, + (int)(cpi->twopass.bits_left / frames_left)); - cpi->active_worst_quality = tmp_q; - cpi->ni_av_qi = tmp_q; - cpi->avg_q = vp9_convert_qindex_to_q(tmp_q); + cpi->active_worst_quality = tmp_q; + cpi->ni_av_qi = tmp_q; + cpi->avg_q = vp9_convert_qindex_to_q(tmp_q); #ifndef ONE_SHOT_Q_ESTIMATE // Limit the maxq value returned subsequently. @@ -2404,9 +2319,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (cpi->oxcf.auto_key && lookup_next_frame_stats(cpi, &next_frame) != EOF) { // Normal scene cut check - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) { + if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) break; - } + // How fast is prediction quality decaying loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); @@ -2416,19 +2331,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // quality since the last GF or KF. recent_loop_decay[i % 8] = loop_decay_rate; decay_accumulator = 1.0; - for (j = 0; j < 8; j++) { - decay_accumulator = decay_accumulator * recent_loop_decay[j]; - } + for (j = 0; j < 8; j++) + decay_accumulator *= recent_loop_decay[j]; // Special check for transition or high motion followed by a // to a static scene. - if (detect_transition_to_still(cpi, i, - (cpi->key_frame_frequency - i), - loop_decay_rate, - decay_accumulator)) { + if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + loop_decay_rate, decay_accumulator)) break; - } - // Step on to the next frame cpi->twopass.frames_to_key++; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index a89d2547e..708fe4549 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -46,7 +46,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { unsigned int i; for (i = 0; i < ctx->max_sz; i++) - vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img); + vp9_free_frame_buffer(&ctx->buf[i].img); free(ctx->buf); } free(ctx); @@ -56,6 +56,8 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { struct lookahead_ctx * vp9_lookahead_init(unsigned int width, unsigned int height, + unsigned int subsampling_x, + unsigned int subsampling_y, unsigned int depth) { struct lookahead_ctx *ctx = NULL; @@ -71,8 +73,9 @@ struct lookahead_ctx * vp9_lookahead_init(unsigned int width, if (!ctx->buf) goto bail; for (i = 0; i < depth; i++) - if (vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img, - width, height, VP9BORDERINPIXELS)) + if (vp9_alloc_frame_buffer(&ctx->buf[i].img, + width, height, subsampling_x, subsampling_y, + VP9BORDERINPIXELS)) goto bail; } return ctx; diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h index 2406618b9..81baa2c6f 100644 --- a/vp9/encoder/vp9_lookahead.h +++ b/vp9/encoder/vp9_lookahead.h @@ -31,6 +31,8 @@ struct lookahead_ctx; */ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, unsigned int height, + unsigned int subsampling_x, + unsigned int subsampling_y, unsigned int depth); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 05105d794..3d8003c33 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -313,9 +313,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vp9_free_frame_buffers(&cpi->common); - vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf); - vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source); - vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer); + vp9_free_frame_buffer(&cpi->last_frame_uf); + vp9_free_frame_buffer(&cpi->scaled_source); + vp9_free_frame_buffer(&cpi->alt_ref_buffer); vp9_lookahead_destroy(cpi->lookahead); vpx_free(cpi->tok); @@ -835,15 +835,19 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + cpi->lookahead = vp9_lookahead_init(cpi->oxcf.width, cpi->oxcf.height, + cm->subsampling_x, cm->subsampling_y, cpi->oxcf.lag_in_frames); if (!cpi->lookahead) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate lag buffers"); - if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer, - cpi->oxcf.width, cpi->oxcf.height, - VP9BORDERINPIXELS)) + if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, + cpi->oxcf.width, cpi->oxcf.height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); } @@ -873,13 +877,17 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate partition data"); - if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf, - cm->width, cm->height, VP9BORDERINPIXELS)) + if (vp9_alloc_frame_buffer(&cpi->last_frame_uf, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate last frame buffer"); - if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source, - cm->width, cm->height, VP9BORDERINPIXELS)) + if (vp9_alloc_frame_buffer(&cpi->scaled_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); @@ -914,13 +922,17 @@ static void update_frame_size(VP9_COMP *cpi) { vp9_update_frame_size(cm); // Update size of buffers local to this frame - if (vp8_yv12_realloc_frame_buffer(&cpi->last_frame_uf, - cm->width, cm->height, VP9BORDERINPIXELS)) + if (vp9_realloc_frame_buffer(&cpi->last_frame_uf, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to reallocate last frame buffer"); - if (vp8_yv12_realloc_frame_buffer(&cpi->scaled_source, - cm->width, cm->height, VP9BORDERINPIXELS)) + if (vp9_realloc_frame_buffer(&cpi->scaled_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); @@ -1032,6 +1044,9 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cm->width = oxcf->width; cm->height = oxcf->height; + cm->subsampling_x = 0; + cm->subsampling_y = 0; + vp9_alloc_compressor_data(cpi); // change includes all joint functionality vp9_change_config(ptr, oxcf); @@ -1196,17 +1211,13 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cm->sharpness_level = cpi->oxcf.Sharpness; - // Increasing the size of the frame beyond the first seen frame, or some - // otherwise signalled maximum size, is not supported. - // TODO(jkoleszar): exit gracefully. - if (!cpi->initial_width) { - alloc_raw_frame_buffers(cpi); - vp9_alloc_compressor_data(cpi); - cpi->initial_width = cm->width; - cpi->initial_height = cm->height; + if (cpi->initial_width) { + // Increasing the size of the frame beyond the first seen frame, or some + // otherwise signalled maximum size, is not supported. + // TODO(jkoleszar): exit gracefully. + assert(cm->width <= cpi->initial_width); + assert(cm->height <= cpi->initial_height); } - assert(cm->width <= cpi->initial_width); - assert(cm->height <= cpi->initial_height); update_frame_size(cpi); if (cpi->oxcf.fixed_q >= 0) { @@ -1938,8 +1949,8 @@ static void generate_psnr_packet(VP9_COMP *cpi) { pkt.data.psnr.samples[0] = width * height; pkt.data.psnr.samples[1] = width * height; - width = (width + 1) / 2; - height = (height + 1) / 2; + width = orig->uv_width; + height = orig->uv_height; sse = calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, @@ -2093,7 +2104,7 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } while (--h); src = s->u_buffer; - h = (cm->height + 1) / 2; + h = s->uv_height; do { fwrite(src, s->uv_width, 1, yuv_rec_file); @@ -2101,7 +2112,7 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } while (--h); src = s->v_buffer; - h = (cm->height + 1) / 2; + h = s->uv_height; do { fwrite(src, s->uv_width, 1, yuv_rec_file); @@ -2117,49 +2128,31 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, const int in_h = src_fb->y_crop_height; const int out_w = dst_fb->y_crop_width; const int out_h = dst_fb->y_crop_height; - int x, y; + int x, y, i; + + uint8_t *srcs[3] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer}; + int src_strides[3] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride}; + + uint8_t *dsts[3] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer}; + int dst_strides[3] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride}; for (y = 0; y < out_h; y += 16) { for (x = 0; x < out_w; x += 16) { - int x_q4 = x * 16 * in_w / out_w; - int y_q4 = y * 16 * in_h / out_h; - uint8_t *src = src_fb->y_buffer + y * in_h / out_h * src_fb->y_stride + - x * in_w / out_w; - uint8_t *dst = dst_fb->y_buffer + y * dst_fb->y_stride + x; - int src_stride = src_fb->y_stride; - int dst_stride = dst_fb->y_stride; - - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, - 16, 16); - - x_q4 >>= 1; - y_q4 >>= 1; - src_stride = src_fb->uv_stride; - dst_stride = dst_fb->uv_stride; - - src = src_fb->u_buffer + - y / 2 * in_h / out_h * src_fb->uv_stride + - x / 2 * in_w / out_w; - dst = dst_fb->u_buffer + - y / 2 * dst_fb->uv_stride + - x / 2; - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, - 8, 8); - - src = src_fb->v_buffer + - y / 2 * in_h / out_h * src_fb->uv_stride + - x / 2 * in_w / out_w; - dst = dst_fb->v_buffer + - y / 2 * dst_fb->uv_stride + - x / 2; - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, - 8, 8); + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int factor = i == 0 ? 1 : 2; + const int x_q4 = x * (16 / factor) * in_w / out_w; + const int y_q4 = y * (16 / factor) * in_h / out_h; + const int src_stride = src_strides[i]; + const int dst_stride = dst_strides[i]; + uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride + + x / factor * in_w / out_w; + uint8_t *dst = dsts[i] + y * dst_stride + x; + + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 16 / factor, 16 / factor); + } } } @@ -2500,9 +2493,10 @@ static void scale_references(VP9_COMP *cpi) { ref->y_crop_height != cm->height) { int new_fb = get_free_fb(cm); - vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb], - cm->width, cm->height, - VP9BORDERINPIXELS); + vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb], + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS); scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); cpi->scaled_ref_idx[i] = new_fb; } else { @@ -3587,6 +3581,15 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, struct vpx_usec_timer timer; int res = 0; + if (!cpi->initial_width) { + // TODO(jkoleszar): Support 1/4 subsampling? + cm->subsampling_x = sd->uv_width < sd->y_width; + cm->subsampling_y = sd->uv_height < sd->y_height; + alloc_raw_frame_buffers(cpi); + + cpi->initial_width = cm->width; + cpi->initial_height = cm->height; + } vpx_usec_timer_start(&timer); if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL)) @@ -3851,9 +3854,10 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cm->frame_flags = *frame_flags; // Reset the frame pointers to the current frame size - vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], - cm->width, cm->height, - VP9BORDERINPIXELS); + vp9_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS); // Calculate scaling factors for each of the 3 available references for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index ef8cb2bab..5bff383b8 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -272,6 +272,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, [ENTROPY_NODES]; int seg_eob, default_eob; uint8_t token_cache[1024]; + const uint8_t * band_translate; // Check for consistency of tx_size with mode info assert((!type && !plane) || (type && plane)); @@ -291,6 +292,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, coef_probs = cm->fc.coef_probs_4x4; seg_eob = 16; scan = get_scan_4x4(tx_type); + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { @@ -304,6 +306,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, scan = get_scan_8x8(tx_type); coef_probs = cm->fc.coef_probs_8x8; seg_eob = 64; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { @@ -317,6 +320,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, seg_eob = 256; above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: @@ -325,6 +329,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, seg_eob = 1024; above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; + band_translate = vp9_coefband_trans_8x8plus; break; default: abort(); @@ -347,7 +352,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, for (c = 0; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].token; - int band = get_coef_band(scan, tx_size, c); + int band = get_coef_band(band_translate, c); if (c) pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); @@ -361,7 +366,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, if (c) pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); cost += mb->token_costs[tx_size][type][ref] - [get_coef_band(scan, tx_size, c)] + [get_coef_band(band_translate, c)] [pt][DCT_EOB_TOKEN]; } } @@ -1069,9 +1074,7 @@ typedef struct { B_PREDICTION_MODE modes[4]; int_mv mvs[4], second_mvs[4]; int eobs[4]; - int mvthresh; - int *mdcounts; } BEST_SEG_INFO; static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { @@ -1322,7 +1325,6 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv, int64_t best_rd, - int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, @@ -1339,7 +1341,6 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, bsi.second_ref_mv = second_best_ref_mv; bsi.mvp.as_int = best_ref_mv->as_int; bsi.mvthresh = mvthresh; - bsi.mdcounts = mdcounts; for (i = 0; i < 4; i++) bsi.modes[i] = ZERO4X4; @@ -1612,7 +1613,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], - int frame_mdcounts[4][4], struct buf_2d yv12_mb[4][MAX_MB_PLANE], struct scale_factors scale[MAX_REF_FRAMES]) { VP9_COMMON *cm = &cpi->common; @@ -1797,7 +1797,7 @@ static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, - int mdcounts[4], int64_t txfm_cache[], + int64_t txfm_cache[], int *rate2, int *distortion, int *skippable, int *compmode_cost, int *rate_y, int *distortion_y, @@ -2305,7 +2305,9 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode = xd->mode_info_context->mbmi.mode; txfm_size = xd->mode_info_context->mbmi.txfm_size; rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip, bsize); + &dist_uv, &uv_skip, + (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : + bsize); if (bsize == BLOCK_SIZE_SB8X8) err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y, &rate4x4_y_tokenonly, @@ -2357,7 +2359,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int frame_mdcounts[4][4]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; int_mv single_newmv[MAX_REF_FRAMES]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, @@ -2366,7 +2367,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx}; - int mdcounts[4]; int64_t best_rd = INT64_MAX; int64_t best_txfm_rd[NB_TXFM_MODES]; int64_t best_txfm_diff[NB_TXFM_MODES]; @@ -2449,7 +2449,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->ref_frame_flags & flag_list[ref_frame]) { setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); + yv12_mb, scale_factor); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; @@ -2576,8 +2576,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[1] = yv12_mb[second_ref][i]; } - vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts)); - // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && @@ -2675,7 +2673,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &mbmi->ref_mvs[mbmi->ref_frame][0], - second_ref, INT64_MAX, mdcounts, + second_ref, INT64_MAX, &rate, &rate_y, &distortion, &skippable, (int)this_rd_thresh, seg_mvs); @@ -2714,7 +2712,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // switchable list (bilinear, 6-tap) is indicated at the frame level tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &mbmi->ref_mvs[mbmi->ref_frame][0], - second_ref, INT64_MAX, mdcounts, + second_ref, INT64_MAX, &rate, &rate_y, &distortion, &skippable, (int)this_rd_thresh, seg_mvs); @@ -2745,10 +2743,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // If even the 'Y' rd value of split is higher than best so far // then dont bother looking at UV vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, - bsize); - vp9_subtract_sbuv(x, bsize); + BLOCK_SIZE_SB8X8); + vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8); super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv, - &uv_skippable, bsize, TX_4X4); + &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4); rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; @@ -2792,7 +2790,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } this_rd = handle_inter_mode(cpi, x, bsize, - mdcounts, txfm_cache, + txfm_cache, &rate2, &distortion2, &skippable, &compmode_cost, &rate_y, &distortion_y, diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index cb670dab0..4420d49e3 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -136,6 +136,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT above_ec, left_ec; uint8_t token_cache[1024]; TX_TYPE tx_type = DCT_DCT; + const uint8_t * band_translate; assert((!type && !plane) || (type && plane)); switch (tx_size) { @@ -149,6 +150,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, scan = get_scan_4x4(tx_type); counts = cpi->coef_counts_4x4; coef_probs = cpi->common.fc.coef_probs_4x4; + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { @@ -162,6 +164,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, scan = get_scan_8x8(tx_type); counts = cpi->coef_counts_8x8; coef_probs = cpi->common.fc.coef_probs_8x8; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { @@ -175,6 +178,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, scan = get_scan_16x16(tx_type); counts = cpi->coef_counts_16x16; coef_probs = cpi->common.fc.coef_probs_16x16; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: @@ -184,6 +188,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, scan = vp9_default_zig_zag1d_32x32; counts = cpi->coef_counts_32x32; coef_probs = cpi->common.fc.coef_probs_32x32; + band_translate = vp9_coefband_trans_8x8plus; break; } @@ -196,7 +201,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, c = 0; do { - const int band = get_coef_band(scan, tx_size, c); + const int band = get_coef_band(band_translate, c); int token; int v = 0; rc = scan[c]; |