summaryrefslogtreecommitdiff
path: root/vp9/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder')
-rw-r--r--vp9/encoder/vp9_bitstream.c78
-rw-r--r--vp9/encoder/vp9_block.h8
-rw-r--r--vp9/encoder/vp9_encodeframe.c780
-rw-r--r--vp9/encoder/vp9_encodemb.c13
-rw-r--r--vp9/encoder/vp9_firstpass.c322
-rw-r--r--vp9/encoder/vp9_lookahead.c9
-rw-r--r--vp9/encoder/vp9_lookahead.h2
-rw-r--r--vp9/encoder/vp9_onyx_if.c152
-rw-r--r--vp9/encoder/vp9_rdopt.c38
-rw-r--r--vp9/encoder/vp9_tokenize.c7
10 files changed, 515 insertions, 894 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 20154d813..ac29a8e0c 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -905,6 +905,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
case PARTITION_SPLIT:
for (n = 0; n < 4; n++) {
int j = n >> 1, i = n & 0x01;
+ *(get_sb_index(xd, subsize)) = n;
write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end,
mi_row + j * bs, mi_col + i * bs, subsize);
}
@@ -914,12 +915,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
}
// update partition context
- if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
- return;
-
- xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
- xd->above_seg_context = cm->above_seg_context + mi_col;
- update_partition_context(xd, subsize, bsize);
+ if (bsize > BLOCK_SIZE_SB8X8 &&
+ (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ update_partition_context(xd, subsize, bsize);
+ }
}
static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
@@ -1242,16 +1242,6 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
FILE *vpxlogc = 0;
#endif
-static void put_delta_q(vp9_writer *bc, int delta_q) {
- if (delta_q != 0) {
- vp9_write_bit(bc, 1);
- vp9_write_literal(bc, abs(delta_q), 4);
- vp9_write_bit(bc, delta_q < 0);
- } else {
- vp9_write_bit(bc, 0);
- }
-}
-
static void decide_kf_ymode_entropy(VP9_COMP *cpi) {
int mode_cost[MB_MODE_COUNT];
int bestcost = INT_MAX;
@@ -1298,9 +1288,21 @@ static void segment_reference_frames(VP9_COMP *cpi) {
}
}
-static void encode_loopfilter(MACROBLOCKD *xd, vp9_writer *w) {
+static void encode_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_writer *w) {
int i;
+ // Encode the loop filter level and type
+ vp9_write_literal(w, pc->filter_level, 6);
+ vp9_write_literal(w, pc->sharpness_level, 3);
+#if CONFIG_LOOP_DERING
+ if (pc->dering_enabled) {
+ vp9_write_bit(w, 1);
+ vp9_write_literal(w, pc->dering_enabled - 1, 4);
+ } else {
+ vp9_write_bit(w, 0);
+ }
+#endif
+
// Write out loop filter deltas applied at the MB level based on mode or
// ref frame (if they are enabled).
vp9_write_bit(w, xd->mode_ref_lf_delta_enabled);
@@ -1354,6 +1356,24 @@ static void encode_loopfilter(MACROBLOCKD *xd, vp9_writer *w) {
}
}
+static void put_delta_q(vp9_writer *bc, int delta_q) {  // Writes one quantizer delta to bc.
+ if (delta_q != 0) {  // Non-zero delta: flag bit, then magnitude, then sign.
+ vp9_write_bit(bc, 1);  // flag = 1: a delta value follows
+ vp9_write_literal(bc, abs(delta_q), 4);  // |delta_q| as a literal of width 4
+ vp9_write_bit(bc, delta_q < 0);  // sign: 1 when delta_q is negative
+ } else {
+ vp9_write_bit(bc, 0);  // flag = 0: nothing else is written for this delta
+ }
+}
+
+static void encode_quantization(VP9_COMMON *pc, vp9_writer *w) {  // Writes the frame quantizer fields of pc to w.
+ vp9_write_literal(w, pc->base_qindex, QINDEX_BITS);  // base q index, QINDEX_BITS wide
+ put_delta_q(w, pc->y_dc_delta_q);  // deltas follow in fixed order: y_dc,
+ put_delta_q(w, pc->uv_dc_delta_q);  // then uv_dc,
+ put_delta_q(w, pc->uv_ac_delta_q);  // then uv_ac
+}
+
+
static void encode_segmentation(VP9_COMP *cpi, vp9_writer *w) {
int i, j;
VP9_COMMON *const pc = &cpi->common;
@@ -1495,27 +1515,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
// lossless mode: note this needs to be before loopfilter
vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless);
- // Encode the loop filter level and type
- vp9_write_literal(&header_bc, pc->filter_level, 6);
- vp9_write_literal(&header_bc, pc->sharpness_level, 3);
-#if CONFIG_LOOP_DERING
- if (pc->dering_enabled) {
- vp9_write_bit(&header_bc, 1);
- vp9_write_literal(&header_bc, pc->dering_enabled - 1, 4);
- } else {
- vp9_write_bit(&header_bc, 0);
- }
-#endif
-
- encode_loopfilter(xd, &header_bc);
-
- // Frame Q baseline quantizer index
- vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS);
+ encode_loopfilter(pc, xd, &header_bc);
- // Transmit Dc, Second order and Uv quantizer delta information
- put_delta_q(&header_bc, pc->y_dc_delta_q);
- put_delta_q(&header_bc, pc->uv_dc_delta_q);
- put_delta_q(&header_bc, pc->uv_ac_delta_q);
+ encode_quantization(pc, &header_bc);
// When there is a key frame all reference buffers are updated using the new key frame
if (pc->frame_type != KEY_FRAME) {
@@ -1805,6 +1807,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
int scaling = (pc->width != pc->display_width ||
pc->height != pc->display_height);
int v = (oh.first_partition_length_in_bytes << 8) |
+ (pc->subsampling_y << 7) |
+ (pc->subsampling_x << 6) |
(scaling << 5) |
(oh.show_frame << 4) |
(oh.version << 1) |
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 6bc42c7ff..44261481c 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -138,8 +138,8 @@ struct macroblock {
int optimize;
- // Structure to hold context for each of the 4 MBs within a SB:
- // when encoded as 4 independent MBs:
+ // TODO(jingning): Need to refactor the structure arrays that buffer the
+ // coding mode decisions of each partition type.
PICK_MODE_CONTEXT sb8_context[4][4][4];
PICK_MODE_CONTEXT sb8x16_context[4][4][2];
PICK_MODE_CONTEXT sb16x8_context[4][4][2];
@@ -153,6 +153,10 @@ struct macroblock {
PICK_MODE_CONTEXT sb64_context;
int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
+ BLOCK_SIZE_TYPE mb_partitioning[4][4];
+ BLOCK_SIZE_TYPE sb_partitioning[4];
+ BLOCK_SIZE_TYPE sb64_partitioning;
+
void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 49e8ccefa..2edeb7807 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -539,15 +539,6 @@ void vp9_setup_src_planes(MACROBLOCK *x,
x->e_mbd.plane[2].subsampling_y);
}
-static INLINE void set_partition_seg_context(VP9_COMP *cpi,
- int mi_row, int mi_col) {
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-
- xd->above_seg_context = cm->above_seg_context + mi_col;
- xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
-}
-
static void set_offsets(VP9_COMP *cpi,
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) {
MACROBLOCK *const x = &cpi->mb;
@@ -571,7 +562,7 @@ static void set_offsets(VP9_COMP *cpi,
}
// partition contexts
- set_partition_seg_context(cpi, mi_row, mi_col);
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
// Activity map pointer
x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
@@ -731,6 +722,9 @@ static void set_block_index(MACROBLOCKD *xd, int idx,
}
}
+// TODO(jingning): the variables used here are a little complicated. Need
+// further refactoring to organize the temporary buffers when recursive
+// partitioning down to 4x4 block size is enabled.
static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -762,6 +756,52 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
}
}
+static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x,  // Returns a pointer to the slot in x holding the
+ BLOCK_SIZE_TYPE bsize) {  // partitioning recorded for a square block of size bsize.
+ MACROBLOCKD *xd = &x->e_mbd;  // supplies the current sb_index / mb_index
+ switch (bsize) {
+ case BLOCK_SIZE_SB64X64:
+ return &x->sb64_partitioning;  // single slot for the 64x64 block
+ case BLOCK_SIZE_SB32X32:
+ return &x->sb_partitioning[xd->sb_index];  // one slot per 32x32, indexed by sb_index
+ case BLOCK_SIZE_MB16X16:
+ return &x->mb_partitioning[xd->sb_index][xd->mb_index];  // one slot per 16x16 within the current 32x32
+ default:
+ assert(0);  // only the three square sizes above are supported
+ return NULL;  // reached only when asserts are compiled out
+ }
+}
+
+static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,  // Copies saved contexts back into cpi->common
+ ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],  // saved above entropy context, plane p at a + bw * p
+ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],  // saved left entropy context, plane p at l + bh * p
+ PARTITION_CONTEXT sa[8],  // saved above partition context
+ PARTITION_CONTEXT sl[8],  // saved left partition context
+ BLOCK_SIZE_TYPE bsize) {  // block size; determines how many entries are copied
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;  // read only for per-plane subsampling shifts
+ int p;
+ int bwl = b_width_log2(bsize), bw = 1 << bwl;  // bw/bh: per-plane stride in a/l and luma copy length
+ int bhl = b_height_log2(bsize), bh = 1 << bhl;
+ int mwl = mi_width_log2(bsize), mw = 1 << mwl;  // mw/mh: number of partition-context entries to copy
+ int mhl = mi_height_log2(bsize), mh = 1 << mhl;
+ for (p = 0; p < MAX_MB_PLANE; p++) {  // restore entropy contexts plane by plane
+ vpx_memcpy(cm->above_context[p] +
+ ((mi_col * 2) >> xd->plane[p].subsampling_x),  // destination offset scaled by horizontal subsampling
+ a + bw * p,
+ sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);  // (size * bw) >> shift: '*' binds before '>>'
+ vpx_memcpy(cm->left_context[p] +
+ ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),  // row is masked with MI_MASK before indexing the left context
+ l + bh * p,
+ sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
+ }
+ vpx_memcpy(cm->above_seg_context + mi_col, sa,  // restore above partition context (mw entries)
+ sizeof(PARTITION_CONTEXT) * mw);
+ vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,  // restore left partition context (mh entries)
+ sizeof(PARTITION_CONTEXT) * mh);
+}
+
static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
int mi_row, int mi_col, int output_enabled,
BLOCK_SIZE_TYPE bsize, int sub_index) {
@@ -788,27 +828,28 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
int mi_row, int mi_col, int output_enabled,
- BLOCK_SIZE_TYPE level,
- BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4],
- BLOCK_SIZE_TYPE c3[4][4]
- ) {
+ BLOCK_SIZE_TYPE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- const int bsl = mi_width_log2(level), bs = 1 << (bsl - 1);
- const int bwl = mi_width_log2(c1), bhl = mi_height_log2(c1);
+ BLOCK_SIZE_TYPE c1 = BLOCK_SIZE_SB8X8;
+ const int bsl = mi_width_log2(bsize), bs = 1 << (bsl - 1);
+ int bwl, bhl;
int UNINITIALIZED_IS_SAFE(pl);
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- if (level > BLOCK_SIZE_SB8X8) {
- set_partition_seg_context(cpi, mi_row, mi_col);
- pl = partition_plane_context(xd, level);
+ if (bsize > BLOCK_SIZE_SB8X8) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ c1 = *(get_sb_partitioning(x, bsize));
}
+ bwl = mi_width_log2(c1), bhl = mi_height_log2(c1);
+
if (bsl == bwl && bsl == bhl) {
- if (output_enabled && level > BLOCK_SIZE_SB8X8)
+ if (output_enabled && bsize > BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
} else if (bsl == bhl && bsl > bwl) {
@@ -826,12 +867,12 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
int i;
assert(bwl < bsl && bhl < bsl);
- if (level == BLOCK_SIZE_SB64X64) {
+ if (bsize == BLOCK_SIZE_SB64X64) {
subsize = BLOCK_SIZE_SB32X32;
- } else if (level == BLOCK_SIZE_SB32X32) {
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
subsize = BLOCK_SIZE_MB16X16;
} else {
- assert(level == BLOCK_SIZE_MB16X16);
+ assert(bsize == BLOCK_SIZE_MB16X16);
subsize = BLOCK_SIZE_SB8X8;
}
@@ -843,554 +884,200 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
set_block_index(xd, i, subsize);
encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
- output_enabled, subsize,
- c2 ? c2[i] : c1, c3 ? c3[i] : NULL, NULL);
+ output_enabled, subsize);
}
}
- if (level > BLOCK_SIZE_SB8X8 &&
- (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) {
- set_partition_seg_context(cpi, mi_row, mi_col);
- update_partition_context(xd, c1, level);
+ if (bsize > BLOCK_SIZE_SB8X8 &&
+ (bsize == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ update_partition_context(xd, c1, bsize);
}
}
-static void encode_sb_row(VP9_COMP *cpi,
- int mi_row,
- TOKENEXTRA **tp,
- int *totalrate) {
+
+// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
+// unlikely to be selected, based on previous rate-distortion optimization
+// results, for encoding speed-up.
+static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
+ int mi_row, int mi_col,
+ BLOCK_SIZE_TYPE bsize,
+ int *rate, int *dist) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int mi_col, pl;
-
- // Initialize the left context for the new SB row
- vpx_memset(&cm->left_context, 0, sizeof(cm->left_context));
- vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context));
-
- // Code each SB in the row
- for (mi_col = cm->cur_tile_mi_col_start;
- mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
- int i, p;
- BLOCK_SIZE_TYPE mb_partitioning[4][4];
- BLOCK_SIZE_TYPE sb_partitioning[4];
- BLOCK_SIZE_TYPE sb64_partitioning = BLOCK_SIZE_SB32X32;
- int sb64_rate = 0, sb64_dist = 0;
- int sb64_skip = 0;
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT seg_l[64 / MI_SIZE], seg_a[64 / MI_SIZE];
- TOKENEXTRA *tp_orig = *tp;
-
- for (p = 0; p < MAX_MB_PLANE; p++) {
- memcpy(a + 16 * p, cm->above_context[p] +
- (mi_col * 2 >> xd->plane[p].subsampling_x),
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
- memcpy(l + 16 * p, cm->left_context[p],
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
- }
- vpx_memcpy(&seg_a, cm->above_seg_context + mi_col, sizeof(seg_a));
- vpx_memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
+ int bsl = b_width_log2(bsize), bs = 1 << bsl;
+ int msl = mi_height_log2(bsize), ms = 1 << msl;
+ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
+ PARTITION_CONTEXT sl[8], sa[8];
+ TOKENEXTRA *tp_orig = *tp;
+ int i, p, pl;
+ BLOCK_SIZE_TYPE subsize;
+ int srate = INT_MAX, sdist = INT_MAX;
+
+ assert(mi_height_log2(bsize) == mi_width_log2(bsize));
+
+ // buffer the above/left context information of the block in search.
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ vpx_memcpy(a + bs * p, cm->above_context[p] +
+ (mi_col * 2 >> xd->plane[p].subsampling_x),
+ sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_x);
+ vpx_memcpy(l + bs * p, cm->left_context[p] +
+ ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
+ sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_y);
+ }
+ vpx_memcpy(sa, cm->above_seg_context + mi_col,
+ sizeof(PARTITION_CONTEXT) * ms);
+ vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
+ sizeof(PARTITION_CONTEXT) * ms);
+
+ // PARTITION_SPLIT
+ if (bsize >= BLOCK_SIZE_MB16X16) {
+ int r4 = 0, d4 = 0;
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ *(get_sb_partitioning(x, bsize)) = subsize;
+
+ for (i = 0; i < 4; ++i) {
+ int x_idx = (i & 1) * (ms >> 1);
+ int y_idx = (i >> 1) * (ms >> 1);
+ int r, d;
- // FIXME(rbultje): this function should probably be rewritten to be
- // recursive at some point in the future.
- for (i = 0; i < 4; i++) {
- const int x_idx = (i & 1) << 2;
- const int y_idx = (i & 2) << 1;
- int sb32_rate = 0, sb32_dist = 0;
- int splitmodes_used = 0;
- int sb32_skip = 0;
- int j;
- ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl32[32 / MI_SIZE], sa32[32 / MI_SIZE];
-
- sb_partitioning[i] = BLOCK_SIZE_MB16X16;
- if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
+ if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
- xd->sb_index = i;
-
- /* Function should not modify L & A contexts; save and restore on exit */
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(l2 + 8 * p,
- cm->left_context[p] +
- (y_idx * 2 >> xd->plane[p].subsampling_y),
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
- vpx_memcpy(a2 + 8 * p,
- cm->above_context[p] +
- ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
- }
- vpx_memcpy(&sa32, cm->above_seg_context + mi_col + x_idx, sizeof(sa32));
- vpx_memcpy(&sl32, cm->left_seg_context + y_idx, sizeof(sl32));
-
- /* Encode MBs in raster order within the SB */
- for (j = 0; j < 4; j++) {
- const int x_idx_m = x_idx + ((j & 1) << 1);
- const int y_idx_m = y_idx + ((j >> 1) << 1);
- int r, d;
- int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
- ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl16[16 / MI_SIZE], sa16[16 / MI_SIZE];
-
- mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
-
- if (mi_row + y_idx_m >= cm->mi_rows ||
- mi_col + x_idx_m >= cm->mi_cols) {
- // MB lies outside frame, move on
- continue;
- }
-
- // Index of the MB in the SB 0..3
- xd->mb_index = j;
-
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(l3 + 4 * p,
- cm->left_context[p] +
- (y_idx_m * 2 >> xd->plane[p].subsampling_y),
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
- vpx_memcpy(a3 + 4 * p,
- cm->above_context[p] +
- ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
- }
- vpx_memcpy(&sa16, cm->above_seg_context + mi_col + x_idx_m,
- sizeof(sa16));
- vpx_memcpy(&sl16, cm->left_seg_context + y_idx_m, sizeof(sl16));
-
- for (k = 0; k < 4; k++) {
- xd->b_index = k;
-
- // try 8x8 coding
- pick_sb_modes(cpi, mi_row + y_idx_m + (k >> 1),
- mi_col + x_idx_m + (k & 1),
- tp, &r, &d, BLOCK_SIZE_SB8X8,
- &x->sb8_context[xd->sb_index][xd->mb_index]
- [xd->b_index]);
- mb16_rate += r;
- mb16_dist += d;
- update_state(cpi, &x->sb8_context[xd->sb_index][xd->mb_index]
- [xd->b_index],
- BLOCK_SIZE_SB8X8, 0);
- encode_superblock(cpi, tp,
- 0, mi_row + y_idx_m + (k >> 1),
- mi_col + x_idx_m + (k & 1),
- BLOCK_SIZE_SB8X8);
- }
- set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
- pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
- mb16_rate += x->partition_cost[pl][PARTITION_SPLIT];
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(cm->left_context[p] +
- (y_idx_m * 2 >> xd->plane[p].subsampling_y),
- l3 + 4 * p,
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
- vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
- a3 + 4 * p,
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
- }
- vpx_memcpy(cm->above_seg_context + mi_col + x_idx_m,
- sa16, sizeof(sa16));
- vpx_memcpy(cm->left_seg_context + y_idx_m, sl16, sizeof(sl16));
-
- // try 8x16 coding
- r2 = 0;
- d2 = 0;
- xd->b_index = 0;
- pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m,
- tp, &r, &d, BLOCK_SIZE_SB8X16,
- &x->sb8x16_context[xd->sb_index][xd->mb_index]
- [xd->b_index]);
- r2 += r;
- d2 += d;
- update_state(cpi, &x->sb8x16_context[xd->sb_index][xd->mb_index]
- [xd->b_index],
- BLOCK_SIZE_SB8X16, 0);
- encode_superblock(cpi, tp,
- 0, mi_row + y_idx_m, mi_col + x_idx_m,
- BLOCK_SIZE_SB8X16);
- xd->b_index = 1;
- pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m + 1,
- tp, &r, &d, BLOCK_SIZE_SB8X16,
- &x->sb8x16_context[xd->sb_index][xd->mb_index]
- [xd->b_index]);
- r2 += r;
- d2 += d;
- set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
- pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
- r2 += x->partition_cost[pl][PARTITION_VERT];
- if (RDCOST(x->rdmult, x->rddiv, r2, d2) <
- RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
- mb16_rate = r2;
- mb16_dist = d2;
- mb_partitioning[i][j] = BLOCK_SIZE_SB8X16;
- }
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(cm->left_context[p] +
- (y_idx_m * 2 >> xd->plane[p].subsampling_y),
- l3 + 4 * p,
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
- vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
- a3 + 4 * p,
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
- }
-
- // try 16x8 coding
- r2 = 0;
- d2 = 0;
- xd->b_index = 0;
- pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m,
- tp, &r, &d, BLOCK_SIZE_SB16X8,
- &x->sb16x8_context[xd->sb_index][xd->mb_index]
- [xd->b_index]);
- r2 += r;
- d2 += d;
- update_state(cpi, &x->sb16x8_context[xd->sb_index][xd->mb_index]
- [xd->b_index],
- BLOCK_SIZE_SB16X8, 0);
- encode_superblock(cpi, tp,
- 0, mi_row + y_idx_m, mi_col + x_idx_m,
- BLOCK_SIZE_SB16X8);
- xd->b_index = 1;
- pick_sb_modes(cpi, mi_row + y_idx_m + 1, mi_col + x_idx_m,
- tp, &r, &d, BLOCK_SIZE_SB16X8,
- &x->sb16x8_context[xd->sb_index][xd->mb_index]
- [xd->b_index]);
- r2 += r;
- d2 += d;
- set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
- pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
- r2 += x->partition_cost[pl][PARTITION_HORZ];
- if (RDCOST(x->rdmult, x->rddiv, r2, d2) <
- RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
- mb16_rate = r2;
- mb16_dist = d2;
- mb_partitioning[i][j] = BLOCK_SIZE_SB16X8;
- }
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(cm->left_context[p] +
- (y_idx_m * 2 >> xd->plane[p].subsampling_y),
- l3 + 4 * p,
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
- vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
- a3 + 4 * p,
- sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
- }
-
- // try as 16x16
- pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m,
- tp, &r, &d, BLOCK_SIZE_MB16X16,
- &x->mb_context[xd->sb_index][xd->mb_index]);
- set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
- pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
- r += x->partition_cost[pl][PARTITION_NONE];
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
- mb16_rate = r;
- mb16_dist = d;
- mb_partitioning[i][j] = BLOCK_SIZE_MB16X16;
- }
- sb32_rate += mb16_rate;
- sb32_dist += mb16_dist;
-
- // Dummy encode, do not do the tokenization
- encode_sb(cpi, tp, mi_row + y_idx_m, mi_col + x_idx_m, 0,
- BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL);
- }
-
- /* Restore L & A coding context to those in place on entry */
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(cm->left_context[p] +
- (y_idx * 2 >> xd->plane[p].subsampling_y),
- l2 + 8 * p,
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
- vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
- a2 + 8 * p,
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
- }
- // restore partition information context
- vpx_memcpy(cm->above_seg_context + mi_col + x_idx, sa32, sizeof(sa32));
- vpx_memcpy(cm->left_seg_context + y_idx, sl32, sizeof(sl32));
-
- set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
- sb32_rate += x->partition_cost[pl][PARTITION_SPLIT];
-
- if (cpi->sf.splitmode_breakout) {
- sb32_skip = splitmodes_used;
- sb64_skip += splitmodes_used;
- }
-
- // check 32x16
- if (mi_col + x_idx + 4 <= cm->mi_cols) {
- int r, d;
-
- xd->mb_index = 0;
- pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
- tp, &r, &d, BLOCK_SIZE_SB32X16,
- &x->sb32x16_context[xd->sb_index][xd->mb_index]);
- if (mi_row + y_idx + 2 < cm->mi_rows) {
- int r2, d2;
-
- update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index],
- BLOCK_SIZE_SB32X16, 0);
- encode_superblock(cpi, tp,
- 0, mi_row + y_idx, mi_col + x_idx,
- BLOCK_SIZE_SB32X16);
- xd->mb_index = 1;
- pick_sb_modes(cpi, mi_row + y_idx + 2,
- mi_col + x_idx, tp, &r2, &d2, BLOCK_SIZE_SB32X16,
- &x->sb32x16_context[xd->sb_index][xd->mb_index]);
- r += r2;
- d += d2;
- }
-
- set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
- r += x->partition_cost[pl][PARTITION_HORZ];
-
- /* is this better than MB coding? */
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
- sb32_rate = r;
- sb32_dist = d;
- sb_partitioning[i] = BLOCK_SIZE_SB32X16;
- }
-
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(cm->left_context[p] +
- (y_idx * 2 >> xd->plane[p].subsampling_y),
- l2 + 8 * p,
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
- vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
- a2 + 8 * p,
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
- }
- }
-
- // check 16x32
- if (mi_row + y_idx + 4 <= cm->mi_rows) {
- int r, d;
-
- xd->mb_index = 0;
- pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
- tp, &r, &d, BLOCK_SIZE_SB16X32,
- &x->sb16x32_context[xd->sb_index][xd->mb_index]);
- if (mi_col + x_idx + 2 < cm->mi_cols) {
- int r2, d2;
-
- update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index],
- BLOCK_SIZE_SB16X32, 0);
- encode_superblock(cpi, tp,
- 0, mi_row + y_idx, mi_col + x_idx,
- BLOCK_SIZE_SB16X32);
- xd->mb_index = 1;
- pick_sb_modes(cpi, mi_row + y_idx,
- mi_col + x_idx + 2,
- tp, &r2, &d2, BLOCK_SIZE_SB16X32,
- &x->sb16x32_context[xd->sb_index][xd->mb_index]);
- r += r2;
- d += d2;
- }
-
- set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
- r += x->partition_cost[pl][PARTITION_VERT];
-
- /* is this better than MB coding? */
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
- sb32_rate = r;
- sb32_dist = d;
- sb_partitioning[i] = BLOCK_SIZE_SB16X32;
- }
-
- for (p = 0; p < MAX_MB_PLANE; p++) {
- vpx_memcpy(cm->left_context[p] +
- (y_idx * 2 >> xd->plane[p].subsampling_y),
- l2 + 8 * p,
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
- vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
- a2 + 8 * p,
- sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
- }
- }
-
- if (!sb32_skip &&
- mi_col + x_idx + 4 <= cm->mi_cols &&
- mi_row + y_idx + 4 <= cm->mi_rows) {
- int r, d;
-
- /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
- pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
- tp, &r, &d, BLOCK_SIZE_SB32X32,
- &x->sb32_context[xd->sb_index]);
-
- set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
- r += x->partition_cost[pl][PARTITION_NONE];
-
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
- sb32_rate = r;
- sb32_dist = d;
- sb_partitioning[i] = BLOCK_SIZE_SB32X32;
- }
- }
-
- // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled).
- if (cpi->sf.mb16_breakout && sb_partitioning[i] != BLOCK_SIZE_SB32X32) {
- ++sb64_skip;
- }
-
- sb64_rate += sb32_rate;
- sb64_dist += sb32_dist;
-
- /* Encode SB using best computed mode(s) */
- // FIXME(rbultje): there really shouldn't be any need to encode_mb/sb
- // for each level that we go up, we can just keep tokens and recon
- // pixels of the lower level; also, inverting SB/MB order (big->small
- // instead of small->big) means we can use as threshold for small, which
- // may enable breakouts if RD is not good enough (i.e. faster)
- encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0,
- BLOCK_SIZE_SB32X32, sb_partitioning[i], mb_partitioning[i],
- NULL);
+ *(get_sb_index(xd, subsize)) = i;
+ rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+ &r, &d);
+ r4 += r;
+ d4 += d;
}
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ r4 += x->partition_cost[pl][PARTITION_SPLIT];
- for (p = 0; p < MAX_MB_PLANE; p++) {
- memcpy(cm->above_context[p] +
- (mi_col * 2 >> xd->plane[p].subsampling_x),
- a + 16 * p,
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
- memcpy(cm->left_context[p], l + 16 * p,
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
- }
- memcpy(cm->above_seg_context + mi_col, &seg_a, sizeof(seg_a));
- memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
-
- set_partition_seg_context(cpi, mi_row, mi_col);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
- sb64_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ srate = r4;
+ sdist = d4;
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
- // check 64x32
- if (mi_col + 8 <= cm->mi_cols && !(cm->mb_rows & 1)) {
+ // PARTITION_HORZ
+ if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) &&
+ (bsize >= BLOCK_SIZE_MB16X16)) {
+ int r2, d2;
+ int mb_skip = 0;
+ subsize = get_subsize(bsize, PARTITION_HORZ);
+ *(get_sb_index(xd, subsize)) = 0;
+ pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
+ get_block_context(x, subsize));
+
+ if (mi_row + ms <= cm->mi_rows) {
int r, d;
-
- xd->sb_index = 0;
- pick_sb_modes(cpi, mi_row, mi_col,
- tp, &r, &d, BLOCK_SIZE_SB64X32,
- &x->sb64x32_context[xd->sb_index]);
- if (mi_row + 4 != cm->mi_rows) {
- int r2, d2;
-
- update_state(cpi, &x->sb64x32_context[xd->sb_index],
- BLOCK_SIZE_SB64X32, 0);
- encode_superblock(cpi, tp,
- 0, mi_row, mi_col, BLOCK_SIZE_SB64X32);
- xd->sb_index = 1;
- pick_sb_modes(cpi, mi_row + 4, mi_col,
- tp, &r2, &d2, BLOCK_SIZE_SB64X32,
- &x->sb64x32_context[xd->sb_index]);
- r += r2;
- d += d2;
- }
-
- set_partition_seg_context(cpi, mi_row, mi_col);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
- r += x->partition_cost[pl][PARTITION_HORZ];
-
- /* is this better than MB coding? */
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
- sb64_rate = r;
- sb64_dist = d;
- sb64_partitioning = BLOCK_SIZE_SB64X32;
- }
-
- for (p = 0; p < MAX_MB_PLANE; p++) {
- memcpy(cm->above_context[p] +
- (mi_col * 2 >> xd->plane[p].subsampling_x),
- a + 16 * p,
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
- memcpy(cm->left_context[p], l + 16 * p,
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
- }
+ update_state(cpi, get_block_context(x, subsize), subsize, 0);
+ encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+ *(get_sb_index(xd, subsize)) = 1;
+ pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
+ get_block_context(x, subsize));
+ r2 += r;
+ d2 += d;
+ } else {
+ if (mi_row + (ms >> 1) != cm->mi_rows)
+ mb_skip = 1;
+ }
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ r2 += x->partition_cost[pl][PARTITION_HORZ];
+
+ if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
+ RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
+ srate = r2;
+ sdist = d2;
+ *(get_sb_partitioning(x, bsize)) = subsize;
}
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
- // check 32x64
- if (mi_row + 8 <= cm->mi_rows && !(cm->mb_cols & 1)) {
+ // PARTITION_VERT
+ if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) &&
+ (bsize >= BLOCK_SIZE_MB16X16)) {
+ int r2, d2;
+ int mb_skip = 0;
+ subsize = get_subsize(bsize, PARTITION_VERT);
+ *(get_sb_index(xd, subsize)) = 0;
+ pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
+ get_block_context(x, subsize));
+ if (mi_col + ms <= cm->mi_cols) {
int r, d;
+ update_state(cpi, get_block_context(x, subsize), subsize, 0);
+ encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+ *(get_sb_index(xd, subsize)) = 1;
+ pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
+ get_block_context(x, subsize));
+ r2 += r;
+ d2 += d;
+ } else {
+ if (mi_col + (ms >> 1) != cm->mi_cols)
+ mb_skip = 1;
+ }
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ r2 += x->partition_cost[pl][PARTITION_VERT];
+
+ if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
+ RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
+ srate = r2;
+ sdist = d2;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ }
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
- xd->sb_index = 0;
- pick_sb_modes(cpi, mi_row, mi_col,
- tp, &r, &d, BLOCK_SIZE_SB32X64,
- &x->sb32x64_context[xd->sb_index]);
- if (mi_col + 4 != cm->mi_cols) {
- int r2, d2;
-
- update_state(cpi, &x->sb32x64_context[xd->sb_index],
- BLOCK_SIZE_SB32X64, 0);
- encode_superblock(cpi, tp,
- 0, mi_row, mi_col, BLOCK_SIZE_SB32X64);
- xd->sb_index = 1;
- pick_sb_modes(cpi, mi_row, mi_col + 4,
- tp, &r2, &d2, BLOCK_SIZE_SB32X64,
- &x->sb32x64_context[xd->sb_index]);
- r += r2;
- d += d2;
- }
-
- set_partition_seg_context(cpi, mi_row, mi_col);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
- r += x->partition_cost[pl][PARTITION_VERT];
-
- /* is this better than MB coding? */
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
- sb64_rate = r;
- sb64_dist = d;
- sb64_partitioning = BLOCK_SIZE_SB32X64;
- }
+ // PARTITION_NONE
+ if (mi_row + ms <= cm->mi_rows && mi_col + ms <= cm->mi_cols) {
+ int r, d;
+ pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
+ get_block_context(x, bsize));
+ if (bsize >= BLOCK_SIZE_MB16X16) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ r += x->partition_cost[pl][PARTITION_NONE];
+ }
- for (p = 0; p < MAX_MB_PLANE; p++) {
- memcpy(cm->above_context[p] +
- (mi_col * 2 >> xd->plane[p].subsampling_x),
- a + 16 * p,
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
- memcpy(cm->left_context[p], l + 16 * p,
- sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
- }
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
+ srate = r;
+ sdist = d;
+ if (bsize >= BLOCK_SIZE_MB16X16)
+ *(get_sb_partitioning(x, bsize)) = bsize;
}
+ }
- if (!sb64_skip &&
- mi_col + 8 <= cm->mi_cols &&
- mi_row + 8 <= cm->mi_rows) {
- int r, d;
+ assert(srate < INT_MAX && sdist < INT_MAX);
+ *rate = srate;
+ *dist = sdist;
- pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d,
- BLOCK_SIZE_SB64X64, &x->sb64_context);
+ encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
- set_partition_seg_context(cpi, mi_row, mi_col);
- pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
- r += x->partition_cost[pl][PARTITION_NONE];
+ if (bsize == BLOCK_SIZE_SB64X64)
+ assert(tp_orig < *tp);
+ else
+ assert(tp_orig == *tp);
+}
- if (RDCOST(x->rdmult, x->rddiv, r, d) <
- RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
- sb64_rate = r;
- sb64_dist = d;
- sb64_partitioning = BLOCK_SIZE_SB64X64;
- }
- }
+static void encode_sb_row(VP9_COMP *cpi, int mi_row,
+ TOKENEXTRA **tp, int *totalrate) {
+ VP9_COMMON *const cm = &cpi->common;
+ int mi_col;
- assert(tp_orig == *tp);
- encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64,
- sb64_partitioning, sb_partitioning, mb_partitioning);
- assert(tp_orig < *tp);
+ // Initialize the left context for the new SB row
+ vpx_memset(&cm->left_context, 0, sizeof(cm->left_context));
+ vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context));
+
+ // Code each SB in the row
+ for (mi_col = cm->cur_tile_mi_col_start;
+ mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
+ int dummy_rate, dummy_dist;
+ rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
+ &dummy_rate, &dummy_dist);
}
}
@@ -1423,7 +1110,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
vp9_build_block_offsets(x);
- vp9_setup_block_dptrs(&x->e_mbd);
+ vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
@@ -1559,9 +1246,8 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_get_tile_col_offsets(cm, tile_col);
for (mi_row = cm->cur_tile_mi_row_start;
mi_row < cm->cur_tile_mi_row_end;
- mi_row += 8) {
+ mi_row += 8)
encode_sb_row(cpi, mi_row, &tp, &totalrate);
- }
cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <=
get_token_alloc(cm->mb_rows, cm->mb_cols));
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 4665fccd0..e4002d689 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -139,6 +139,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
const int ib = txfrm_block_to_raster_block(xd, bsize, plane,
block, 2 * tx_size);
const int16_t *dequant_ptr = xd->plane[plane].dequant;
+ const uint8_t * band_translate;
assert((!type && !plane) || (type && plane));
dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
@@ -149,23 +150,27 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT;
default_eob = 16;
scan = get_scan_4x4(tx_type);
+ band_translate = vp9_coefband_trans_4x4;
break;
}
case TX_8X8: {
const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT;
scan = get_scan_8x8(tx_type);
default_eob = 64;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_16X16: {
const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT;
scan = get_scan_16x16(tx_type);
default_eob = 256;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
default_eob = 1024;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
assert(eob <= default_eob);
@@ -204,7 +209,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
t0 = (vp9_dct_value_tokens_ptr + x)->token;
/* Consider both possible successor states. */
if (next < default_eob) {
- band = get_coef_band(scan, tx_size, i + 1);
+ band = get_coef_band(band_translate, i + 1);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
rate0 +=
@@ -254,7 +259,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
}
if (next < default_eob) {
- band = get_coef_band(scan, tx_size, i + 1);
+ band = get_coef_band(band_translate, i + 1);
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
@@ -291,7 +296,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
* add a new trellis node, but we do need to update the costs.
*/
else {
- band = get_coef_band(scan, tx_size, i + 1);
+ band = get_coef_band(band_translate, i + 1);
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
@@ -310,7 +315,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
}
/* Now pick the best path through the whole trellis. */
- band = get_coef_band(scan, tx_size, i + 1);
+ band = get_coef_band(band_translate, i + 1);
pt = combine_entropy_contexts(*a, *l);
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 436c8d4e0..ddcf849ce 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -47,7 +47,7 @@
#define KF_MB_INTRA_MIN 150
#define GF_MB_INTRA_MIN 100
-#define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001)
+#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
#define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0
#define POW2 (double)cpi->oxcf.two_pass_vbrbias/100.0
@@ -78,8 +78,8 @@ static int select_cq_level(int qindex) {
// Resets the first pass file to the given position using a relative seek from the current position
-static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *Position) {
- cpi->twopass.stats_in = Position;
+static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *position) {
+ cpi->twopass.stats_in = position;
}
static int lookup_next_frame_stats(VP9_COMP *cpi, FIRSTPASS_STATS *next_frame) {
@@ -252,17 +252,11 @@ static void avg_stats(FIRSTPASS_STATS *section) {
// Calculate a modified Error used in distributing bits between easier and harder frames
static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- double av_err = (cpi->twopass.total_stats.ssim_weighted_pred_err /
- cpi->twopass.total_stats.count);
- double this_err = this_frame->ssim_weighted_pred_err;
- double modified_err;
-
- if (this_err > av_err)
- modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW1);
- else
- modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW2);
-
- return modified_err;
+ const FIRSTPASS_STATS *const stats = &cpi->twopass.total_stats;
+ const double av_err = stats->ssim_weighted_pred_err / stats->count;
+ const double this_err = this_frame->ssim_weighted_pred_err;
+ return av_err * pow(this_err / DOUBLE_DIVIDE_CHECK(av_err),
+ this_err > av_err ? POW1 : POW2);
}
static const double weight_table[256] = {
@@ -328,20 +322,14 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) {
static int frame_max_bits(VP9_COMP *cpi) {
// Max allocation for a single frame based on the max section guidelines
// passed in and how many bits are left.
- int max_bits;
-
// For VBR base this on the bits and frames left plus the
// two_pass_vbrmax_section rate passed in by the user.
- max_bits = (int) (((double) cpi->twopass.bits_left
- / (cpi->twopass.total_stats.count - (double) cpi->common
- .current_video_frame))
- * ((double) cpi->oxcf.two_pass_vbrmax_section / 100.0));
+ const double max_bits = (1.0 * cpi->twopass.bits_left /
+ (cpi->twopass.total_stats.count - cpi->common.current_video_frame)) *
+ (cpi->oxcf.two_pass_vbrmax_section / 100.0);
// Trap case where we are out of bits.
- if (max_bits < 0)
- max_bits = 0;
-
- return max_bits;
+ return MAX((int)max_bits, 0);
}
void vp9_init_first_pass(VP9_COMP *cpi) {
@@ -489,7 +477,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp9_build_block_offsets(x);
- vp9_setup_block_dptrs(&x->e_mbd);
+ vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_frame_init_quantizer(cpi);
@@ -854,26 +842,18 @@ static double calc_correction_factor(double err_per_mb,
double err_divisor,
double pt_low,
double pt_high,
- int Q) {
- double power_term;
- double error_term = err_per_mb / err_divisor;
- double correction_factor;
+ int q) {
+ const double error_term = err_per_mb / err_divisor;
// Adjustment based on actual quantizer to power term.
- power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low;
- power_term = (power_term > pt_high) ? pt_high : power_term;
+ const double power_term = MIN(vp9_convert_qindex_to_q(q) * 0.01 + pt_low,
+ pt_high);
// Calculate correction factor
if (power_term < 1.0)
assert(error_term >= 0.0);
- correction_factor = pow(error_term, power_term);
- // Clip range
- correction_factor =
- (correction_factor < 0.05)
- ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor;
-
- return correction_factor;
+ return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
// Given a current maxQ value sets a range for future values.
@@ -882,10 +862,8 @@ static double calc_correction_factor(double err_per_mb,
// (now uses the actual quantizer) but has not been tuned.
static void adjust_maxq_qrange(VP9_COMP *cpi) {
int i;
- double q;
-
// Set the max corresponding to cpi->avg_q * 2.0
- q = cpi->avg_q * 2.0;
+ double q = cpi->avg_q * 2.0;
cpi->twopass.maxq_max_limit = cpi->worst_quality;
for (i = cpi->best_quality; i <= cpi->worst_quality; i++) {
cpi->twopass.maxq_max_limit = i;
@@ -906,12 +884,11 @@ static void adjust_maxq_qrange(VP9_COMP *cpi) {
static int estimate_max_q(VP9_COMP *cpi,
FIRSTPASS_STATS *fpstats,
int section_target_bandwitdh) {
- int Q;
+ int q;
int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
- double section_err = (fpstats->coded_error / fpstats->count);
- double sr_err_diff;
+ double section_err = fpstats->coded_error / fpstats->count;
double sr_correction;
double err_per_mb = section_err / num_mbs;
double err_correction_factor;
@@ -920,92 +897,74 @@ static int estimate_max_q(VP9_COMP *cpi,
if (section_target_bandwitdh <= 0)
return cpi->twopass.maxq_max_limit; // Highest value allowed
- target_norm_bits_per_mb =
- (section_target_bandwitdh < (1 << 20))
- ? (512 * section_target_bandwitdh) / num_mbs
- : 512 * (section_target_bandwitdh / num_mbs);
+ target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20)
+ ? (512 * section_target_bandwitdh) / num_mbs
+ : 512 * (section_target_bandwitdh / num_mbs);
// Look at the drop in prediction quality between the last frame
// and the GF buffer (which contained an older frame).
if (fpstats->sr_coded_error > fpstats->coded_error) {
- sr_err_diff =
- (fpstats->sr_coded_error - fpstats->coded_error) /
- (fpstats->count * cpi->common.MBs);
- sr_correction = (sr_err_diff / 32.0);
- sr_correction = pow(sr_correction, 0.25);
- if (sr_correction < 0.75)
- sr_correction = 0.75;
- else if (sr_correction > 1.25)
- sr_correction = 1.25;
+ double sr_err_diff = (fpstats->sr_coded_error - fpstats->coded_error) /
+ (fpstats->count * cpi->common.MBs);
+ sr_correction = fclamp(pow(sr_err_diff / 32.0, 0.25), 0.75, 1.25);
} else {
sr_correction = 0.75;
}
// Calculate a corrective factor based on a rolling ratio of bits spent
// vs target bits
- if ((cpi->rolling_target_bits > 0) &&
- (cpi->active_worst_quality < cpi->worst_quality)) {
- double rolling_ratio;
-
- rolling_ratio = (double)cpi->rolling_actual_bits /
- (double)cpi->rolling_target_bits;
+ if (cpi->rolling_target_bits > 0 &&
+ cpi->active_worst_quality < cpi->worst_quality) {
+ double rolling_ratio = (double)cpi->rolling_actual_bits /
+ (double)cpi->rolling_target_bits;
if (rolling_ratio < 0.95)
cpi->twopass.est_max_qcorrection_factor -= 0.005;
else if (rolling_ratio > 1.05)
cpi->twopass.est_max_qcorrection_factor += 0.005;
- cpi->twopass.est_max_qcorrection_factor =
- (cpi->twopass.est_max_qcorrection_factor < 0.1)
- ? 0.1
- : (cpi->twopass.est_max_qcorrection_factor > 10.0)
- ? 10.0 : cpi->twopass.est_max_qcorrection_factor;
+ cpi->twopass.est_max_qcorrection_factor = fclamp(
+ cpi->twopass.est_max_qcorrection_factor, 0.1, 10.0);
}
// Corrections for higher compression speed settings
// (reduced compression expected)
- if (cpi->compressor_speed == 1) {
- if (cpi->oxcf.cpu_used <= 5)
- speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04);
- else
- speed_correction = 1.25;
- }
+ if (cpi->compressor_speed == 1)
+ speed_correction = cpi->oxcf.cpu_used <= 5 ?
+ 1.04 + (cpi->oxcf.cpu_used * 0.04) :
+ 1.25;
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
- for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) {
+ for (q = cpi->twopass.maxq_min_limit; q < cpi->twopass.maxq_max_limit; q++) {
int bits_per_mb_at_this_q;
- err_correction_factor =
- calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.4, 0.90, Q) *
- sr_correction * speed_correction *
- cpi->twopass.est_max_qcorrection_factor;
+ err_correction_factor = calc_correction_factor(err_per_mb,
+ ERR_DIVISOR, 0.4, 0.90, q) *
+ sr_correction * speed_correction *
+ cpi->twopass.est_max_qcorrection_factor;
-
- bits_per_mb_at_this_q =
- vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
+ bits_per_mb_at_this_q = vp9_bits_per_mb(INTER_FRAME, q,
+ err_correction_factor);
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
}
// Restriction on active max q for constrained quality mode.
- if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
- (Q < cpi->cq_target_quality)) {
- Q = cpi->cq_target_quality;
- }
+ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
+ q < cpi->cq_target_quality)
+ q = cpi->cq_target_quality;
// Adjust maxq_min_limit and maxq_max_limit limits based on
// average q observed in clip for non kf/gf/arf frames
// Give average a chance to settle though.
// PGW TODO.. This code is broken for the extended Q range
- if ((cpi->ni_frames >
- ((int)cpi->twopass.total_stats.count >> 8)) &&
- (cpi->ni_frames > 25)) {
+ if (cpi->ni_frames > ((int)cpi->twopass.total_stats.count >> 8) &&
+ cpi->ni_frames > 25)
adjust_maxq_qrange(cpi);
- }
- return Q;
+ return q;
}
// For cq mode estimate a cq level that matches the observed
@@ -1013,7 +972,7 @@ static int estimate_max_q(VP9_COMP *cpi,
static int estimate_cq(VP9_COMP *cpi,
FIRSTPASS_STATS *fpstats,
int section_target_bandwitdh) {
- int Q;
+ int q;
int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
@@ -1064,29 +1023,29 @@ static int estimate_cq(VP9_COMP *cpi,
clip_iifactor = 0.80;
// Try and pick a Q that can encode the content at the given rate.
- for (Q = 0; Q < MAXQ; Q++) {
+ for (q = 0; q < MAXQ; q++) {
int bits_per_mb_at_this_q;
// Error per MB based correction factor
err_correction_factor =
- calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) *
+ calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) *
sr_correction * speed_correction * clip_iifactor;
bits_per_mb_at_this_q =
- vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
+ vp9_bits_per_mb(INTER_FRAME, q, err_correction_factor);
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
}
// Clip value to range "best allowed to (worst allowed - 1)"
- Q = select_cq_level(Q);
- if (Q >= cpi->worst_quality)
- Q = cpi->worst_quality - 1;
- if (Q < cpi->best_quality)
- Q = cpi->best_quality;
+ q = select_cq_level(q);
+ if (q >= cpi->worst_quality)
+ q = cpi->worst_quality - 1;
+ if (q < cpi->best_quality)
+ q = cpi->best_quality;
- return Q;
+ return q;
}
@@ -1117,9 +1076,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
// encoded in the second pass is a guess. However the sum duration is not.
// Its calculated based on the actual durations of all frames from the first
// pass.
- vp9_new_frame_rate(cpi,
- 10000000.0 * cpi->twopass.total_stats.count /
- cpi->twopass.total_stats.duration);
+ vp9_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count /
+ cpi->twopass.total_stats.duration);
cpi->output_frame_rate = cpi->oxcf.frame_rate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration *
@@ -1191,9 +1149,8 @@ static double get_prediction_decay_rate(VP9_COMP *cpi,
// Look at the observed drop in prediction quality between the last frame
// and the GF buffer (which contains an older frame).
- mb_sr_err_diff =
- (next_frame->sr_coded_error - next_frame->coded_error) /
- (cpi->common.MBs);
+ mb_sr_err_diff = (next_frame->sr_coded_error - next_frame->coded_error) /
+ cpi->common.MBs;
if (mb_sr_err_diff <= 512.0) {
second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0);
second_ref_decay = pow(second_ref_decay, 0.5);
@@ -1225,9 +1182,9 @@ static int detect_transition_to_still(
// Break clause to detect very still sections after motion
// For example a static image after a fade or other transition
// instead of a clean scene cut.
- if ((frame_interval > MIN_GF_INTERVAL) &&
- (loop_decay_rate >= 0.999) &&
- (last_decay_rate < 0.9)) {
+ if (frame_interval > MIN_GF_INTERVAL &&
+ loop_decay_rate >= 0.999 &&
+ last_decay_rate < 0.9) {
int j;
FIRSTPASS_STATS *position = cpi->twopass.stats_in;
FIRSTPASS_STATS tmp_next_frame;
@@ -1271,10 +1228,9 @@ static int detect_flash(VP9_COMP *cpi, int offset) {
// are reasonably well predicted by an earlier (pre flash) frame.
// The recovery after a flash is indicated by a high pcnt_second_ref
// compared to pcnt_inter.
- if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
- (next_frame.pcnt_second_ref >= 0.5)) {
+ if (next_frame.pcnt_second_ref > next_frame.pcnt_inter &&
+ next_frame.pcnt_second_ref >= 0.5)
flash_detected = 1;
- }
}
return flash_detected;
@@ -1356,13 +1312,9 @@ static double calc_frame_boost(
return frame_boost;
}
-static int calc_arf_boost(
- VP9_COMP *cpi,
- int offset,
- int f_frames,
- int b_frames,
- int *f_boost,
- int *b_boost) {
+static int calc_arf_boost(VP9_COMP *cpi, int offset,
+ int f_frames, int b_frames,
+ int *f_boost, int *b_boost) {
FIRSTPASS_STATS this_frame;
int i;
@@ -1392,8 +1344,7 @@ static int calc_arf_boost(
// Cumulative effect of prediction quality decay
if (!flash_detected) {
- decay_accumulator =
- decay_accumulator * get_prediction_decay_rate(cpi, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
@@ -1429,10 +1380,9 @@ static int calc_arf_boost(
// Cumulative effect of prediction quality decay
if (!flash_detected) {
- decay_accumulator =
- decay_accumulator * get_prediction_decay_rate(cpi, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
- ? MIN_DECAY_FACTOR : decay_accumulator;
+ ? MIN_DECAY_FACTOR : decay_accumulator;
}
boost_score += (decay_accumulator *
@@ -1871,26 +1821,20 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
for (i = 0;
i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME);
++i) {
- int boost;
int allocation_chunks;
- int Q =
- (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
+ int q = cpi->oxcf.fixed_q < 0 ? cpi->last_q[INTER_FRAME]
+ : cpi->oxcf.fixed_q;
int gf_bits;
- boost = (cpi->gfu_boost * vp9_gfboost_qadjust(Q)) / 100;
+ int boost = (cpi->gfu_boost * vp9_gfboost_qadjust(q)) / 100;
// Set max and minimum boost and hence minimum allocation
- if (boost > ((cpi->baseline_gf_interval + 1) * 200))
- boost = ((cpi->baseline_gf_interval + 1) * 200);
- else if (boost < 125)
- boost = 125;
+ boost = clamp(boost, 125, (cpi->baseline_gf_interval + 1) * 200);
if (cpi->source_alt_ref_pending && i == 0)
- allocation_chunks =
- ((cpi->baseline_gf_interval + 1) * 100) + boost;
+ allocation_chunks = ((cpi->baseline_gf_interval + 1) * 100) + boost;
else
- allocation_chunks =
- (cpi->baseline_gf_interval * 100) + (boost - 100);
+ allocation_chunks = (cpi->baseline_gf_interval * 100) + (boost - 100);
// Prevent overflow
if (boost > 1023) {
@@ -1901,41 +1845,34 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Calculate the number of bits to be spent on the gf or arf based on
// the boost number
- gf_bits = (int)((double)boost *
- (cpi->twopass.gf_group_bits /
- (double)allocation_chunks));
+ gf_bits = (int)((double)boost * (cpi->twopass.gf_group_bits /
+ (double)allocation_chunks));
// If the frame that is to be boosted is simpler than the average for
// the gf/arf group then use an alternative calculation
// based on the error score of the frame itself
if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) {
- double alt_gf_grp_bits;
- int alt_gf_bits;
-
- alt_gf_grp_bits =
+ double alt_gf_grp_bits =
(double)cpi->twopass.kf_group_bits *
(mod_frame_err * (double)cpi->baseline_gf_interval) /
DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left);
- alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /
+ int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /
(double)allocation_chunks));
- if (gf_bits > alt_gf_bits) {
+ if (gf_bits > alt_gf_bits)
gf_bits = alt_gf_bits;
- }
}
// Else if it is harder than other frames in the group make sure it at
// least receives an allocation in keeping with its relative error
// score, otherwise it may be worse off than an "un-boosted" frame
else {
- int alt_gf_bits =
- (int)((double)cpi->twopass.kf_group_bits *
- mod_frame_err /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left));
+ int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits *
+ mod_frame_err /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left));
- if (alt_gf_bits > gf_bits) {
+ if (alt_gf_bits > gf_bits)
gf_bits = alt_gf_bits;
- }
}
// Don't allow a negative value for gf_bits
@@ -1983,14 +1920,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the
// calculation of alt_extra_bits.
if (cpi->baseline_gf_interval >= 3) {
- int boost = (cpi->source_alt_ref_pending)
- ? b_boost : cpi->gfu_boost;
+ const int boost = cpi->source_alt_ref_pending ? b_boost : cpi->gfu_boost;
if (boost >= 150) {
- int pct_extra;
int alt_extra_bits;
-
- pct_extra = (boost - 100) / 50;
+ int pct_extra = (boost - 100) / 50;
pct_extra = (pct_extra > 20) ? 20 : pct_extra;
alt_extra_bits = (int)((cpi->twopass.gf_group_bits * pct_extra) / 100);
@@ -2071,33 +2005,21 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Make a damped adjustment to the active max q.
static int adjust_active_maxq(int old_maxqi, int new_maxqi) {
int i;
- int ret_val = new_maxqi;
- double old_q;
- double new_q;
- double target_q;
-
- old_q = vp9_convert_qindex_to_q(old_maxqi);
- new_q = vp9_convert_qindex_to_q(new_maxqi);
-
- target_q = ((old_q * 7.0) + new_q) / 8.0;
+ const double old_q = vp9_convert_qindex_to_q(old_maxqi);
+ const double new_q = vp9_convert_qindex_to_q(new_maxqi);
+ const double target_q = ((old_q * 7.0) + new_q) / 8.0;
if (target_q > old_q) {
- for (i = old_maxqi; i <= new_maxqi; i++) {
- if (vp9_convert_qindex_to_q(i) >= target_q) {
- ret_val = i;
- break;
- }
- }
+ for (i = old_maxqi; i <= new_maxqi; i++)
+ if (vp9_convert_qindex_to_q(i) >= target_q)
+ return i;
} else {
- for (i = old_maxqi; i >= new_maxqi; i--) {
- if (vp9_convert_qindex_to_q(i) <= target_q) {
- ret_val = i;
- break;
- }
- }
+ for (i = old_maxqi; i >= new_maxqi; i--)
+ if (vp9_convert_qindex_to_q(i) <= target_q)
+ return i;
}
- return ret_val;
+ return new_maxqi;
}
void vp9_second_pass(VP9_COMP *cpi) {
@@ -2111,9 +2033,8 @@ void vp9_second_pass(VP9_COMP *cpi) {
double this_frame_intra_error;
double this_frame_coded_error;
- if (!cpi->twopass.stats_in) {
+ if (!cpi->twopass.stats_in)
return;
- }
vp9_clear_system_state();
@@ -2123,12 +2044,8 @@ void vp9_second_pass(VP9_COMP *cpi) {
// Set a cq_level in constrained quality mode.
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
- int est_cq;
-
- est_cq =
- estimate_cq(cpi,
- &cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left));
+ int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats,
+ (int)(cpi->twopass.bits_left / frames_left));
cpi->cq_target_quality = cpi->oxcf.cq_level;
if (est_cq > cpi->cq_target_quality)
@@ -2139,14 +2056,12 @@ void vp9_second_pass(VP9_COMP *cpi) {
cpi->twopass.maxq_max_limit = cpi->worst_quality;
cpi->twopass.maxq_min_limit = cpi->best_quality;
- tmp_q = estimate_max_q(
- cpi,
- &cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left));
+ tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
+ (int)(cpi->twopass.bits_left / frames_left));
- cpi->active_worst_quality = tmp_q;
- cpi->ni_av_qi = tmp_q;
- cpi->avg_q = vp9_convert_qindex_to_q(tmp_q);
+ cpi->active_worst_quality = tmp_q;
+ cpi->ni_av_qi = tmp_q;
+ cpi->avg_q = vp9_convert_qindex_to_q(tmp_q);
#ifndef ONE_SHOT_Q_ESTIMATE
// Limit the maxq value returned subsequently.
@@ -2404,9 +2319,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (cpi->oxcf.auto_key
&& lookup_next_frame_stats(cpi, &next_frame) != EOF) {
// Normal scene cut check
- if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) {
+ if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
break;
- }
+
// How fast is prediction quality decaying
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
@@ -2416,19 +2331,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// quality since the last GF or KF.
recent_loop_decay[i % 8] = loop_decay_rate;
decay_accumulator = 1.0;
- for (j = 0; j < 8; j++) {
- decay_accumulator = decay_accumulator * recent_loop_decay[j];
- }
+ for (j = 0; j < 8; j++)
+ decay_accumulator *= recent_loop_decay[j];
// Special check for transition or high motion followed by a
// static scene.
- if (detect_transition_to_still(cpi, i,
- (cpi->key_frame_frequency - i),
- loop_decay_rate,
- decay_accumulator)) {
+ if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i,
+ loop_decay_rate, decay_accumulator))
break;
- }
-
// Step on to the next frame
cpi->twopass.frames_to_key++;
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index a89d2547e..708fe4549 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -46,7 +46,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
unsigned int i;
for (i = 0; i < ctx->max_sz; i++)
- vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img);
+ vp9_free_frame_buffer(&ctx->buf[i].img);
free(ctx->buf);
}
free(ctx);
@@ -56,6 +56,8 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
struct lookahead_ctx * vp9_lookahead_init(unsigned int width,
unsigned int height,
+ unsigned int subsampling_x,
+ unsigned int subsampling_y,
unsigned int depth) {
struct lookahead_ctx *ctx = NULL;
@@ -71,8 +73,9 @@ struct lookahead_ctx * vp9_lookahead_init(unsigned int width,
if (!ctx->buf)
goto bail;
for (i = 0; i < depth; i++)
- if (vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img,
- width, height, VP9BORDERINPIXELS))
+ if (vp9_alloc_frame_buffer(&ctx->buf[i].img,
+ width, height, subsampling_x, subsampling_y,
+ VP9BORDERINPIXELS))
goto bail;
}
return ctx;
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
index 2406618b9..81baa2c6f 100644
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -31,6 +31,8 @@ struct lookahead_ctx;
*/
struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
unsigned int height,
+ unsigned int subsampling_x,
+ unsigned int subsampling_y,
unsigned int depth);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 05105d794..3d8003c33 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -313,9 +313,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vp9_free_frame_buffers(&cpi->common);
- vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
- vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source);
- vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer);
+ vp9_free_frame_buffer(&cpi->last_frame_uf);
+ vp9_free_frame_buffer(&cpi->scaled_source);
+ vp9_free_frame_buffer(&cpi->alt_ref_buffer);
vp9_lookahead_destroy(cpi->lookahead);
vpx_free(cpi->tok);
@@ -835,15 +835,19 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
}
static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+
cpi->lookahead = vp9_lookahead_init(cpi->oxcf.width, cpi->oxcf.height,
+ cm->subsampling_x, cm->subsampling_y,
cpi->oxcf.lag_in_frames);
if (!cpi->lookahead)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
- if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer,
- cpi->oxcf.width, cpi->oxcf.height,
- VP9BORDERINPIXELS))
+ if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
+ cpi->oxcf.width, cpi->oxcf.height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
}
@@ -873,13 +877,17 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate partition data");
- if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf,
- cm->width, cm->height, VP9BORDERINPIXELS))
+ if (vp9_alloc_frame_buffer(&cpi->last_frame_uf,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
- if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source,
- cm->width, cm->height, VP9BORDERINPIXELS))
+ if (vp9_alloc_frame_buffer(&cpi->scaled_source,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
@@ -914,13 +922,17 @@ static void update_frame_size(VP9_COMP *cpi) {
vp9_update_frame_size(cm);
// Update size of buffers local to this frame
- if (vp8_yv12_realloc_frame_buffer(&cpi->last_frame_uf,
- cm->width, cm->height, VP9BORDERINPIXELS))
+ if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate last frame buffer");
- if (vp8_yv12_realloc_frame_buffer(&cpi->scaled_source,
- cm->width, cm->height, VP9BORDERINPIXELS))
+ if (vp9_realloc_frame_buffer(&cpi->scaled_source,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate scaled source buffer");
@@ -1032,6 +1044,9 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cm->width = oxcf->width;
cm->height = oxcf->height;
+ cm->subsampling_x = 0;
+ cm->subsampling_y = 0;
+ vp9_alloc_compressor_data(cpi);
// change includes all joint functionality
vp9_change_config(ptr, oxcf);
@@ -1196,17 +1211,13 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cm->sharpness_level = cpi->oxcf.Sharpness;
- // Increasing the size of the frame beyond the first seen frame, or some
- // otherwise signalled maximum size, is not supported.
- // TODO(jkoleszar): exit gracefully.
- if (!cpi->initial_width) {
- alloc_raw_frame_buffers(cpi);
- vp9_alloc_compressor_data(cpi);
- cpi->initial_width = cm->width;
- cpi->initial_height = cm->height;
+ if (cpi->initial_width) {
+ // Increasing the size of the frame beyond the first seen frame, or some
+ // otherwise signalled maximum size, is not supported.
+ // TODO(jkoleszar): exit gracefully.
+ assert(cm->width <= cpi->initial_width);
+ assert(cm->height <= cpi->initial_height);
}
- assert(cm->width <= cpi->initial_width);
- assert(cm->height <= cpi->initial_height);
update_frame_size(cpi);
if (cpi->oxcf.fixed_q >= 0) {
@@ -1938,8 +1949,8 @@ static void generate_psnr_packet(VP9_COMP *cpi) {
pkt.data.psnr.samples[0] = width * height;
pkt.data.psnr.samples[1] = width * height;
- width = (width + 1) / 2;
- height = (height + 1) / 2;
+ width = orig->uv_width;
+ height = orig->uv_height;
sse = calc_plane_error(orig->u_buffer, orig->uv_stride,
recon->u_buffer, recon->uv_stride,
@@ -2093,7 +2104,7 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
} while (--h);
src = s->u_buffer;
- h = (cm->height + 1) / 2;
+ h = s->uv_height;
do {
fwrite(src, s->uv_width, 1, yuv_rec_file);
@@ -2101,7 +2112,7 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
} while (--h);
src = s->v_buffer;
- h = (cm->height + 1) / 2;
+ h = s->uv_height;
do {
fwrite(src, s->uv_width, 1, yuv_rec_file);
@@ -2117,49 +2128,31 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
const int in_h = src_fb->y_crop_height;
const int out_w = dst_fb->y_crop_width;
const int out_h = dst_fb->y_crop_height;
- int x, y;
+ int x, y, i;
+
+ uint8_t *srcs[3] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer};
+ int src_strides[3] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride};
+
+ uint8_t *dsts[3] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer};
+ int dst_strides[3] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride};
for (y = 0; y < out_h; y += 16) {
for (x = 0; x < out_w; x += 16) {
- int x_q4 = x * 16 * in_w / out_w;
- int y_q4 = y * 16 * in_h / out_h;
- uint8_t *src = src_fb->y_buffer + y * in_h / out_h * src_fb->y_stride +
- x * in_w / out_w;
- uint8_t *dst = dst_fb->y_buffer + y * dst_fb->y_stride + x;
- int src_stride = src_fb->y_stride;
- int dst_stride = dst_fb->y_stride;
-
- vp9_convolve8(src, src_stride, dst, dst_stride,
- vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
- vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
- 16, 16);
-
- x_q4 >>= 1;
- y_q4 >>= 1;
- src_stride = src_fb->uv_stride;
- dst_stride = dst_fb->uv_stride;
-
- src = src_fb->u_buffer +
- y / 2 * in_h / out_h * src_fb->uv_stride +
- x / 2 * in_w / out_w;
- dst = dst_fb->u_buffer +
- y / 2 * dst_fb->uv_stride +
- x / 2;
- vp9_convolve8(src, src_stride, dst, dst_stride,
- vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
- vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
- 8, 8);
-
- src = src_fb->v_buffer +
- y / 2 * in_h / out_h * src_fb->uv_stride +
- x / 2 * in_w / out_w;
- dst = dst_fb->v_buffer +
- y / 2 * dst_fb->uv_stride +
- x / 2;
- vp9_convolve8(src, src_stride, dst, dst_stride,
- vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
- vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
- 8, 8);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const int factor = i == 0 ? 1 : 2;
+ const int x_q4 = x * (16 / factor) * in_w / out_w;
+ const int y_q4 = y * (16 / factor) * in_h / out_h;
+ const int src_stride = src_strides[i];
+ const int dst_stride = dst_strides[i];
+ uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride +
+ x / factor * in_w / out_w;
+ // dst advances in 16/factor steps per plane, so scale its offset too.
+ uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 16 / factor, 16 / factor);
+ }
}
}
@@ -2500,9 +2493,10 @@ static void scale_references(VP9_COMP *cpi) {
ref->y_crop_height != cm->height) {
int new_fb = get_free_fb(cm);
- vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb],
- cm->width, cm->height,
- VP9BORDERINPIXELS);
+ vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb],
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS);
scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
cpi->scaled_ref_idx[i] = new_fb;
} else {
@@ -3587,6 +3581,15 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
struct vpx_usec_timer timer;
int res = 0;
+ if (!cpi->initial_width) {
+ // TODO(jkoleszar): Support 1/4 subsampling?
+ cm->subsampling_x = sd->uv_width < sd->y_width;
+ cm->subsampling_y = sd->uv_height < sd->y_height;
+ alloc_raw_frame_buffers(cpi);
+
+ cpi->initial_width = cm->width;
+ cpi->initial_height = cm->height;
+ }
vpx_usec_timer_start(&timer);
if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags,
cpi->active_map_enabled ? cpi->active_map : NULL))
@@ -3851,9 +3854,10 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
cm->frame_flags = *frame_flags;
// Reset the frame pointers to the current frame size
- vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx],
- cm->width, cm->height,
- VP9BORDERINPIXELS);
+ vp9_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx],
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9BORDERINPIXELS);
// Calculate scaling factors for each of the 3 available references
for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index ef8cb2bab..5bff383b8 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -272,6 +272,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
[ENTROPY_NODES];
int seg_eob, default_eob;
uint8_t token_cache[1024];
+ const uint8_t * band_translate;
// Check for consistency of tx_size with mode info
assert((!type && !plane) || (type && plane));
@@ -291,6 +292,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
coef_probs = cm->fc.coef_probs_4x4;
seg_eob = 16;
scan = get_scan_4x4(tx_type);
+ band_translate = vp9_coefband_trans_4x4;
break;
}
case TX_8X8: {
@@ -304,6 +306,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
scan = get_scan_8x8(tx_type);
coef_probs = cm->fc.coef_probs_8x8;
seg_eob = 64;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_16X16: {
@@ -317,6 +320,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
seg_eob = 256;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_32X32:
@@ -325,6 +329,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
seg_eob = 1024;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
default:
abort();
@@ -347,7 +352,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
for (c = 0; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp9_dct_value_tokens_ptr[v].token;
- int band = get_coef_band(scan, tx_size, c);
+ int band = get_coef_band(band_translate, c);
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
@@ -361,7 +366,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
cost += mb->token_costs[tx_size][type][ref]
- [get_coef_band(scan, tx_size, c)]
+ [get_coef_band(band_translate, c)]
[pt][DCT_EOB_TOKEN];
}
}
@@ -1069,9 +1074,7 @@ typedef struct {
B_PREDICTION_MODE modes[4];
int_mv mvs[4], second_mvs[4];
int eobs[4];
-
int mvthresh;
- int *mdcounts;
} BEST_SEG_INFO;
static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
@@ -1322,7 +1325,6 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
int64_t best_rd,
- int *mdcounts,
int *returntotrate,
int *returnyrate,
int *returndistortion,
@@ -1339,7 +1341,6 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
bsi.second_ref_mv = second_best_ref_mv;
bsi.mvp.as_int = best_ref_mv->as_int;
bsi.mvthresh = mvthresh;
- bsi.mdcounts = mdcounts;
for (i = 0; i < 4; i++)
bsi.modes[i] = ZERO4X4;
@@ -1612,7 +1613,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
- int frame_mdcounts[4][4],
struct buf_2d yv12_mb[4][MAX_MB_PLANE],
struct scale_factors scale[MAX_REF_FRAMES]) {
VP9_COMMON *cm = &cpi->common;
@@ -1797,7 +1797,7 @@ static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize,
- int mdcounts[4], int64_t txfm_cache[],
+ int64_t txfm_cache[],
int *rate2, int *distortion, int *skippable,
int *compmode_cost,
int *rate_y, int *distortion_y,
@@ -2305,7 +2305,9 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mode = xd->mode_info_context->mbmi.mode;
txfm_size = xd->mode_info_context->mbmi.txfm_size;
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip, bsize);
+ &dist_uv, &uv_skip,
+ (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
+ bsize);
if (bsize == BLOCK_SIZE_SB8X8)
err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
&rate4x4_y_tokenonly,
@@ -2357,7 +2359,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
- int frame_mdcounts[4][4];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
int_mv single_newmv[MAX_REF_FRAMES];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
@@ -2366,7 +2367,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->lst_fb_idx,
cpi->gld_fb_idx,
cpi->alt_fb_idx};
- int mdcounts[4];
int64_t best_rd = INT64_MAX;
int64_t best_txfm_rd[NB_TXFM_MODES];
int64_t best_txfm_diff[NB_TXFM_MODES];
@@ -2449,7 +2449,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb, scale_factor);
+ yv12_mb, scale_factor);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -2576,8 +2576,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[i].pre[1] = yv12_mb[second_ref][i];
}
- vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts));
-
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
@@ -2675,7 +2673,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame][0],
- second_ref, INT64_MAX, mdcounts,
+ second_ref, INT64_MAX,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs);
@@ -2714,7 +2712,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// switchable list (bilinear, 6-tap) is indicated at the frame level
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame][0],
- second_ref, INT64_MAX, mdcounts,
+ second_ref, INT64_MAX,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs);
@@ -2745,10 +2743,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// If even the 'Y' rd value of split is higher than best so far
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
- bsize);
- vp9_subtract_sbuv(x, bsize);
+ BLOCK_SIZE_SB8X8);
+ vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
- &uv_skippable, bsize, TX_4X4);
+ &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -2792,7 +2790,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
this_rd = handle_inter_mode(cpi, x, bsize,
- mdcounts, txfm_cache,
+ txfm_cache,
&rate2, &distortion2, &skippable,
&compmode_cost,
&rate_y, &distortion_y,
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index cb670dab0..4420d49e3 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -136,6 +136,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
ENTROPY_CONTEXT above_ec, left_ec;
uint8_t token_cache[1024];
TX_TYPE tx_type = DCT_DCT;
+ const uint8_t * band_translate;
assert((!type && !plane) || (type && plane));
switch (tx_size) {
@@ -149,6 +150,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = get_scan_4x4(tx_type);
counts = cpi->coef_counts_4x4;
coef_probs = cpi->common.fc.coef_probs_4x4;
+ band_translate = vp9_coefband_trans_4x4;
break;
}
case TX_8X8: {
@@ -162,6 +164,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = get_scan_8x8(tx_type);
counts = cpi->coef_counts_8x8;
coef_probs = cpi->common.fc.coef_probs_8x8;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_16X16: {
@@ -175,6 +178,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = get_scan_16x16(tx_type);
counts = cpi->coef_counts_16x16;
coef_probs = cpi->common.fc.coef_probs_16x16;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_32X32:
@@ -184,6 +188,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = vp9_default_zig_zag1d_32x32;
counts = cpi->coef_counts_32x32;
coef_probs = cpi->common.fc.coef_probs_32x32;
+ band_translate = vp9_coefband_trans_8x8plus;
break;
}
@@ -196,7 +201,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
c = 0;
do {
- const int band = get_coef_band(scan, tx_size, c);
+ const int band = get_coef_band(band_translate, c);
int token;
int v = 0;
rc = scan[c];