summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_encodeframe.c
diff options
context:
space:
mode:
authorJingning Han <jingning@google.com>2013-08-20 14:34:17 -0700
committerJingning Han <jingning@google.com>2013-08-22 12:36:02 -0700
commit01a37177d167f2f837ae6d2de03a17511d1b38c9 (patch)
tree2ad399f719b712f8a13a199d21ef3522513a25a4 /vp9/encoder/vp9_encodeframe.c
parent8b810c7a78cbaac715cc516973de2bfdbf4067f9 (diff)
downloadlibvpx-01a37177d167f2f837ae6d2de03a17511d1b38c9.tar
libvpx-01a37177d167f2f837ae6d2de03a17511d1b38c9.tar.gz
libvpx-01a37177d167f2f837ae6d2de03a17511d1b38c9.tar.bz2
libvpx-01a37177d167f2f837ae6d2de03a17511d1b38c9.zip
Refactor rd_pick_partition for parameter control
This commit changes the partition search order of superblocks from {SPLIT, NONE, HORZ, VERT} to {NONE, SPLIT, HORZ, VERT} for consistency with that of the sub8x8 partition search. It enables the use of early termination in the partition search for all block sizes. For ped_area_1080p, 50 frames coded at 4000 kbps, it reduces the runtime from 844305 ms to 818003 ms (a 3% speed-up) at speed 0. This is a further step towards making the in-search partition types configurable, hence unifying the various speed-up approaches. Some speed 1 and 2 features are turned off during the refactoring process, including disable_split_var_thresh and using_small_partition_info. Stricter constraints are applied to use_square_partition_only for right/bottom boundary blocks. These features will be brought back/refined subsequently. At this point, the change improves compression performance on the derf set at speed 1 by about 0.45% and reduces run-time by 9%. Change-Id: I3db9f9d1d1a0d6cbe2e50e49bd9eda1cf705f37c
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r--vp9/encoder/vp9_encodeframe.c365
1 files changed, 161 insertions, 204 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 160833baf..802cf3795 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1671,29 +1671,19 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
int i, pl;
BLOCK_SIZE_TYPE subsize;
int this_rate, sum_rate = 0, best_rate = INT_MAX;
- int64_t this_dist, sum_dist = 0, best_dist = INT_MAX;
+ int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
int64_t sum_rd = 0;
- int do_split = 1, do_rect = 1;
- // Override min_partition_size for edge blocks
- int force_horz_split = mi_row + (ms >> 1) >= cm->mi_rows;
- int force_vert_split = mi_col + (ms >> 1) >= cm->mi_cols;
- const int partition_none_allowed = (bsize <= cpi->sf.max_partition_size ||
- !cpi->sf.auto_min_max_partition_size) &&
- !force_horz_split &&
- !force_vert_split;
- const int partition_horz_allowed = (bsize <= cpi->sf.max_partition_size ||
- !cpi->sf.auto_min_max_partition_size) &&
- !cpi->sf.use_square_partition_only &&
- bsize >= BLOCK_8X8 &&
- !force_vert_split;
- const int partition_vert_allowed = (bsize <= cpi->sf.max_partition_size ||
- !cpi->sf.auto_min_max_partition_size) &&
- !cpi->sf.use_square_partition_only &&
- bsize >= BLOCK_8X8 &&
- !force_horz_split;
- int partition_split_done = 0;
+ int do_split = bsize >= BLOCK_8X8;
+ int do_rect = 1;
+ // Override skipping rectangular partition operations for edge blocks
+ const int force_horz_split = (mi_row + (ms >> 1) >= cm->mi_rows);
+ const int force_vert_split = (mi_col + (ms >> 1) >= cm->mi_cols);
+ int partition_none_allowed = !force_horz_split && !force_vert_split;
+ int partition_horz_allowed = !force_vert_split && bsize >= BLOCK_8X8;
+ int partition_vert_allowed = !force_horz_split && bsize >= BLOCK_8X8;
+ int partition_split_done = 0;
(void) *tp_orig;
if (bsize < BLOCK_8X8) {
@@ -1707,6 +1697,24 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
}
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
+ // Determine partition types in search according to the speed features.
+ // The threshold set here has to be of square block size.
+ if (cpi->sf.auto_min_max_partition_size) {
+ partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
+ bsize >= cpi->sf.min_partition_size);
+ partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+ bsize > cpi->sf.min_partition_size) ||
+ force_horz_split);
+ partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+ bsize > cpi->sf.min_partition_size) ||
+ force_vert_split);
+ do_split &= bsize > cpi->sf.min_partition_size;
+ }
+ if (cpi->sf.use_square_partition_only) {
+ partition_horz_allowed &= force_horz_split;
+ partition_vert_allowed &= force_vert_split;
+ }
+
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
@@ -1719,220 +1727,169 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
do_rect = 0;
}
}
- // PARTITION_SPLIT
- if (do_split &&
- (!cpi->sf.auto_min_max_partition_size ||
- bsize > cpi->sf.min_partition_size)) {
- if (bsize > BLOCK_8X8) {
- subsize = get_subsize(bsize, PARTITION_SPLIT);
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
- int x_idx = (i & 1) * (ms >> 1);
- int y_idx = (i >> 1) * (ms >> 1);
- if ((mi_row + y_idx >= cm->mi_rows) ||
- (mi_col + x_idx >= cm->mi_cols))
- continue;
+ // PARTITION_NONE
+ if (partition_none_allowed) {
+ pick_sb_modes(cpi, mi_row, mi_col, &this_rate, &this_dist, bsize,
+ get_block_context(x, bsize), best_rd);
+ if (this_rate != INT_MAX) {
+ if (bsize >= BLOCK_8X8) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ this_rate += x->partition_cost[pl][PARTITION_NONE];
+ }
+ sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+ if (sum_rd < best_rd) {
+ best_rate = this_rate;
+ best_dist = this_dist;
+ best_rd = sum_rd;
+ if (bsize >= BLOCK_8X8)
+ *(get_sb_partitioning(x, bsize)) = bsize;
+ }
+ }
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
- *(get_sb_index(xd, subsize)) = i;
+ // PARTITION_SPLIT
+ sum_rd = 0;
+ // TODO(jingning): use the motion vectors given by the above search as
+ // the starting point of motion search in the following partition type check.
+ if (do_split) {
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+ int x_idx = (i & 1) * (ms >> 1);
+ int y_idx = (i >> 1) * (ms >> 1);
+
+ if ((mi_row + y_idx >= cm->mi_rows) ||
+ (mi_col + x_idx >= cm->mi_cols))
+ continue;
- rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
- &this_rate, &this_dist, i != 3, best_rd - sum_rd);
+ *(get_sb_index(xd, subsize)) = i;
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd && i == 4) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+ &this_rate, &this_dist, i != 3, best_rd - sum_rd);
+
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
}
- partition_split_done = 1;
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
+ if (sum_rd < best_rd && i == 4) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd) {
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ best_rd = sum_rd;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ } else {
+ // skip rectangular partition test when larger block size
+ // gives better rd cost
+ do_rect &= !partition_none_allowed &&
+ cpi->sf.less_rectangular_check;
+ }
+ }
+ partition_split_done = 1;
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
x->fast_ms = 0;
x->pred_mv.as_int = 0;
x->subblock_ref = 0;
- // Use 4 subblocks' motion estimation results to speed up current
- // partition's checking.
if (partition_split_done &&
cpi->sf.using_small_partition_info) {
compute_fast_motion_search_level(cpi, bsize);
}
- if (!cpi->sf.auto_min_max_partition_size ||
- bsize <= cpi->sf.max_partition_size) {
- int larger_is_better = 0;
-
- // PARTITION_NONE
- if (partition_none_allowed) {
- pick_sb_modes(cpi, mi_row, mi_col, &this_rate, &this_dist, bsize,
- get_block_context(x, bsize), best_rd);
- if (this_rate != INT_MAX) {
- if (bsize >= BLOCK_8X8) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- this_rate += x->partition_cost[pl][PARTITION_NONE];
- }
- sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
- if (sum_rd < best_rd || bsize == BLOCK_8X8) {
- best_rate = this_rate;
- best_dist = this_dist;
- best_rd = sum_rd;
- larger_is_better = 1;
- if (bsize >= BLOCK_8X8)
- *(get_sb_partitioning(x, bsize)) = bsize;
- }
+ // PARTITION_HORZ
+ if (partition_horz_allowed && do_rect) {
+ subsize = get_subsize(bsize, PARTITION_HORZ);
+ *(get_sb_index(xd, subsize)) = 0;
+ pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+ get_block_context(x, subsize), best_rd);
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+
+ if (sum_rd < best_rd && mi_row + (ms >> 1) < cm->mi_rows) {
+ update_state(cpi, get_block_context(x, subsize), subsize, 0);
+ encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+
+ *(get_sb_index(xd, subsize)) = 1;
+ pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &this_rate,
+ &this_dist, subsize, get_block_context(x, subsize),
+ best_rd - sum_rd);
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
}
}
-
- if (bsize == BLOCK_8X8 && do_split) {
- sum_rate = 0; sum_dist = 0; sum_rd = 0;
-
- subsize = get_subsize(bsize, PARTITION_SPLIT);
-
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
- int x_idx = (i & 1) * (ms >> 1);
- int y_idx = (i >> 1) * (ms >> 1);
-
- if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
- continue;
-
- *(get_sb_index(xd, subsize)) = i;
- rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
- &this_rate, &this_dist, i != 3, best_rd - sum_rd);
-
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd && i == 4) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
- larger_is_better = 0;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
+ if (sum_rd < best_rd) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_HORZ];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd) {
+ best_rd = sum_rd;
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ *(get_sb_partitioning(x, bsize)) = subsize;
}
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
- if (do_rect &&
- !cpi->sf.use_square_partition_only &&
- (!cpi->sf.less_rectangular_check || !larger_is_better)) {
- // PARTITION_HORZ
- if (partition_horz_allowed) {
- subsize = get_subsize(bsize, PARTITION_HORZ);
- if (!cpi->sf.auto_min_max_partition_size || force_horz_split ||
- subsize >= cpi->sf.min_partition_size) {
- *(get_sb_index(xd, subsize)) = 0;
- pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- get_block_context(x, subsize), best_rd);
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
-
- if (sum_rd < best_rd && mi_row + (ms >> 1) < cm->mi_rows) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-
- *(get_sb_index(xd, subsize)) = 1;
- pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &this_rate,
- &this_dist, subsize, get_block_context(x, subsize),
- best_rd - sum_rd);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_HORZ];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rd = sum_rd;
- best_rate = sum_rate;
- best_dist = sum_dist;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
- }
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- }
+ // PARTITION_VERT
+ if (partition_vert_allowed && do_rect) {
+ subsize = get_subsize(bsize, PARTITION_VERT);
+
+ *(get_sb_index(xd, subsize)) = 0;
+ pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+ get_block_context(x, subsize), best_rd);
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd && mi_col + (ms >> 1) < cm->mi_cols) {
+ update_state(cpi, get_block_context(x, subsize), subsize, 0);
+ encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+
+ *(get_sb_index(xd, subsize)) = 1;
+ pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &this_rate,
+ &this_dist, subsize, get_block_context(x, subsize),
+ best_rd - sum_rd);
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
}
-
- // PARTITION_VERT
- if (partition_vert_allowed) {
- subsize = get_subsize(bsize, PARTITION_VERT);
- if (!cpi->sf.auto_min_max_partition_size || force_vert_split ||
- subsize >= cpi->sf.min_partition_size) {
- *(get_sb_index(xd, subsize)) = 0;
- pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- get_block_context(x, subsize), best_rd);
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_col + (ms >> 1) < cm->mi_cols) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-
- *(get_sb_index(xd, subsize)) = 1;
- pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &this_rate,
- &this_dist, subsize, get_block_context(x, subsize),
- best_rd - sum_rd);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_VERT];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
- }
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- }
+ }
+ if (sum_rd < best_rd) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_VERT];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd) {
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ best_rd = sum_rd;
+ *(get_sb_partitioning(x, bsize)) = subsize;
}
}
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
+
+
*rate = best_rate;
*dist = best_dist;
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
-
- if (best_rate < INT_MAX && best_dist < INT_MAX && do_recon)
+ if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon)
encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize);
if (bsize == BLOCK_64X64) {
assert(tp_orig < *tp);