diff options
author | Ronald S. Bultje <rbultje@google.com> | 2013-06-21 12:54:52 -0700 |
---|---|---|
committer | Ronald S. Bultje <rbultje@google.com> | 2013-06-21 12:54:52 -0700 |
commit | 54b2a59623b42c517010e4be9b13a41c41d21d66 (patch) | |
tree | 92c83d937588b38f3cbe87742237ed33c56502cd /vp9/encoder/vp9_encodeframe.c | |
parent | 7756e9892b312e668fc3134bad2513806d609f73 (diff) | |
download | libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.tar libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.tar.gz libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.tar.bz2 libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.zip |
Implement SSE2 block_error.
Change vp9_block_error() to return a 64-bit error value, and change all
callers to expect a 64-bit return value (this will prevent overflows,
which we basically don't check for at all right now). Remove the duplicate
block_error() function, which dealt with those overflows through truncation.
Remove the old (incompatible) MMX/SSE2 block_error SIMD versions and replace
them with a new one that returns a 64-bit value.
Encoding time for the first 50 frames of bus @ 1500kbps goes from 3min29 to
3min23, i.e. roughly a 3% overall speedup.
Change-Id: Ib71ac5508b5ee8a80f1753cd85d72df1629abe68
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 35e1d646b..f655d456b 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -582,7 +582,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, } static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, - TOKENEXTRA **tp, int *totalrate, int *totaldist, + TOKENEXTRA **tp, int *totalrate, int64_t *totaldist, BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON * const cm = &cpi->common; MACROBLOCK * const x = &cpi->mb; @@ -1195,7 +1195,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, } static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, - int *rate, int *dist) { + int *rate, int64_t *dist) { VP9_COMMON * const cm = &cpi->common; MACROBLOCK * const x = &cpi->mb; MACROBLOCKD *xd = &cpi->mb.e_mbd; @@ -1211,7 +1211,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, BLOCK_SIZE_TYPE subsize; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; - int r = 0, d = 0; + int r = 0; + int64_t d = 0; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1252,7 +1253,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, get_block_context(x, subsize)); if (mi_row + (bh >> 1) <= cm->mi_rows) { - int rt, dt; + int rt; + int64_t dt; update_state(cpi, get_block_context(x, subsize), subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; @@ -1270,7 +1272,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, get_block_context(x, subsize)); if (mi_col + (bs >> 1) <= cm->mi_cols) { - int rt, dt; + int rt; + int64_t dt; update_state(cpi, get_block_context(x, subsize), subsize, 
0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; @@ -1289,7 +1292,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, int x_idx = (i & 1) * (bs >> 2); int y_idx = (i >> 1) * (bs >> 2); int jj = i >> 1, ii = i & 0x01; - int rt, dt; + int rt; + int64_t dt; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; @@ -1323,7 +1327,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, // results, for encoding speed-up. static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, - int *dist) { + int64_t *dist) { VP9_COMMON * const cm = &cpi->common; MACROBLOCK * const x = &cpi->mb; MACROBLOCKD * const xd = &x->e_mbd; @@ -1334,7 +1338,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, TOKENEXTRA *tp_orig = *tp; int i, pl; BLOCK_SIZE_TYPE subsize; - int srate = INT_MAX, sdist = INT_MAX; + int srate = INT_MAX; + int64_t sdist = INT_MAX; if (bsize < BLOCK_SIZE_SB8X8) if (xd->ab_index != 0) { @@ -1351,14 +1356,16 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, || (cpi->sf.use_partitions_greater_than && bsize > cpi->sf.greater_than_block_size)) { if (bsize >= BLOCK_SIZE_SB8X8) { - int r4 = 0, d4 = 0; + int r4 = 0; + int64_t d4 = 0; subsize = get_subsize(bsize, PARTITION_SPLIT); *(get_sb_partitioning(x, bsize)) = subsize; for (i = 0; i < 4; ++i) { int x_idx = (i & 1) * (ms >> 1); int y_idx = (i >> 1) * (ms >> 1); - int r = 0, d = 0; + int r = 0; + int64_t d = 0; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; @@ -1386,8 +1393,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, && bsize <= cpi->sf.less_than_block_size)) { // PARTITION_HORZ if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { - int r2, d2; - int r = 0, d = 0; + int r2, r = 0; + int64_t d2, d = 0; 
subsize = get_subsize(bsize, PARTITION_HORZ); *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, @@ -1418,13 +1425,15 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, // PARTITION_VERT if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) { - int r2, d2; + int r2; + int64_t d2; subsize = get_subsize(bsize, PARTITION_VERT); *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, get_block_context(x, subsize)); if (mi_col + (ms >> 1) < cm->mi_cols) { - int r = 0, d = 0; + int r = 0; + int64_t d = 0; update_state(cpi, get_block_context(x, subsize), subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -1450,7 +1459,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, // PARTITION_NONE if ((mi_row + (ms >> 1) < cm->mi_rows) && (mi_col + (ms >> 1) < cm->mi_cols)) { - int r, d; + int r; + int64_t d; pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize, get_block_context(x, bsize)); if (bsize >= BLOCK_SIZE_SB8X8) { @@ -1497,7 +1507,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, // Code each SB in the row for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; mi_col += 64 / MI_SIZE) { - int dummy_rate, dummy_dist; + int dummy_rate; + int64_t dummy_dist; if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning || cpi->sf.use_one_partition_size_always ) { const int idx_str = cm->mode_info_stride * mi_row + mi_col; |