summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_encodeframe.c
diff options
context:
space:
mode:
authorRonald S. Bultje <rbultje@google.com>2013-06-21 12:54:52 -0700
committerRonald S. Bultje <rbultje@google.com>2013-06-21 12:54:52 -0700
commit54b2a59623b42c517010e4be9b13a41c41d21d66 (patch)
tree92c83d937588b38f3cbe87742237ed33c56502cd /vp9/encoder/vp9_encodeframe.c
parent7756e9892b312e668fc3134bad2513806d609f73 (diff)
downloadlibvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.tar
libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.tar.gz
libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.tar.bz2
libvpx-54b2a59623b42c517010e4be9b13a41c41d21d66.zip
Implement SSE2 block_error.
Change vp9_block_error() to return a 64bit error variable, change all callers to expect a 64bit return value (this will prevent overflows, which we basically don't check for at all right now). Remove duplicate block_error() function, which fixed that through truncation. Remove old (incompatible) mmx/sse2 block_error SIMD versions and replace with a new one that returns a 64bit value. Encoding time of first 50 frames of bus @ 1500kbps goes from 3min29 to 3min23, i.e. a 3% overall speedup. Change-Id: Ib71ac5508b5ee8a80f1753cd85d72df1629abe68
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r--vp9/encoder/vp9_encodeframe.c43
1 files changed, 27 insertions, 16 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 35e1d646b..f655d456b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -582,7 +582,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
}
static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
- TOKENEXTRA **tp, int *totalrate, int *totaldist,
+ TOKENEXTRA **tp, int *totalrate, int64_t *totaldist,
BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb;
@@ -1195,7 +1195,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
}
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
- int *rate, int *dist) {
+ int *rate, int64_t *dist) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
@@ -1211,7 +1211,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
BLOCK_SIZE_TYPE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
- int r = 0, d = 0;
+ int r = 0;
+ int64_t d = 0;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -1252,7 +1253,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
if (mi_row + (bh >> 1) <= cm->mi_rows) {
- int rt, dt;
+ int rt;
+ int64_t dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
@@ -1270,7 +1272,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
if (mi_col + (bs >> 1) <= cm->mi_cols) {
- int rt, dt;
+ int rt;
+ int64_t dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
@@ -1289,7 +1292,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int x_idx = (i & 1) * (bs >> 2);
int y_idx = (i >> 1) * (bs >> 2);
int jj = i >> 1, ii = i & 0x01;
- int rt, dt;
+ int rt;
+ int64_t dt;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
@@ -1323,7 +1327,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
// results, for encoding speed-up.
static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
- int *dist) {
+ int64_t *dist) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD * const xd = &x->e_mbd;
@@ -1334,7 +1338,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
TOKENEXTRA *tp_orig = *tp;
int i, pl;
BLOCK_SIZE_TYPE subsize;
- int srate = INT_MAX, sdist = INT_MAX;
+ int srate = INT_MAX;
+ int64_t sdist = INT_MAX;
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index != 0) {
@@ -1351,14 +1356,16 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|| (cpi->sf.use_partitions_greater_than
&& bsize > cpi->sf.greater_than_block_size)) {
if (bsize >= BLOCK_SIZE_SB8X8) {
- int r4 = 0, d4 = 0;
+ int r4 = 0;
+ int64_t d4 = 0;
subsize = get_subsize(bsize, PARTITION_SPLIT);
*(get_sb_partitioning(x, bsize)) = subsize;
for (i = 0; i < 4; ++i) {
int x_idx = (i & 1) * (ms >> 1);
int y_idx = (i >> 1) * (ms >> 1);
- int r = 0, d = 0;
+ int r = 0;
+ int64_t d = 0;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
@@ -1386,8 +1393,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
&& bsize <= cpi->sf.less_than_block_size)) {
// PARTITION_HORZ
if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
- int r2, d2;
- int r = 0, d = 0;
+ int r2, r = 0;
+ int64_t d2, d = 0;
subsize = get_subsize(bsize, PARTITION_HORZ);
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
@@ -1418,13 +1425,15 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
// PARTITION_VERT
if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
- int r2, d2;
+ int r2;
+ int64_t d2;
subsize = get_subsize(bsize, PARTITION_VERT);
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_col + (ms >> 1) < cm->mi_cols) {
- int r = 0, d = 0;
+ int r = 0;
+ int64_t d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
@@ -1450,7 +1459,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
// PARTITION_NONE
if ((mi_row + (ms >> 1) < cm->mi_rows) &&
(mi_col + (ms >> 1) < cm->mi_cols)) {
- int r, d;
+ int r;
+ int64_t d;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
get_block_context(x, bsize));
if (bsize >= BLOCK_SIZE_SB8X8) {
@@ -1497,7 +1507,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
// Code each SB in the row
for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
mi_col += 64 / MI_SIZE) {
- int dummy_rate, dummy_dist;
+ int dummy_rate;
+ int64_t dummy_dist;
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
cpi->sf.use_one_partition_size_always ) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;