Diffstat (limited to 'vp9/common')
-rw-r--r--  vp9/common/vp9_blockd.h          |  49
-rw-r--r--  vp9/common/vp9_invtrans.c        |  46
-rw-r--r--  vp9/common/vp9_mbpitch.c         |  10
-rw-r--r--  vp9/common/vp9_recon.c           |   7
-rw-r--r--  vp9/common/vp9_reconinter.c      | 598
-rw-r--r--  vp9/common/vp9_reconinter.h      |  12
-rw-r--r--  vp9/common/x86/vp9_asm_stubs.c   |  91
7 files changed, 249 insertions, 564 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index e8c823a59..d30cd4960 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -342,6 +342,7 @@ struct mb_plane {
DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]);
DECLARE_ALIGNED(16, uint16_t, eobs[256]);
+ DECLARE_ALIGNED(16, int16_t, diff[64 * 64]);
PLANE_TYPE plane_type;
int subsampling_x;
int subsampling_y;
@@ -355,7 +356,6 @@ struct mb_plane {
BLOCK_OFFSET((x)->plane[2].field, ((i) - 20), 16))
typedef struct macroblockd {
- DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */
#if CONFIG_CODE_NONZEROCOUNT
DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]);
#endif
@@ -878,31 +878,40 @@ typedef void (*foreach_predicted_block_visitor)(int plane, int block,
static INLINE void foreach_predicted_block_in_plane(
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
foreach_predicted_block_visitor visit, void *arg) {
- const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
+ int i, x, y;
+ const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
// block sizes in number of 4x4 blocks log 2 ("*_b")
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
- const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
- const int block_size_b = bw + bh;
-
// subsampled size of the block
- const int ss_sum = xd->plane[plane].subsampling_x +
- xd->plane[plane].subsampling_y;
- const int ss_block_size = block_size_b - ss_sum;
+ const int bw = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
+ const int bh = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
// size of the predictor to use.
- // TODO(jkoleszar): support I8X8, I4X4
- const int pred_w = bw - xd->plane[plane].subsampling_x;
- const int pred_h = bh - xd->plane[plane].subsampling_y;
- const int pred_b = mode == SPLITMV ? 0 : pred_w + pred_h;
- const int step = 1 << pred_b;
-
- int i;
-
- assert(pred_b <= block_size_b);
- assert(pred_b == ss_block_size);
- for (i = 0; i < (1 << ss_block_size); i += step) {
- visit(plane, i, bsize, pred_w, pred_h, arg);
+ int pred_w, pred_h;
+
+ if (mode == SPLITMV) {
+ // 4x4 or 8x8
+ const int is_4x4 =
+ (xd->mode_info_context->mbmi.partitioning == PARTITIONING_4X4);
+ pred_w = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_x;
+ pred_h = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_y;
+ } else {
+ pred_w = bw;
+ pred_h = bh;
+ }
+ assert(pred_w <= bw);
+ assert(pred_h <= bh);
+
+ // visit each subblock in raster order
+ i = 0;
+ for (y = 0; y < 1 << bh; y += 1 << pred_h) {
+ for (x = 0; x < 1 << bw; x += 1 << pred_w) {
+ visit(plane, i, bsize, pred_w, pred_h, arg);
+ i += 1 << pred_w;
+ }
+ i -= 1 << bw;
+ i += 1 << (bw + pred_h);
}
}
static INLINE void foreach_predicted_block(
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index 3b11fa9cb..0673fd81a 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -38,10 +38,10 @@ void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+ const int offset = x_idx * 32 + y_idx * 32 * stride;
vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024),
- xd->diff + x_idx * 32 + y_idx * 32 * stride,
- stride * 2);
+ xd->plane[0].diff + offset, stride * 2);
}
}
@@ -55,15 +55,14 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_16x16(xd,
(y_idx * bstride + x_idx) * 4);
+ const int offset = x_idx * 16 + y_idx * 16 * stride;
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
- xd->diff + x_idx * 16 + y_idx * stride * 16,
- stride * 2);
+ xd->plane[0].diff + offset, stride * 2);
} else {
vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
- xd->diff + x_idx * 16 + y_idx * stride * 16,
- stride, tx_type);
+ xd->plane[0].diff + offset, stride, tx_type);
}
}
}
@@ -77,15 +76,14 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
+ const int offset = x_idx * 8 + y_idx * 8 * stride;
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
- xd->diff + x_idx * 8 + y_idx * stride * 8,
- stride * 2);
+ xd->plane[0].diff + offset, stride * 2);
} else {
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
- xd->diff + x_idx * 8 + y_idx * stride * 8,
- stride, tx_type);
+ xd->plane[0].diff + offset, stride, tx_type);
}
}
}
@@ -99,16 +97,15 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
for (n = 0; n < bw * bh; n++) {
const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * bstride + x_idx);
+ const int offset = x_idx * 4 + y_idx * 4 * stride;
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
- xd->diff + x_idx * 4 + y_idx * 4 * stride,
- stride * 2);
+ xd->plane[0].diff + offset, stride * 2);
} else {
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
- xd->diff + x_idx * 4 + y_idx * 4 * stride,
- stride, tx_type);
+ xd->plane[0].diff + offset, stride, tx_type);
}
}
}
@@ -116,15 +113,12 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
assert(bsize == BLOCK_SIZE_SB64X64);
- vp9_short_idct32x32(xd->plane[1].dqcoeff,
- xd->diff + 4096, 64);
- vp9_short_idct32x32(xd->plane[2].dqcoeff,
- xd->diff + 4096 + 1024, 64);
+ vp9_short_idct32x32(xd->plane[1].dqcoeff, xd->plane[1].diff, 64);
+ vp9_short_idct32x32(xd->plane[2].dqcoeff, xd->plane[2].diff, 64);
}
void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
- const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
const int stride = 16 << (bwl - 1);
int n;
@@ -134,15 +128,14 @@ void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int off = x_idx * 16 + y_idx * stride * 16;
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256),
- xd->diff + uoff + off, stride * 2);
+ xd->plane[1].diff + off, stride * 2);
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256),
- xd->diff + voff + off, stride * 2);
+ xd->plane[2].diff + off, stride * 2);
}
}
void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
- const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
const int stride = 8 << (bwl - 1);
int n;
@@ -152,15 +145,14 @@ void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int off = x_idx * 8 + y_idx * stride * 8;
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
- xd->diff + uoff + off, stride * 2);
+ xd->plane[1].diff + off, stride * 2);
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
- xd->diff + voff + off, stride * 2);
+ xd->plane[2].diff + off, stride * 2);
}
}
void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
- const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
const int stride = 4 << (bwl - 1);
int n;
@@ -171,9 +163,9 @@ void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
- xd->diff + uoff + off, stride * 2);
+ xd->plane[1].diff + off, stride * 2);
vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
- xd->diff + voff + off, stride * 2);
+ xd->plane[2].diff + off, stride * 2);
}
}
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
index 6ed5f27d9..00fe9aa15 100644
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -77,23 +77,23 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) {
for (c = 0; c < 4; c++) {
const int to = r * 4 + c;
const int from = r * 4 * 16 + c * 4;
- blockd[to].diff = &mb->diff[from];
+ blockd[to].diff = &mb->plane[0].diff[from];
}
}
for (r = 0; r < 2; r++) {
for (c = 0; c < 2; c++) {
const int to = 16 + r * 2 + c;
- const int from = 256 + r * 4 * 8 + c * 4;
- blockd[to].diff = &mb->diff[from];
+ const int from = r * 4 * 8 + c * 4;
+ blockd[to].diff = &mb->plane[1].diff[from];
}
}
for (r = 0; r < 2; r++) {
for (c = 0; c < 2; c++) {
const int to = 20 + r * 2 + c;
- const int from = 320 + r * 4 * 8 + c * 4;
- blockd[to].diff = &mb->diff[from];
+ const int from = r * 4 * 8 + c * 4;
+ blockd[to].diff = &mb->plane[2].diff[from];
}
}
diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c
index 121776c69..fae35844d 100644
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -55,7 +55,7 @@ void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst,
const int bw = 16 << mb_width_log2(bsize), bh = 16 << mb_height_log2(bsize);
int x, y;
const int stride = mb->block[0].dst_stride;
- const int16_t *diff = mb->diff;
+ const int16_t *diff = mb->plane[0].diff;
for (y = 0; y < bh; y++) {
for (x = 0; x < bw; x++)
@@ -69,12 +69,11 @@ void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst,
void vp9_recon_sbuv_s_c(MACROBLOCKD *mb, uint8_t *u_dst, uint8_t *v_dst,
BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
- const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 8 << bwl, bh = 8 << bhl;
int x, y;
const int stride = mb->block[16].dst_stride;
- const int16_t *u_diff = mb->diff + uoff;
- const int16_t *v_diff = mb->diff + voff;
+ const int16_t *u_diff = mb->plane[1].diff;
+ const int16_t *v_diff = mb->plane[2].diff;
for (y = 0; y < bh; y++) {
for (x = 0; x < bw; x++) {
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 64929c1bc..549993200 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -358,9 +358,6 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
w, h);
}
-/* Like vp9_build_inter_predictor, but takes the full-pel part of the
- * mv separately, and the fractional part as a q4.
- */
void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int_mv *mv_q4,
@@ -438,163 +435,143 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
}
}
-static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
- struct scale_factors *s,
- int block_size, int stride,
- int which_mv, int weight,
- const struct subpix_fn_table *subpix,
- int row, int col) {
- uint8_t *d0_predictor = *(d0->base_dst) + d0->dst;
- uint8_t *d1_predictor = *(d1->base_dst) + d1->dst;
- struct scale_factors * scale = &s[which_mv];
- stride = d0->dst_stride;
-
- assert(d1_predictor - d0_predictor == block_size);
- assert(d1->pre == d0->pre + block_size);
-
- scale->set_scaled_offsets(scale, row, col);
-
- if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
- uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
-
- vp9_build_inter_predictor(*base_pre + d0->pre,
- d0->pre_stride,
- d0_predictor, stride,
- &d0->bmi.as_mv[which_mv],
- scale,
- 2 * block_size, block_size,
- weight, subpix);
- } else {
- uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
- uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;
-
- vp9_build_inter_predictor(*base_pre0 + d0->pre,
- d0->pre_stride,
- d0_predictor, stride,
- &d0->bmi.as_mv[which_mv],
- scale,
- block_size, block_size,
- weight, subpix);
-
- scale->set_scaled_offsets(scale, row, col + block_size);
+#if !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
- vp9_build_inter_predictor(*base_pre1 + d1->pre,
- d1->pre_stride,
- d1_predictor, stride,
- &d1->bmi.as_mv[which_mv],
- scale,
- block_size, block_size,
- weight, subpix);
- }
+static INLINE int round_mv_comp_q4(int value) {
+ return (value < 0 ? value - 2 : value + 2) / 4;
}
-static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
- /* If the MV points so far into the UMV border that no visible pixels
- * are used for reconstruction, the subpel part of the MV can be
- * discarded and the MV limited to 16 pixels with equivalent results.
- *
- * This limit kicks in at 19 pixels for the top and left edges, for
- * the 16 pixels plus 3 taps right of the central pixel when subpel
- * filtering. The bottom and right edges use 16 pixels plus 2 pixels
- * left of the central pixel when filtering.
- */
- if (mv->col < (xd->mb_to_left_edge - ((16 + VP9_INTERP_EXTEND) << 3)))
- mv->col = xd->mb_to_left_edge - (16 << 3);
- else if (mv->col > xd->mb_to_right_edge + ((15 + VP9_INTERP_EXTEND) << 3))
- mv->col = xd->mb_to_right_edge + (16 << 3);
-
- if (mv->row < (xd->mb_to_top_edge - ((16 + VP9_INTERP_EXTEND) << 3)))
- mv->row = xd->mb_to_top_edge - (16 << 3);
- else if (mv->row > xd->mb_to_bottom_edge + ((15 + VP9_INTERP_EXTEND) << 3))
- mv->row = xd->mb_to_bottom_edge + (16 << 3);
+static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int off, int idx) {
+ const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row +
+ mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.row +
+ mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.row +
+ mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.row;
+ return round_mv_comp_q4(temp);
}
-/* A version of the above function for chroma block MVs.*/
-static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
- const int extend = VP9_INTERP_EXTEND;
-
- mv->col = (2 * mv->col < (xd->mb_to_left_edge - ((16 + extend) << 3))) ?
- (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
- mv->col = (2 * mv->col > xd->mb_to_right_edge + ((15 + extend) << 3)) ?
- (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
-
- mv->row = (2 * mv->row < (xd->mb_to_top_edge - ((16 + extend) << 3))) ?
- (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
- mv->row = (2 * mv->row > xd->mb_to_bottom_edge + ((15 + extend) << 3)) ?
- (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
+static int mi_mv_pred_col_q4(MACROBLOCKD *mb, int off, int idx) {
+ const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.col +
+ mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.col +
+ mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.col +
+ mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.col;
+ return round_mv_comp_q4(temp);
}
-#if !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
// TODO(jkoleszar): yet another mv clamping function :-(
MV clamp_mv_to_umv_border_sb(const MV *src_mv,
- int bwl, int bhl,
+ int bwl, int bhl, int ss_x, int ss_y,
int mb_to_left_edge, int mb_to_top_edge,
int mb_to_right_edge, int mb_to_bottom_edge) {
/* If the MV points so far into the UMV border that no visible pixels
* are used for reconstruction, the subpel part of the MV can be
* discarded and the MV limited to 16 pixels with equivalent results.
*/
- const int epel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 3;
- const int epel_right = epel_left - (1 << 3);
- const int epel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 3;
- const int epel_bottom = epel_top - (1 << 3);
+ const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4;
+ const int spel_right = spel_left - (1 << 4);
+ const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4;
+ const int spel_bottom = spel_top - (1 << 4);
MV clamped_mv;
- clamped_mv.col = clamp(src_mv->col,
- mb_to_left_edge - epel_left,
- mb_to_right_edge + epel_right);
- clamped_mv.row = clamp(src_mv->row,
- mb_to_top_edge - epel_top,
- mb_to_bottom_edge + epel_bottom);
+
+ assert(ss_x <= 1);
+ assert(ss_y <= 1);
+ clamped_mv.col = clamp(src_mv->col << (1 - ss_x),
+ (mb_to_left_edge << (1 - ss_x)) - spel_left,
+ (mb_to_right_edge << (1 - ss_x)) + spel_right);
+ clamped_mv.row = clamp(src_mv->row << (1 - ss_y),
+ (mb_to_top_edge << (1 - ss_y)) - spel_top,
+ (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
return clamped_mv;
}
+// TODO(jkoleszar): In principle, nothing has to depend on this, but it's
+// currently required. Some users look at the mi->bmi, some look at the
+// xd->bmi.
+static void duplicate_splitmv_bmi(MACROBLOCKD *xd) {
+ int i;
+
+ for (i = 0; i < 16; i += 2) {
+ xd->block[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ xd->block[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
+ }
+}
+
struct build_inter_predictors_args {
MACROBLOCKD *xd;
- uint8_t* dst[MAX_MB_PLANE];
- int dst_stride[MAX_MB_PLANE];
int x;
int y;
+ uint8_t* dst[MAX_MB_PLANE];
+ int dst_stride[MAX_MB_PLANE];
+ uint8_t* pre[2][MAX_MB_PLANE];
+ int pre_stride[2][MAX_MB_PLANE];
};
static void build_inter_predictors(int plane, int block,
BLOCK_SIZE_TYPE bsize,
int pred_w, int pred_h,
void *argv) {
const struct build_inter_predictors_args* const arg = argv;
- const int bwl = pred_w, bw = 4 << bwl;
- const int bhl = pred_h, bh = 4 << bhl;
+ MACROBLOCKD * const xd = arg->xd;
+ const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
+ const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
+ const int bh = 4 << bhl, bw = 4 << bwl;
const int x_idx = block & ((1 << bwl) - 1), y_idx = block >> bwl;
const int x = x_idx * 4, y = y_idx * 4;
- MACROBLOCKD * const xd = arg->xd;
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
int which_mv;
+ assert(x < bw);
+ assert(y < bh);
+ assert(xd->mode_info_context->mbmi.mode == SPLITMV || 4 << pred_w == bw);
+ assert(xd->mode_info_context->mbmi.mode == SPLITMV || 4 << pred_h == bh);
+
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- const MV* const mv = (xd->mode_info_context->mbmi.mode == SPLITMV)
- ? &xd->block[block].bmi.as_mv[which_mv].as_mv
- : &xd->mode_info_context->mbmi.mv[which_mv].as_mv;
-
- const uint8_t * const base_pre = which_mv ? xd->second_pre.y_buffer
- : xd->pre.y_buffer;
- const int pre_stride = which_mv ? xd->second_pre.y_stride
- : xd->pre.y_stride;
+ // source
+ const uint8_t * const base_pre = arg->pre[which_mv][plane];
+ const int pre_stride = arg->pre_stride[which_mv][plane];
const uint8_t *const pre = base_pre +
scaled_buffer_offset(x, y, pre_stride, &xd->scale_factor[which_mv]);
struct scale_factors * const scale =
plane == 0 ? &xd->scale_factor[which_mv] : &xd->scale_factor_uv[which_mv];
+ // dest
+ uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x;
+
+ // motion vector
+ const MV *mv;
+ MV split_chroma_mv;
int_mv clamped_mv;
+
+ if (xd->mode_info_context->mbmi.mode == SPLITMV) {
+ if (plane == 0) {
+ mv = &xd->block[block].bmi.as_mv[which_mv].as_mv;
+ } else {
+ const int y_block = (block & 2) * 4 + (block & 1) * 2;
+ split_chroma_mv.row = mi_mv_pred_row_q4(xd, y_block, which_mv);
+ split_chroma_mv.col = mi_mv_pred_col_q4(xd, y_block, which_mv);
+ mv = &split_chroma_mv;
+ }
+ } else {
+ mv = &xd->mode_info_context->mbmi.mv[which_mv].as_mv;
+ }
+
+ /* TODO(jkoleszar): This clamping is done in the incorrect place for the
+ * scaling case. It needs to be done on the scaled MV, not the pre-scaling
+ * MV. Note however that it performs the subsampling aware scaling so
+ * that the result is always q4.
+ */
clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl,
+ xd->plane[plane].subsampling_x,
+ xd->plane[plane].subsampling_y,
xd->mb_to_left_edge,
xd->mb_to_top_edge,
xd->mb_to_right_edge,
xd->mb_to_bottom_edge);
-
scale->set_scaled_offsets(scale, arg->y + y, arg->x + x);
- vp9_build_inter_predictor(pre, pre_stride,
- arg->dst[plane], arg->dst_stride[plane],
- &clamped_mv, &xd->scale_factor[which_mv],
- bw, bh, which_mv, &xd->subpix);
+ vp9_build_inter_predictor_q4(pre, pre_stride,
+ dst, arg->dst_stride[plane],
+ &clamped_mv, &xd->scale_factor[which_mv],
+ 4 << pred_w, 4 << pred_h, which_mv,
+ &xd->subpix);
}
}
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd,
@@ -604,16 +581,85 @@ void vp9_build_inter_predictors_sby(MACROBLOCKD *xd,
int mb_col,
BLOCK_SIZE_TYPE bsize) {
struct build_inter_predictors_args args = {
- xd, {dst_y, NULL, NULL}, {dst_ystride, 0, 0}, mb_col * 16, mb_row * 16
+ xd, mb_col * 16, mb_row * 16,
+ {dst_y, NULL, NULL}, {dst_ystride, 0, 0},
+ {{xd->pre.y_buffer, NULL, NULL}, {xd->second_pre.y_buffer, NULL, NULL}},
+ {{xd->pre.y_stride, 0, 0}, {xd->second_pre.y_stride, 0, 0}},
};
+
+ // TODO(jkoleszar): This is a hack no matter where you put it, but does it
+ // belong here?
+ if (xd->mode_info_context->mbmi.mode == SPLITMV)
+ duplicate_splitmv_bmi(xd);
+
foreach_predicted_block_in_plane(xd, bsize, 0, build_inter_predictors, &args);
}
+void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col,
+ BLOCK_SIZE_TYPE bsize) {
+ struct build_inter_predictors_args args = {
+ xd, mb_col * 16, mb_row * 16,
+ {NULL, dst_u, dst_v}, {0, dst_uvstride, dst_uvstride},
+ {{NULL, xd->pre.u_buffer, xd->pre.v_buffer},
+ {NULL, xd->second_pre.u_buffer, xd->second_pre.v_buffer}},
+ {{0, xd->pre.uv_stride, xd->pre.uv_stride},
+ {0, xd->second_pre.uv_stride, xd->second_pre.uv_stride}},
+ };
+ foreach_predicted_block_uv(xd, bsize, build_inter_predictors, &args);
+}
+void vp9_build_inter_predictors_sb(MACROBLOCKD *xd,
+ int mb_row, int mb_col,
+ BLOCK_SIZE_TYPE bsize) {
+ uint8_t *const y = xd->dst.y_buffer;
+ uint8_t *const u = xd->dst.u_buffer;
+ uint8_t *const v = xd->dst.v_buffer;
+ const int y_stride = xd->dst.y_stride;
+ const int uv_stride = xd->dst.uv_stride;
+
+ vp9_build_inter_predictors_sby(xd, y, y_stride, mb_row, mb_col, bsize);
+ vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col, bsize);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+ if (bsize == BLOCK_SIZE_SB32X32)
+ vp9_build_interintra_32x32_predictors_sb(xd, y, u, v,
+ y_stride, uv_stride);
+ else
+ vp9_build_interintra_64x64_predictors_sb(xd, y, u, v,
+ y_stride, uv_stride);
+ }
#endif
+}
+#endif // !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
#define AVERAGE_WEIGHT (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT))
#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
+ /* If the MV points so far into the UMV border that no visible pixels
+ * are used for reconstruction, the subpel part of the MV can be
+ * discarded and the MV limited to 16 pixels with equivalent results.
+ *
+ * This limit kicks in at 19 pixels for the top and left edges, for
+ * the 16 pixels plus 3 taps right of the central pixel when subpel
+ * filtering. The bottom and right edges use 16 pixels plus 2 pixels
+ * left of the central pixel when filtering.
+ */
+ if (mv->col < (xd->mb_to_left_edge - ((16 + VP9_INTERP_EXTEND) << 3)))
+ mv->col = xd->mb_to_left_edge - (16 << 3);
+ else if (mv->col > xd->mb_to_right_edge + ((15 + VP9_INTERP_EXTEND) << 3))
+ mv->col = xd->mb_to_right_edge + (16 << 3);
+
+ if (mv->row < (xd->mb_to_top_edge - ((16 + VP9_INTERP_EXTEND) << 3)))
+ mv->row = xd->mb_to_top_edge - (16 << 3);
+ else if (mv->row > xd->mb_to_bottom_edge + ((15 + VP9_INTERP_EXTEND) << 3))
+ mv->row = xd->mb_to_bottom_edge + (16 << 3);
+}
+
// Whether to use implicit weighting for UV
#define USE_IMPLICIT_WEIGHT_UV
@@ -950,9 +996,7 @@ static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd,
which_mv ? weight : 0, &xd->subpix);
}
}
-#endif
-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
@@ -993,68 +1037,6 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
scale, 8, 8, which_mv ? weight : 0, &xd->subpix);
}
}
-
-void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride,
- int mb_row,
- int mb_col) {
-#ifdef USE_IMPLICIT_WEIGHT_UV
- int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
-#else
- int weight = AVERAGE_WEIGHT;
-#endif
- build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride,
- weight, mb_row, mb_col);
-}
-
-#else
-
-void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride,
- int mb_row,
- int mb_col) {
- const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
- int which_mv;
-
- for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- const int clamp_mvs =
- which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
- : xd->mode_info_context->mbmi.need_to_clamp_mvs;
- uint8_t *uptr, *vptr;
- int pre_stride = which_mv ? xd->second_pre.uv_stride
- : xd->pre.uv_stride;
- int_mv mv;
-
- struct scale_factors *scale = &xd->scale_factor_uv[which_mv];
- mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
-
-
- if (clamp_mvs)
- clamp_mv_to_umv_border(&mv.as_mv, xd);
-
- uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
- vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
-
- scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
-
- vp9_build_inter_predictor_q4(
- uptr, pre_stride, dst_u, dst_uvstride, &mv,
- scale, 8, 8,
- which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
-
- vp9_build_inter_predictor_q4(
- vptr, pre_stride, dst_v, dst_uvstride, &mv,
- scale, 8, 8,
- which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
- }
-}
-#endif
-
-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
static void build_inter_predictors_sby_w(MACROBLOCKD *x,
uint8_t *dst_y,
int dst_ystride,
@@ -1117,9 +1099,7 @@ void vp9_build_inter_predictors_sby(MACROBLOCKD *x,
build_inter_predictors_sby_w(x, dst_y, dst_ystride, weight,
mb_row, mb_col, bsize);
}
-#endif
-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
static void build_inter_predictors_sbuv_w(MACROBLOCKD *x,
uint8_t *dst_u,
uint8_t *dst_v,
@@ -1199,71 +1179,6 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
weight, mb_row, mb_col, bsize);
}
-#else
-
-void vp9_build_inter_predictors_sbuv(MACROBLOCKD *x,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride,
- int mb_row,
- int mb_col,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
- const int bhl = mb_height_log2(bsize), bh = 1 << bhl;
- uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
- uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
- int edge[4], n;
-
- edge[0] = x->mb_to_top_edge;
- edge[1] = x->mb_to_bottom_edge;
- edge[2] = x->mb_to_left_edge;
- edge[3] = x->mb_to_right_edge;
-
- for (n = 0; n < bw * bh; n++) {
- int scaled_uv_offset;
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-
- x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
- x->mb_to_bottom_edge = edge[1] + (((bh - 1 - y_idx) * 16) << 3);
- x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
- x->mb_to_right_edge = edge[3] + (((bw - 1 - x_idx) * 16) << 3);
-
- scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
- y_idx * 8,
- x->pre.uv_stride,
- &x->scale_factor_uv[0]);
- x->pre.u_buffer = u1 + scaled_uv_offset;
- x->pre.v_buffer = v1 + scaled_uv_offset;
-
- if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
- y_idx * 8,
- x->second_pre.uv_stride,
- &x->scale_factor_uv[1]);
- x->second_pre.u_buffer = u2 + scaled_uv_offset;
- x->second_pre.v_buffer = v2 + scaled_uv_offset;
- }
-
- vp9_build_inter16x16_predictors_mbuv(x,
- dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_uvstride, mb_row + y_idx, mb_col + x_idx);
- }
- x->mb_to_top_edge = edge[0];
- x->mb_to_bottom_edge = edge[1];
- x->mb_to_left_edge = edge[2];
- x->mb_to_right_edge = edge[3];
-
- x->pre.u_buffer = u1;
- x->pre.v_buffer = v1;
-
- if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.u_buffer = u2;
- x->second_pre.v_buffer = v2;
- }
-}
-#endif
-
void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
int mb_row, int mb_col,
BLOCK_SIZE_TYPE bsize) {
@@ -1286,79 +1201,10 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
}
#endif
}
-
-static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
- int mb_row, int mb_col) {
- int i;
- MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
- BLOCKD *blockd = xd->block;
- int which_mv = 0;
- const int use_second_ref = mbmi->second_ref_frame > 0;
-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && defined(USE_IMPLICIT_WEIGHT_SPLITMV)
- int weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col);
-#else
- int weight = AVERAGE_WEIGHT;
-#endif
-
- if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
- for (i = 0; i < 16; i += 8) {
- BLOCKD *d0 = &blockd[i];
- BLOCKD *d1 = &blockd[i + 2];
- const int y = i & 8;
-
- blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
- blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
-
- for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
- }
-
- build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv,
- which_mv ? weight : 0,
- &xd->subpix, mb_row * 16 + y, mb_col * 16);
- }
- }
- } else {
- for (i = 0; i < 16; i += 2) {
- BLOCKD *d0 = &blockd[i];
- BLOCKD *d1 = &blockd[i + 1];
- const int x = (i & 3) * 4;
- const int y = (i >> 2) * 4;
-
- blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
- blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
-
- for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv,
- which_mv ? weight : 0,
- &xd->subpix,
- mb_row * 16 + y, mb_col * 16 + x);
- }
- }
- }
-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
-#if !defined(USE_IMPLICIT_WEIGHT_UV)
- weight = AVERAGE_WEIGHT;
-#endif
-#endif
- for (i = 16; i < 24; i += 2) {
- BLOCKD *d0 = &blockd[i];
- BLOCKD *d1 = &blockd[i + 1];
- const int x = 4 * (i & 1);
- const int y = ((i - 16) >> 1) * 4;
-
- for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
- which_mv ? weight : 0, &xd->subpix,
- mb_row * 8 + y, mb_col * 8 + x);
- }
- }
-}
+#endif // CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
static INLINE int round_mv_comp(int value) {
- return (value < 0 ? value - 4 : value + 4) / 8;
+ return (value < 0 ? value - 2 : value + 2) / 4;
}
static int mi_mv_pred_row(MACROBLOCKD *mb, int off, int idx) {
@@ -1377,128 +1223,20 @@ static int mi_mv_pred_col(MACROBLOCKD *mb, int off, int idx) {
return round_mv_comp(temp);
}
-static int b_mv_pred_row(MACROBLOCKD *mb, int off, int idx) {
- BLOCKD *const blockd = mb->block;
- const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.row +
- blockd[off + 1].bmi.as_mv[idx].as_mv.row +
- blockd[off + 4].bmi.as_mv[idx].as_mv.row +
- blockd[off + 5].bmi.as_mv[idx].as_mv.row;
- return round_mv_comp(temp);
-}
-
-static int b_mv_pred_col(MACROBLOCKD *mb, int off, int idx) {
- BLOCKD *const blockd = mb->block;
- const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.col +
- blockd[off + 1].bmi.as_mv[idx].as_mv.col +
- blockd[off + 4].bmi.as_mv[idx].as_mv.col +
- blockd[off + 5].bmi.as_mv[idx].as_mv.col;
- return round_mv_comp(temp);
-}
-
-
-static void build_4x4uvmvs(MACROBLOCKD *xd) {
- int i, j;
- BLOCKD *blockd = xd->block;
-
- for (i = 0; i < 2; i++) {
- for (j = 0; j < 2; j++) {
- const int yoffset = i * 8 + j * 2;
- const int uoffset = 16 + i * 2 + j;
- const int voffset = 20 + i * 2 + j;
-
- MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv;
- MV *v = &blockd[voffset].bmi.as_mv[0].as_mv;
- u->row = mi_mv_pred_row(xd, yoffset, 0);
- u->col = mi_mv_pred_col(xd, yoffset, 0);
-
- // if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(u, xd);
-
- // if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(u, xd);
-
- v->row = u->row;
- v->col = u->col;
-
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- u = &blockd[uoffset].bmi.as_mv[1].as_mv;
- v = &blockd[voffset].bmi.as_mv[1].as_mv;
- u->row = mi_mv_pred_row(xd, yoffset, 1);
- u->col = mi_mv_pred_col(xd, yoffset, 1);
-
- // if (mbmi->need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(u, xd);
-
- // if (mbmi->need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(u, xd);
-
- v->row = u->row;
- v->col = u->col;
- }
- }
- }
-}
-
void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
int mb_row,
int mb_col) {
- if (xd->mode_info_context->mbmi.mode != SPLITMV) {
- vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
- } else {
- build_4x4uvmvs(xd);
- build_inter4x4_predictors_mb(xd, mb_row, mb_col);
- }
+ vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
}
+
/*encoder only*/
void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
int mb_row, int mb_col) {
- int i, j, weight;
- BLOCKD *const blockd = xd->block;
-
- /* build uv mvs */
- for (i = 0; i < 2; i++) {
- for (j = 0; j < 2; j++) {
- const int yoffset = i * 8 + j * 2;
- const int uoffset = 16 + i * 2 + j;
- const int voffset = 20 + i * 2 + j;
-
- MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv;
- MV *v = &blockd[voffset].bmi.as_mv[0].as_mv;
+ uint8_t *const u = xd->dst.u_buffer;
+ uint8_t *const v = xd->dst.v_buffer;
+ const int uv_stride = xd->dst.uv_stride;
- v->row = u->row = b_mv_pred_row(xd, yoffset, 0);
- v->col = u->col = b_mv_pred_col(xd, yoffset, 0);
-
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- u = &blockd[uoffset].bmi.as_mv[1].as_mv;
- v = &blockd[voffset].bmi.as_mv[1].as_mv;
-
- v->row = u->row = b_mv_pred_row(xd, yoffset, 1);
- v->col = u->col = b_mv_pred_col(xd, yoffset, 1);
- }
- }
- }
-
-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && \
- defined(USE_IMPLICIT_WEIGHT_SPLITMV) && \
- defined(USE_IMPLICIT_WEIGHT_UV)
- weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col);
-#else
- weight = AVERAGE_WEIGHT;
-#endif
- for (i = 16; i < 24; i += 2) {
- const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
- const int x = 4 * (i & 1);
- const int y = ((i - 16) >> 1) * 4;
-
- int which_mv;
- BLOCKD *d0 = &blockd[i];
- BLOCKD *d1 = &blockd[i + 1];
-
- for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
- which_mv ? weight : 0,
- &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
- }
- }
+ vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col,
+ BLOCK_SIZE_MB16X16);
}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 38981e9c1..ee34fc5d2 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -16,28 +16,20 @@
struct subpix_fn_table;
-void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride,
- int mb_row,
- int mb_col);
-
-void vp9_build_inter_predictors_sby(MACROBLOCKD *x,
+void vp9_build_inter_predictors_sby(MACROBLOCKD *xd,
uint8_t *dst_y,
int dst_ystride,
int mb_row,
int mb_col,
BLOCK_SIZE_TYPE bsize);
-void vp9_build_inter_predictors_sbuv(MACROBLOCKD *x,
+void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_uvstride,
int mb_row,
int mb_col,
BLOCK_SIZE_TYPE bsize);
-
void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
int mb_row, int mb_col,
BLOCK_SIZE_TYPE bsize);
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 310f8ed24..2b66834a7 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -278,43 +278,20 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71);
+ assert(w <= 64);
assert(h <= 64);
-
- if (x_step_q4 == 16 && y_step_q4 == 16 &&
- filter_x[3] != 128 && filter_y[3] != 128) {
- if (w == 16) {
- vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d16_v8_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 8) {
- vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d8_v8_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 4) {
- vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d4_v8_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
+ if (x_step_q4 == 16 && y_step_q4 == 16) {
+ vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h + 7);
+ vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h);
+ } else {
+ vp9_convolve8_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h);
}
- vp9_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
}
void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
@@ -322,42 +299,20 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71);
+ assert(w <= 64);
assert(h <= 64);
-
- if (x_step_q4 == 16 && y_step_q4 == 16 &&
- filter_x[3] != 128 && filter_y[3] != 128) {
- if (w == 16) {
- vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d16_v8_avg_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 8) {
- vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d8_v8_avg_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 4) {
- vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d4_v8_avg_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
+ if (x_step_q4 == 16 && y_step_q4 == 16) {
+ vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h + 7);
+ vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ } else {
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h);
}
- vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
}
#endif