diff options
Diffstat (limited to 'vp9/common')
-rw-r--r-- | vp9/common/vp9_blockd.h | 49 | ||||
-rw-r--r-- | vp9/common/vp9_invtrans.c | 46 | ||||
-rw-r--r-- | vp9/common/vp9_mbpitch.c | 10 | ||||
-rw-r--r-- | vp9/common/vp9_recon.c | 7 | ||||
-rw-r--r-- | vp9/common/vp9_reconinter.c | 598 | ||||
-rw-r--r-- | vp9/common/vp9_reconinter.h | 12 | ||||
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 91 |
7 files changed, 249 insertions, 564 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index e8c823a59..d30cd4960 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -342,6 +342,7 @@ struct mb_plane { DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]); DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]); DECLARE_ALIGNED(16, uint16_t, eobs[256]); + DECLARE_ALIGNED(16, int16_t, diff[64 * 64]); PLANE_TYPE plane_type; int subsampling_x; int subsampling_y; @@ -355,7 +356,6 @@ struct mb_plane { BLOCK_OFFSET((x)->plane[2].field, ((i) - 20), 16)) typedef struct macroblockd { - DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ #if CONFIG_CODE_NONZEROCOUNT DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); #endif @@ -878,31 +878,40 @@ typedef void (*foreach_predicted_block_visitor)(int plane, int block, static INLINE void foreach_predicted_block_in_plane( const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane, foreach_predicted_block_visitor visit, void *arg) { - const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); + int i, x, y; + const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; // block sizes in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; - const int block_size_b = bw + bh; - // subsampled size of the block - const int ss_sum = xd->plane[plane].subsampling_x + - xd->plane[plane].subsampling_y; - const int ss_block_size = block_size_b - ss_sum; + const int bw = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bh = b_height_log2(bsize) - xd->plane[plane].subsampling_y; // size of the predictor to use. - // TODO(jkoleszar): support I8X8, I4X4 - const int pred_w = bw - xd->plane[plane].subsampling_x; - const int pred_h = bh - xd->plane[plane].subsampling_y; - const int pred_b = mode == SPLITMV ? 0 : pred_w + pred_h; - const int step = 1 << pred_b; - - int i; - - assert(pred_b <= block_size_b); - assert(pred_b == ss_block_size); - for (i = 0; i < (1 << ss_block_size); i += step) { - visit(plane, i, bsize, pred_w, pred_h, arg); + int pred_w, pred_h; + + if (mode == SPLITMV) { + // 4x4 or 8x8 + const int is_4x4 = + (xd->mode_info_context->mbmi.partitioning == PARTITIONING_4X4); + pred_w = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_x; + pred_h = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_y; + } else { + pred_w = bw; + pred_h = bh; + } + assert(pred_w <= bw); + assert(pred_h <= bh); + + // visit each subblock in raster order + i = 0; + for (y = 0; y < 1 << bh; y += 1 << pred_h) { + for (x = 0; x < 1 << bw; x += 1 << pred_w) { + visit(plane, i, bsize, pred_w, pred_h, arg); + i += 1 << pred_w; + } + i -= 1 << bw; + i += 1 << (bw + pred_h); } } static INLINE void foreach_predicted_block( diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index 3b11fa9cb..0673fd81a 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -38,10 +38,10 @@ void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { for (n = 0; n < bw * bh; n++) { const int x_idx = n & (bw - 1), y_idx = n >> bwl; + const int offset = x_idx * 32 + y_idx * 32 * stride; vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024), - xd->diff + x_idx * 32 + y_idx * 32 * stride, - stride * 2); + xd->plane[0].diff + offset, stride * 2); } } @@ -55,15 +55,14 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int x_idx = n & (bw - 1), y_idx = n >> bwl; const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * bstride + x_idx) * 4); + const int offset = x_idx * 16 + y_idx * 16 * stride; if (tx_type == DCT_DCT) { vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), - xd->diff + x_idx * 16 + y_idx * stride * 16, - stride * 2); + xd->plane[0].diff + offset, stride * 2); } else { vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), - xd->diff + x_idx * 16 + y_idx * stride * 16, - stride, tx_type); + xd->plane[0].diff + offset, stride, tx_type); } } } @@ -77,15 +76,14 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { for (n = 0; n < bw * bh; n++) { const int x_idx = n & (bw - 1), y_idx = n >> bwl; const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2); + const int offset = x_idx * 8 + y_idx * 8 * stride; if (tx_type == DCT_DCT) { vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), - xd->diff + x_idx * 8 + y_idx * stride * 8, - stride * 2); + xd->plane[0].diff + offset, stride * 2); } else { vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), - xd->diff + x_idx * 8 + y_idx * stride * 8, - stride, tx_type); + xd->plane[0].diff + offset, stride, tx_type); } } } @@ -99,16 +97,15 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { for (n = 0; n < bw * bh; n++) { const int x_idx = n & (bw - 1), y_idx = n >> bwl; const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * bstride + x_idx); + const int offset = x_idx * 4 + y_idx * 4 * stride; if (tx_type == DCT_DCT) { vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n], BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), - xd->diff + x_idx * 4 + y_idx * 4 * stride, - stride * 2); + xd->plane[0].diff + offset, stride * 2); } else { vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), - xd->diff + x_idx * 4 + y_idx * 4 * stride, - stride, tx_type); + xd->plane[0].diff + offset, stride, tx_type); } } } @@ -116,15 +113,12 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { assert(bsize == BLOCK_SIZE_SB64X64); - vp9_short_idct32x32(xd->plane[1].dqcoeff, - xd->diff + 4096, 64); - vp9_short_idct32x32(xd->plane[2].dqcoeff, - xd->diff + 4096 + 1024, 64); + vp9_short_idct32x32(xd->plane[1].dqcoeff, xd->plane[1].diff, 64); + vp9_short_idct32x32(xd->plane[2].dqcoeff, xd->plane[2].diff, 64); } void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); - const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); const int stride = 16 << (bwl - 1); int n; @@ -134,15 +128,14 @@ void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int off = x_idx * 16 + y_idx * stride * 16; vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256), - xd->diff + uoff + off, stride * 2); + xd->plane[1].diff + off, stride * 2); vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256), - xd->diff + voff + off, stride * 2); + xd->plane[2].diff + off, stride * 2); } } void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; - const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); const int stride = 8 << (bwl - 1); int n; @@ -152,15 +145,14 @@ void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int off = x_idx * 8 + y_idx * stride * 8; vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), - xd->diff + uoff + off, stride * 2); + xd->plane[1].diff + off, stride * 2); vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), - xd->diff + voff + off, stride * 2); + xd->plane[2].diff + off, stride * 2); } } void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; - const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); const int stride = 4 << (bwl - 1); int n; @@ -171,9 +163,9 @@ void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n], BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), - xd->diff + uoff + off, stride * 2); + xd->plane[1].diff + off, stride * 2); vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n], BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), - xd->diff + voff + off, stride * 2); + xd->plane[2].diff + off, stride * 2); } } diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index 6ed5f27d9..00fe9aa15 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -77,23 +77,23 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) { for (c = 0; c < 4; c++) { const int to = r * 4 + c; const int from = r * 4 * 16 + c * 4; - blockd[to].diff = &mb->diff[from]; + blockd[to].diff = &mb->plane[0].diff[from]; } } for (r = 0; r < 2; r++) { for (c = 0; c < 2; c++) { const int to = 16 + r * 2 + c; - const int from = 256 + r * 4 * 8 + c * 4; - blockd[to].diff = &mb->diff[from]; + const int from = r * 4 * 8 + c * 4; + blockd[to].diff = &mb->plane[1].diff[from]; } } for (r = 0; r < 2; r++) { for (c = 0; c < 2; c++) { const int to = 20 + r * 2 + c; - const int from = 320 + r * 4 * 8 + c * 4; - blockd[to].diff = &mb->diff[from]; + const int from = r * 4 * 8 + c * 4; + blockd[to].diff = &mb->plane[2].diff[from]; } } diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index 121776c69..fae35844d 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -55,7 +55,7 @@ void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst, const int bw = 16 << mb_width_log2(bsize), bh = 16 << mb_height_log2(bsize); int x, y; const int stride = mb->block[0].dst_stride; - const int16_t *diff = mb->diff; + const int16_t *diff = mb->plane[0].diff; for (y = 0; y < bh; y++) { for (x = 0; x < bw; x++) @@ -69,12 +69,11 @@ void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst, void vp9_recon_sbuv_s_c(MACROBLOCKD *mb, uint8_t *u_dst, uint8_t *v_dst, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); - const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 8 << bwl, bh = 8 << bhl; int x, y; const int stride = mb->block[16].dst_stride; - const int16_t *u_diff = mb->diff + uoff; - const int16_t *v_diff = mb->diff + voff; + const int16_t *u_diff = mb->plane[1].diff; + const int16_t *v_diff = mb->plane[2].diff; for (y = 0; y < bh; y++) { for (x = 0; x < bw; x++) { diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 64929c1bc..549993200 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -358,9 +358,6 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, w, h); } -/* Like vp9_build_inter_predictor, but takes the full-pel part of the - * mv separately, and the fractional part as a q4. - */ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int_mv *mv_q4, @@ -438,163 +435,143 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, } } -static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, - struct scale_factors *s, - int block_size, int stride, - int which_mv, int weight, - const struct subpix_fn_table *subpix, - int row, int col) { - uint8_t *d0_predictor = *(d0->base_dst) + d0->dst; - uint8_t *d1_predictor = *(d1->base_dst) + d1->dst; - struct scale_factors * scale = &s[which_mv]; - stride = d0->dst_stride; - - assert(d1_predictor - d0_predictor == block_size); - assert(d1->pre == d0->pre + block_size); - - scale->set_scaled_offsets(scale, row, col); - - if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { - uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; - - vp9_build_inter_predictor(*base_pre + d0->pre, - d0->pre_stride, - d0_predictor, stride, - &d0->bmi.as_mv[which_mv], - scale, - 2 * block_size, block_size, - weight, subpix); - } else { - uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; - uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre; - - vp9_build_inter_predictor(*base_pre0 + d0->pre, - d0->pre_stride, - d0_predictor, stride, - &d0->bmi.as_mv[which_mv], - scale, - block_size, block_size, - weight, subpix); - - scale->set_scaled_offsets(scale, row, col + block_size); +#if !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT - vp9_build_inter_predictor(*base_pre1 + d1->pre, - d1->pre_stride, - d1_predictor, stride, - &d1->bmi.as_mv[which_mv], - scale, - block_size, block_size, - weight, subpix); - } +static INLINE int round_mv_comp_q4(int value) { + return (value < 0 ? value - 2 : value + 2) / 4; } -static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { - /* If the MV points so far into the UMV border that no visible pixels - * are used for reconstruction, the subpel part of the MV can be - * discarded and the MV limited to 16 pixels with equivalent results. - * - * This limit kicks in at 19 pixels for the top and left edges, for - * the 16 pixels plus 3 taps right of the central pixel when subpel - * filtering. The bottom and right edges use 16 pixels plus 2 pixels - * left of the central pixel when filtering. - */ - if (mv->col < (xd->mb_to_left_edge - ((16 + VP9_INTERP_EXTEND) << 3))) - mv->col = xd->mb_to_left_edge - (16 << 3); - else if (mv->col > xd->mb_to_right_edge + ((15 + VP9_INTERP_EXTEND) << 3)) - mv->col = xd->mb_to_right_edge + (16 << 3); - - if (mv->row < (xd->mb_to_top_edge - ((16 + VP9_INTERP_EXTEND) << 3))) - mv->row = xd->mb_to_top_edge - (16 << 3); - else if (mv->row > xd->mb_to_bottom_edge + ((15 + VP9_INTERP_EXTEND) << 3)) - mv->row = xd->mb_to_bottom_edge + (16 << 3); +static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int off, int idx) { + const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.row; + return round_mv_comp_q4(temp); } -/* A version of the above function for chroma block MVs.*/ -static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { - const int extend = VP9_INTERP_EXTEND; - - mv->col = (2 * mv->col < (xd->mb_to_left_edge - ((16 + extend) << 3))) ? - (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; - mv->col = (2 * mv->col > xd->mb_to_right_edge + ((15 + extend) << 3)) ? - (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; - - mv->row = (2 * mv->row < (xd->mb_to_top_edge - ((16 + extend) << 3))) ? - (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; - mv->row = (2 * mv->row > xd->mb_to_bottom_edge + ((15 + extend) << 3)) ? - (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; +static int mi_mv_pred_col_q4(MACROBLOCKD *mb, int off, int idx) { + const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.col; + return round_mv_comp_q4(temp); } -#if !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT // TODO(jkoleszar): yet another mv clamping function :-( MV clamp_mv_to_umv_border_sb(const MV *src_mv, - int bwl, int bhl, + int bwl, int bhl, int ss_x, int ss_y, int mb_to_left_edge, int mb_to_top_edge, int mb_to_right_edge, int mb_to_bottom_edge) { /* If the MV points so far into the UMV border that no visible pixels * are used for reconstruction, the subpel part of the MV can be * discarded and the MV limited to 16 pixels with equivalent results. */ - const int epel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 3; - const int epel_right = epel_left - (1 << 3); - const int epel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 3; - const int epel_bottom = epel_top - (1 << 3); + const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4; + const int spel_right = spel_left - (1 << 4); + const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4; + const int spel_bottom = spel_top - (1 << 4); MV clamped_mv; - clamped_mv.col = clamp(src_mv->col, - mb_to_left_edge - epel_left, - mb_to_right_edge + epel_right); - clamped_mv.row = clamp(src_mv->row, - mb_to_top_edge - epel_top, - mb_to_bottom_edge + epel_bottom); + + assert(ss_x <= 1); + assert(ss_y <= 1); + clamped_mv.col = clamp(src_mv->col << (1 - ss_x), + (mb_to_left_edge << (1 - ss_x)) - spel_left, + (mb_to_right_edge << (1 - ss_x)) + spel_right); + clamped_mv.row = clamp(src_mv->row << (1 - ss_y), + (mb_to_top_edge << (1 - ss_y)) - spel_top, + (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom); return clamped_mv; } +// TODO(jkoleszar): In principle, nothing has to depend on this, but it's +// currently required. Some users look at the mi->bmi, some look at the +// xd->bmi. +static void duplicate_splitmv_bmi(MACROBLOCKD *xd) { + int i; + + for (i = 0; i < 16; i += 2) { + xd->block[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; + xd->block[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; + } +} + struct build_inter_predictors_args { MACROBLOCKD *xd; - uint8_t* dst[MAX_MB_PLANE]; - int dst_stride[MAX_MB_PLANE]; int x; int y; + uint8_t* dst[MAX_MB_PLANE]; + int dst_stride[MAX_MB_PLANE]; + uint8_t* pre[2][MAX_MB_PLANE]; + int pre_stride[2][MAX_MB_PLANE]; }; static void build_inter_predictors(int plane, int block, BLOCK_SIZE_TYPE bsize, int pred_w, int pred_h, void *argv) { const struct build_inter_predictors_args* const arg = argv; - const int bwl = pred_w, bw = 4 << bwl; - const int bhl = pred_h, bh = 4 << bhl; + MACROBLOCKD * const xd = arg->xd; + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const int bh = 4 << bhl, bw = 4 << bwl; const int x_idx = block & ((1 << bwl) - 1), y_idx = block >> bwl; const int x = x_idx * 4, y = y_idx * 4; - MACROBLOCKD * const xd = arg->xd; const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; int which_mv; + assert(x < bw); + assert(y < bh); + assert(xd->mode_info_context->mbmi.mode == SPLITMV || 4 << pred_w == bw); + assert(xd->mode_info_context->mbmi.mode == SPLITMV || 4 << pred_h == bh); + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const MV* const mv = (xd->mode_info_context->mbmi.mode == SPLITMV) - ? &xd->block[block].bmi.as_mv[which_mv].as_mv - : &xd->mode_info_context->mbmi.mv[which_mv].as_mv; - - const uint8_t * const base_pre = which_mv ? xd->second_pre.y_buffer - : xd->pre.y_buffer; - const int pre_stride = which_mv ? xd->second_pre.y_stride - : xd->pre.y_stride; + // source + const uint8_t * const base_pre = arg->pre[which_mv][plane]; + const int pre_stride = arg->pre_stride[which_mv][plane]; const uint8_t *const pre = base_pre + scaled_buffer_offset(x, y, pre_stride, &xd->scale_factor[which_mv]); struct scale_factors * const scale = plane == 0 ? &xd->scale_factor[which_mv] : &xd->scale_factor_uv[which_mv]; + // dest + uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x; + + // motion vector + const MV *mv; + MV split_chroma_mv; int_mv clamped_mv; + + if (xd->mode_info_context->mbmi.mode == SPLITMV) { + if (plane == 0) { + mv = &xd->block[block].bmi.as_mv[which_mv].as_mv; + } else { + const int y_block = (block & 2) * 4 + (block & 1) * 2; + split_chroma_mv.row = mi_mv_pred_row_q4(xd, y_block, which_mv); + split_chroma_mv.col = mi_mv_pred_col_q4(xd, y_block, which_mv); + mv = &split_chroma_mv; + } + } else { + mv = &xd->mode_info_context->mbmi.mv[which_mv].as_mv; + } + + /* TODO(jkoleszar): This clamping is done in the incorrect place for the + * scaling case. It needs to be done on the scaled MV, not the pre-scaling + * MV. Note however that it performs the subsampling aware scaling so + * that the result is always q4. + */ clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl, + xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y, xd->mb_to_left_edge, xd->mb_to_top_edge, xd->mb_to_right_edge, xd->mb_to_bottom_edge); - scale->set_scaled_offsets(scale, arg->y + y, arg->x + x); - vp9_build_inter_predictor(pre, pre_stride, - arg->dst[plane], arg->dst_stride[plane], - &clamped_mv, &xd->scale_factor[which_mv], - bw, bh, which_mv, &xd->subpix); + vp9_build_inter_predictor_q4(pre, pre_stride, + dst, arg->dst_stride[plane], + &clamped_mv, &xd->scale_factor[which_mv], + 4 << pred_w, 4 << pred_h, which_mv, + &xd->subpix); } } void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, @@ -604,16 +581,85 @@ void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mb_col, BLOCK_SIZE_TYPE bsize) { struct build_inter_predictors_args args = { - xd, {dst_y, NULL, NULL}, {dst_ystride, 0, 0}, mb_col * 16, mb_row * 16 + xd, mb_col * 16, mb_row * 16, + {dst_y, NULL, NULL}, {dst_ystride, 0, 0}, + {{xd->pre.y_buffer, NULL, NULL}, {xd->second_pre.y_buffer, NULL, NULL}}, + {{xd->pre.y_stride, 0, 0}, {xd->second_pre.y_stride, 0, 0}}, }; + + // TODO(jkoleszar): This is a hack no matter where you put it, but does it + // belong here? + if (xd->mode_info_context->mbmi.mode == SPLITMV) + duplicate_splitmv_bmi(xd); + foreach_predicted_block_in_plane(xd, bsize, 0, build_inter_predictors, &args); } +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_uvstride, + int mb_row, + int mb_col, + BLOCK_SIZE_TYPE bsize) { + struct build_inter_predictors_args args = { + xd, mb_col * 16, mb_row * 16, + {NULL, dst_u, dst_v}, {0, dst_uvstride, dst_uvstride}, + {{NULL, xd->pre.u_buffer, xd->pre.v_buffer}, + {NULL, xd->second_pre.u_buffer, xd->second_pre.v_buffer}}, + {{0, xd->pre.uv_stride, xd->pre.uv_stride}, + {0, xd->second_pre.uv_stride, xd->second_pre.uv_stride}}, + }; + foreach_predicted_block_uv(xd, bsize, build_inter_predictors, &args); +} +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, + int mb_row, int mb_col, + BLOCK_SIZE_TYPE bsize) { + uint8_t *const y = xd->dst.y_buffer; + uint8_t *const u = xd->dst.u_buffer; + uint8_t *const v = xd->dst.v_buffer; + const int y_stride = xd->dst.y_stride; + const int uv_stride = xd->dst.uv_stride; + + vp9_build_inter_predictors_sby(xd, y, y_stride, mb_row, mb_col, bsize); + vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col, bsize); +#if CONFIG_COMP_INTERINTRA_PRED + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + if (bsize == BLOCK_SIZE_SB32X32) + vp9_build_interintra_32x32_predictors_sb(xd, y, u, v, + y_stride, uv_stride); + else + vp9_build_interintra_64x64_predictors_sb(xd, y, u, v, + y_stride, uv_stride); + } #endif +} +#endif // !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT #define AVERAGE_WEIGHT (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT)) #if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { + /* If the MV points so far into the UMV border that no visible pixels + * are used for reconstruction, the subpel part of the MV can be + * discarded and the MV limited to 16 pixels with equivalent results. + * + * This limit kicks in at 19 pixels for the top and left edges, for + * the 16 pixels plus 3 taps right of the central pixel when subpel + * filtering. The bottom and right edges use 16 pixels plus 2 pixels + * left of the central pixel when filtering. + */ + if (mv->col < (xd->mb_to_left_edge - ((16 + VP9_INTERP_EXTEND) << 3))) + mv->col = xd->mb_to_left_edge - (16 << 3); + else if (mv->col > xd->mb_to_right_edge + ((15 + VP9_INTERP_EXTEND) << 3)) + mv->col = xd->mb_to_right_edge + (16 << 3); + + if (mv->row < (xd->mb_to_top_edge - ((16 + VP9_INTERP_EXTEND) << 3))) + mv->row = xd->mb_to_top_edge - (16 << 3); + else if (mv->row > xd->mb_to_bottom_edge + ((15 + VP9_INTERP_EXTEND) << 3)) + mv->row = xd->mb_to_bottom_edge + (16 << 3); +} + // Whether to use implicit weighting for UV #define USE_IMPLICIT_WEIGHT_UV @@ -950,9 +996,7 @@ static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd, which_mv ? weight : 0, &xd->subpix); } } -#endif -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd, uint8_t *dst_u, uint8_t *dst_v, @@ -993,68 +1037,6 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd, scale, 8, 8, which_mv ? weight : 0, &xd->subpix); } } - -void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { -#ifdef USE_IMPLICIT_WEIGHT_UV - int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); -#else - int weight = AVERAGE_WEIGHT; -#endif - build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride, - weight, mb_row, mb_col); -} - -#else - -void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = - which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv - : xd->mode_info_context->mbmi.need_to_clamp_mvs; - uint8_t *uptr, *vptr; - int pre_stride = which_mv ? xd->second_pre.uv_stride - : xd->pre.uv_stride; - int_mv mv; - - struct scale_factors *scale = &xd->scale_factor_uv[which_mv]; - mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - - - if (clamp_mvs) - clamp_mv_to_umv_border(&mv.as_mv, xd); - - uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); - vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); - - scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16); - - vp9_build_inter_predictor_q4( - uptr, pre_stride, dst_u, dst_uvstride, &mv, - scale, 8, 8, - which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); - - vp9_build_inter_predictor_q4( - vptr, pre_stride, dst_v, dst_uvstride, &mv, - scale, 8, 8, - which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); - } -} -#endif - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT static void build_inter_predictors_sby_w(MACROBLOCKD *x, uint8_t *dst_y, int dst_ystride, @@ -1117,9 +1099,7 @@ void vp9_build_inter_predictors_sby(MACROBLOCKD *x, build_inter_predictors_sby_w(x, dst_y, dst_ystride, weight, mb_row, mb_col, bsize); } -#endif -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT static void build_inter_predictors_sbuv_w(MACROBLOCKD *x, uint8_t *dst_u, uint8_t *dst_v, @@ -1199,71 +1179,6 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, weight, mb_row, mb_col, bsize); } -#else - -void vp9_build_inter_predictors_sbuv(MACROBLOCKD *x, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col, - BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize), bw = 1 << bwl; - const int bhl = mb_height_log2(bsize), bh = 1 << bhl; - uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; - uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < bw * bh; n++) { - int scaled_uv_offset; - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); - x->mb_to_bottom_edge = edge[1] + (((bh - 1 - y_idx) * 16) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); - x->mb_to_right_edge = edge[3] + (((bw - 1 - x_idx) * 16) << 3); - - scaled_uv_offset = scaled_buffer_offset(x_idx * 8, - y_idx * 8, - x->pre.uv_stride, - &x->scale_factor_uv[0]); - x->pre.u_buffer = u1 + scaled_uv_offset; - x->pre.v_buffer = v1 + scaled_uv_offset; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - scaled_uv_offset = scaled_buffer_offset(x_idx * 8, - y_idx * 8, - x->second_pre.uv_stride, - &x->scale_factor_uv[1]); - x->second_pre.u_buffer = u2 + scaled_uv_offset; - x->second_pre.v_buffer = v2 + scaled_uv_offset; - } - - vp9_build_inter16x16_predictors_mbuv(x, - dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_uvstride, mb_row + y_idx, mb_col + x_idx); - } - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.u_buffer = u1; - x->pre.v_buffer = v1; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.u_buffer = u2; - x->second_pre.v_buffer = v2; - } -} -#endif - void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, int mb_row, int mb_col, BLOCK_SIZE_TYPE bsize) { @@ -1286,79 +1201,10 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, } #endif } - -static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, - int mb_row, int mb_col) { - int i; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - BLOCKD *blockd = xd->block; - int which_mv = 0; - const int use_second_ref = mbmi->second_ref_frame > 0; -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && defined(USE_IMPLICIT_WEIGHT_SPLITMV) - int weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col); -#else - int weight = AVERAGE_WEIGHT; -#endif - - if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { - for (i = 0; i < 16; i += 8) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 2]; - const int y = i & 8; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); - } - - build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv, - which_mv ? weight : 0, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - } - } - } else { - for (i = 0; i < 16; i += 2) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - const int x = (i & 3) * 4; - const int y = (i >> 2) * 4; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv, - which_mv ? weight : 0, - &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } - } - } -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -#if !defined(USE_IMPLICIT_WEIGHT_UV) - weight = AVERAGE_WEIGHT; -#endif -#endif - for (i = 16; i < 24; i += 2) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - const int x = 4 * (i & 1); - const int y = ((i - 16) >> 1) * 4; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, - which_mv ? weight : 0, &xd->subpix, - mb_row * 8 + y, mb_col * 8 + x); - } - } -} +#endif // CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT static INLINE int round_mv_comp(int value) { - return (value < 0 ? value - 4 : value + 4) / 8; + return (value < 0 ? value - 2 : value + 2) / 4; } static int mi_mv_pred_row(MACROBLOCKD *mb, int off, int idx) { @@ -1377,128 +1223,20 @@ static int mi_mv_pred_col(MACROBLOCKD *mb, int off, int idx) { return round_mv_comp(temp); } -static int b_mv_pred_row(MACROBLOCKD *mb, int off, int idx) { - BLOCKD *const blockd = mb->block; - const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.row + - blockd[off + 1].bmi.as_mv[idx].as_mv.row + - blockd[off + 4].bmi.as_mv[idx].as_mv.row + - blockd[off + 5].bmi.as_mv[idx].as_mv.row; - return round_mv_comp(temp); -} - -static int b_mv_pred_col(MACROBLOCKD *mb, int off, int idx) { - BLOCKD *const blockd = mb->block; - const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.col + - blockd[off + 1].bmi.as_mv[idx].as_mv.col + - blockd[off + 4].bmi.as_mv[idx].as_mv.col + - blockd[off + 5].bmi.as_mv[idx].as_mv.col; - return round_mv_comp(temp); -} - - -static void build_4x4uvmvs(MACROBLOCKD *xd) { - int i, j; - BLOCKD *blockd = xd->block; - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - const int yoffset = i * 8 + j * 2; - const int uoffset = 16 + i * 2 + j; - const int voffset = 20 + i * 2 + j; - - MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv; - MV *v = &blockd[voffset].bmi.as_mv[0].as_mv; - u->row = mi_mv_pred_row(xd, yoffset, 0); - u->col = mi_mv_pred_col(xd, yoffset, 0); - - // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - v->row = u->row; - v->col = u->col; - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - u = &blockd[uoffset].bmi.as_mv[1].as_mv; - v = &blockd[voffset].bmi.as_mv[1].as_mv; - u->row = mi_mv_pred_row(xd, yoffset, 1); - u->col = mi_mv_pred_col(xd, yoffset, 1); - - // if (mbmi->need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - // if (mbmi->need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - v->row = u->row; - v->col = u->col; - } - } - } -} - void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, int mb_row, int mb_col) { - if (xd->mode_info_context->mbmi.mode != SPLITMV) { - vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); - } else { - build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd, mb_row, mb_col); - } + vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); } + /*encoder only*/ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col) { - int i, j, weight; - BLOCKD *const blockd = xd->block; - - /* build uv mvs */ - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - const int yoffset = i * 8 + j * 2; - const int uoffset = 16 + i * 2 + j; - const int voffset = 20 + i * 2 + j; - - MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv; - MV *v = &blockd[voffset].bmi.as_mv[0].as_mv; + uint8_t *const u = xd->dst.u_buffer; + uint8_t *const v = xd->dst.v_buffer; + const int uv_stride = xd->dst.uv_stride; - v->row = u->row = b_mv_pred_row(xd, yoffset, 0); - v->col = u->col = b_mv_pred_col(xd, yoffset, 0); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - u = &blockd[uoffset].bmi.as_mv[1].as_mv; - v = &blockd[voffset].bmi.as_mv[1].as_mv; - - v->row = u->row = b_mv_pred_row(xd, yoffset, 1); - v->col = u->col = b_mv_pred_col(xd, yoffset, 1); - } - } - } - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && \ - defined(USE_IMPLICIT_WEIGHT_SPLITMV) && \ - defined(USE_IMPLICIT_WEIGHT_UV) - weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col); -#else - weight = AVERAGE_WEIGHT; -#endif - for (i = 16; i < 24; i += 2) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - const int x = 4 * (i & 1); - const int y = ((i - 16) >> 1) * 4; - - int which_mv; - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, - which_mv ? weight : 0, - &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); - } - } + vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col, + BLOCK_SIZE_MB16X16); } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 38981e9c1..ee34fc5d2 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -16,28 +16,20 @@ struct subpix_fn_table; -void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col); - -void vp9_build_inter_predictors_sby(MACROBLOCKD *x, +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, uint8_t *dst_y, int dst_ystride, int mb_row, int mb_col, BLOCK_SIZE_TYPE bsize); -void vp9_build_inter_predictors_sbuv(MACROBLOCKD *x, +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, uint8_t *dst_u, uint8_t *dst_v, int dst_uvstride, int mb_row, int mb_col, BLOCK_SIZE_TYPE bsize); - void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, int mb_row, int mb_col, BLOCK_SIZE_TYPE bsize); diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index 310f8ed24..2b66834a7 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -278,43 +278,20 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71); + DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71); + assert(w <= 64); assert(h <= 64); - - if (x_step_q4 == 16 && y_step_q4 == 16 && - filter_x[3] != 128 && filter_y[3] != 128) { - if (w == 16) { - vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride, - fdata2, 16, - h + 7, filter_x); - vp9_filter_block1d16_v8_ssse3(fdata2, 16, - dst, dst_stride, - h, filter_y); - return; - } - if (w == 8) { - vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride, - fdata2, 16, - h + 7, filter_x); - vp9_filter_block1d8_v8_ssse3(fdata2, 16, - dst, dst_stride, - h, filter_y); - return; - } - if (w == 4) { - vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride, - fdata2, 16, - h + 7, filter_x); - vp9_filter_block1d4_v8_ssse3(fdata2, 16, - dst, dst_stride, - h, filter_y); - return; - } + if (x_step_q4 == 16 && y_step_q4 == 16) { + vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h + 7); + vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, w, h); + } else { + vp9_convolve8_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, w, h); } - vp9_convolve8_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); } void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride, @@ -322,42 +299,20 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71); + DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71); + assert(w <= 64); assert(h <= 64); - - if (x_step_q4 == 16 && y_step_q4 == 16 && - filter_x[3] != 128 && filter_y[3] != 128) { - if (w == 16) { - vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride, - fdata2, 16, - h + 7, filter_x); - vp9_filter_block1d16_v8_avg_ssse3(fdata2, 16, - dst, dst_stride, - h, filter_y); - return; - } - if (w == 8) { - vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride, - fdata2, 16, - h + 7, filter_x); - vp9_filter_block1d8_v8_avg_ssse3(fdata2, 16, - dst, dst_stride, - h, filter_y); - return; - } - if (w == 4) { - vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride, - fdata2, 16, - h + 7, filter_x); - vp9_filter_block1d4_v8_avg_ssse3(fdata2, 16, - dst, dst_stride, - h, filter_y); - return; - } + if (x_step_q4 == 16 && y_step_q4 == 16) { + vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h + 7); + vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + } else { + vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, w, h); } - vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); } #endif |