diff options
-rw-r--r-- | test/fdct4x4_test.cc | 20 | ||||
-rw-r--r-- | test/fdct8x8_test.cc | 31 | ||||
-rw-r--r-- | vp9/common/vp9_blockd.h | 74 | ||||
-rw-r--r-- | vp9/common/vp9_reconintra.c | 21 | ||||
-rw-r--r-- | vp9/common/vp9_reconintra.h | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 160 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 10 |
13 files changed, 153 insertions, 178 deletions
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 4a788edc0..3538c7bd9 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -20,23 +20,24 @@ extern "C" { #include "acm_random.h" #include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" using libvpx_test::ACMRandom; namespace { -void fdct4x4(int16_t *in, int16_t *out, uint8_t */*dst*/, +void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, int stride, int /*tx_type*/) { vp9_short_fdct4x4_c(in, out, stride); } -void idct4x4_add(int16_t */*in*/, int16_t *out, uint8_t *dst, +void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int /*tx_type*/) { vp9_short_idct4x4_add_c(out, dst, stride >> 1); } -void fht4x4(int16_t *in, int16_t *out, uint8_t */*dst*/, +void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, int stride, int tx_type) { vp9_short_fht4x4_c(in, out, stride >> 1, tx_type); } -void iht4x4_add(int16_t */*in*/, int16_t *out, uint8_t *dst, +void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int tx_type) { vp9_short_iht4x4_add_c(out, dst, stride >> 1, tx_type); } @@ -77,8 +78,8 @@ class FwdTrans4x4Test : public ::testing::TestWithParam<int> { TEST_P(FwdTrans4x4Test, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); - int16_t test_input_block[16]; - int16_t test_output_block[16]; + DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16); + DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16); const int pitch = 8; int count_sign_block[16][2]; const int count_test_block = 1000000; @@ -140,9 +141,10 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) { double total_error = 0; const int count_test_block = 1000000; for (int i = 0; i < count_test_block; ++i) { - int16_t test_input_block[16]; - int16_t test_temp_block[16]; - uint8_t dst[16], src[16]; + DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16); + DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16); + DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16); + DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16); for (int j = 0; j < 16; ++j) { src[j] = rnd.Rand8(); diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index 03301a31b..eeae208f2 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -13,6 +13,7 @@ #include <string.h> #include "third_party/googletest/src/include/gtest/gtest.h" +#include "vpx_ports/mem.h" extern "C" { #include "vp9_rtcd.h" @@ -25,14 +26,16 @@ void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch); using libvpx_test::ACMRandom; namespace { -void fdct8x8(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) { +void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/, + int stride, int /*tx_type*/) { vp9_short_fdct8x8_c(in, out, stride); } -void idct8x8_add(int16_t *in, int16_t *out, uint8_t *dst, - int stride, int tx_type) { +void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, + int stride, int /*tx_type*/) { vp9_short_idct8x8_add_c(out, dst, stride >> 1); } -void fht8x8(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) { +void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/, + int stride, int tx_type) { // TODO(jingning): need to refactor this to test both _c and _sse2 functions, // when we have all inverse dct functions done sse2. #if HAVE_SSE2 @@ -41,7 +44,7 @@ void fht8x8(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) { vp9_short_fht8x8_c(in, out, stride >> 1, tx_type); #endif } -void iht8x8_add(int16_t *in, int16_t *out, uint8_t *dst, +void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int tx_type) { vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type); } @@ -79,8 +82,8 @@ class FwdTrans8x8Test : public ::testing::TestWithParam<int> { TEST_P(FwdTrans8x8Test, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); - int16_t test_input_block[64]; - int16_t test_output_block[64]; + DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64); + DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64); const int pitch = 16; int count_sign_block[64][2]; const int count_test_block = 100000; @@ -150,9 +153,10 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) { double total_error = 0; const int count_test_block = 100000; for (int i = 0; i < count_test_block; ++i) { - int16_t test_input_block[64]; - int16_t test_temp_block[64]; - uint8_t dst[64], src[64]; + DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64); + DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64); + DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64); + DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64); for (int j = 0; j < 64; ++j) { src[j] = rnd.Rand8(); @@ -200,9 +204,10 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) { double total_error = 0; const int count_test_block = 100000; for (int i = 0; i < count_test_block; ++i) { - int16_t test_input_block[64]; - int16_t test_temp_block[64]; - uint8_t dst[64], src[64]; + DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64); + DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64); + DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64); + DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64); for (int j = 0; j < 64; ++j) { src[j] = rnd.Rand8() % 2 ? 255 : 0; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index a09f33ed9..0f197e330 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -338,6 +338,7 @@ typedef struct macroblockd { signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; + /* 0 = ZERO_MV, MV */ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = ZERO_MV, MV */ @@ -404,34 +405,15 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, int bwl = b_width_log2(sb_type); int bhl = b_height_log2(sb_type); int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl; - int i; + char pcvalue[2] = {~(0xe << boffset), ~(0xf <<boffset)}; + + assert(MAX(bwl, bhl) <= bsl); // update the partition context at the end notes. set partition bits // of block sizes larger than the current one to be one, and partition // bits of smaller block sizes to be zero. - if ((bwl == bsl) && (bhl == bsl)) { - for (i = 0; i < bs; i++) - xd->left_seg_context[i] = ~(0xf << boffset); - for (i = 0; i < bs; i++) - xd->above_seg_context[i] = ~(0xf << boffset); - } else if ((bwl == bsl) && (bhl < bsl)) { - for (i = 0; i < bs; i++) - xd->left_seg_context[i] = ~(0xe << boffset); - for (i = 0; i < bs; i++) - xd->above_seg_context[i] = ~(0xf << boffset); - } else if ((bwl < bsl) && (bhl == bsl)) { - for (i = 0; i < bs; i++) - xd->left_seg_context[i] = ~(0xf << boffset); - for (i = 0; i < bs; i++) - xd->above_seg_context[i] = ~(0xe << boffset); - } else if ((bwl < bsl) && (bhl < bsl)) { - for (i = 0; i < bs; i++) - xd->left_seg_context[i] = ~(0xe << boffset); - for (i = 0; i < bs; i++) - xd->above_seg_context[i] = ~(0xe << boffset); - } else { - assert(0); - } + vpx_memset(xd->above_seg_context, pcvalue[bwl == bsl], bs); + vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs); } static INLINE int partition_plane_context(MACROBLOCKD *xd, @@ -504,53 +486,25 @@ static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize, return subsize; } -// transform mapping -static TX_TYPE txfm_map(MB_PREDICTION_MODE bmode) { - switch (bmode) { - case TM_PRED : - case D135_PRED : - return ADST_ADST; - - case V_PRED : - case D117_PRED : - case D63_PRED: - return ADST_DCT; - - case H_PRED : - case D153_PRED : - case D27_PRED : - return DCT_ADST; +extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT]; - default: - return DCT_DCT; - } -} - -static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { +static INLINE TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { MODE_INFO *const mi = xd->mode_info_context; MB_MODE_INFO *const mbmi = &mi->mbmi; if (xd->lossless || mbmi->ref_frame[0] != INTRA_FRAME) return DCT_DCT; - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { - return txfm_map(mi->bmi[ib].as_mode.first); - } else { - assert(mbmi->mode <= TM_PRED); - return txfm_map(mbmi->mode); - } + return mode2txfm_map[mbmi->sb_type < BLOCK_SIZE_SB8X8 ? + mi->bmi[ib].as_mode.first : mbmi->mode]; } -static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd) { - return xd->mode_info_context->mbmi.mode <= TM_PRED - ? txfm_map(xd->mode_info_context->mbmi.mode) - : DCT_DCT; +static INLINE TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd) { + return mode2txfm_map[xd->mode_info_context->mbmi.mode]; } -static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd) { - return xd->mode_info_context->mbmi.mode <= TM_PRED - ? txfm_map(xd->mode_info_context->mbmi.mode) - : DCT_DCT; +static INLINE TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd) { + return mode2txfm_map[xd->mode_info_context->mbmi.mode]; } void vp9_setup_block_dptrs(MACROBLOCKD *xd, diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 4086bf0e2..2989b9ccc 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -16,6 +16,24 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vpx_mem/vpx_mem.h" +const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = { + DCT_DCT, // DC + ADST_DCT, // V + DCT_ADST, // H + DCT_DCT, // D45 + ADST_ADST, // D135 + ADST_DCT, // D117 + DCT_ADST, // D153 + DCT_ADST, // D27 + ADST_DCT, // D63 + ADST_ADST, // TM + DCT_DCT, // NEARESTMV + DCT_DCT, // NEARMV + DCT_DCT, // ZEROMV + DCT_DCT // NEWMV +}; + + static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int bw, int bh, uint8_t *yabove_row, uint8_t *yleft_col) { @@ -300,6 +318,7 @@ void vp9_predict_intra_block(MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size, int mode, + uint8_t *reference, int ref_stride, uint8_t *predictor, int pre_stride) { const int bwl = bwl_in - tx_size; const int wmask = (1 << bwl) - 1; @@ -309,7 +328,7 @@ void vp9_predict_intra_block(MACROBLOCKD *xd, const int txfm_block_size = 4 << tx_size; assert(bwl >= 0); - vp9_build_intra_predictors(predictor, pre_stride, + vp9_build_intra_predictors(reference, ref_stride, predictor, pre_stride, mode, txfm_block_size, diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index f5f5f42c4..e369a7192 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -25,6 +25,6 @@ void vp9_predict_intra_block(MACROBLOCKD *xd, int block_idx, int bwl_in, TX_SIZE tx_size, - int mode, + int mode, uint8_t *ref, int ref_stride, uint8_t *predictor, int pre_stride); #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index a87cfd3c5..ac8404001 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -261,6 +261,7 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, plane_b_size = b_width_log2(bsize) - pd->subsampling_x; vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode, + dst, pd->dst.stride, dst, pd->dst.stride); // Early exit if there are no coefficients diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index e800582dc..4b1ff103a 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1531,8 +1531,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { 0, 0, NULL, NULL ); setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0); - vp9_build_block_offsets(x); - vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); xd->mode_info_context->mbmi.mode = DC_PRED; @@ -2006,9 +2004,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { } -void vp9_build_block_offsets(MACROBLOCK *x) { -} - static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { const MACROBLOCKD *xd = &x->e_mbd; const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index d37bdca36..399196927 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -15,8 +15,6 @@ struct macroblock; struct yv12_buffer_config; -void vp9_build_block_offsets(struct macroblock *x); - void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mb_row, int mb_col); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index ccd84b39c..e13ffbdcd 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -78,7 +78,6 @@ void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) -#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp9_token_state vp9_token_state; struct vp9_token_state { @@ -643,6 +642,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, plane_b_size = b_width_log2(bsize) - pd->subsampling_x; vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode, + dst, pd->dst.stride, dst, pd->dst.stride); vp9_subtract_block(txfm_b_size, txfm_b_size, src_diff, bw, src, p->src.stride, dst, pd->dst.stride); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 522f89982..d25d78178 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -521,8 +521,6 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->mode_info_context = cm->mi; - vp9_build_block_offsets(x); - vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); vp9_frame_init_quantizer(cpi); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 1204ce092..22fd87d1b 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -216,6 +216,7 @@ typedef struct { int static_segmentation; int comp_inter_joint_search_thresh; int adpative_rd_thresh; + int skip_encode_sb; int use_lastframe_partitioning; int use_largest_txform; int use_8tap_always; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index dc3536387..833dfff57 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -495,23 +495,26 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + struct macroblockd_plane *p = &x->e_mbd.plane[0]; + const int bw = plane_block_width(bsize, p); + const int bh = plane_block_height(bsize, p); return vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, - 16 << (bwl + bhl)) >> shift; + bw * bh) >> shift; } static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); int64_t sum = 0; int plane; for (plane = 1; plane < MAX_MB_PLANE; plane++) { - const int subsampling = x->e_mbd.plane[plane].subsampling_x + - x->e_mbd.plane[plane].subsampling_y; + struct macroblockd_plane *p = &x->e_mbd.plane[plane]; + const int bw = plane_block_width(bsize, p); + const int bh = plane_block_height(bsize, p); sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, - 16 << (bwl + bhl - subsampling)); + bw * bh); } + return sum >> shift; } @@ -645,7 +648,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int rate = 0; int64_t distortion; VP9_COMMON *const cm = &cpi->common; - const int src_stride = x->plane[0].src.stride; + struct macroblock_plane *p = &x->plane[0]; + struct macroblockd_plane *pd = &xd->plane[0]; + const int src_stride = p->src.stride; uint8_t *src, *dst; int16_t *src_diff, *coeff; @@ -679,18 +684,20 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, block = ib + idy * 2 + idx; xd->mode_info_context->bmi[block].as_mode.first = mode; src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - x->plane[0].src.buf, src_stride); + p->src.buf, src_stride); src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block, - x->plane[0].src_diff); + p->src_diff); coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16); dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); + pd->dst.buf, + pd->dst.stride); vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), - TX_4X4, mode, dst, xd->plane[0].dst.stride); + TX_4X4, mode, + dst, pd->dst.stride, + dst, pd->dst.stride); vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, - dst, xd->plane[0].dst.stride); + dst, pd->dst.stride); tx_type = get_tx_type_4x4(xd, block); if (tx_type != DCT_DCT) { @@ -703,15 +710,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, tempa + idx, templ + idy, TX_4X4, 16); - distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, - block, 16), 16) >> 2; + distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, + block, 16), 16) >> 2; if (best_tx_type != DCT_DCT) - vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), - dst, xd->plane[0].dst.stride, best_tx_type); + vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16), + dst, pd->dst.stride, best_tx_type); else - xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), - dst, xd->plane[0].dst.stride); + xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16), + dst, pd->dst.stride); } } @@ -731,7 +738,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, for (idx = 0; idx < bw; ++idx) { block = ib + idy * 2 + idx; vpx_memcpy(best_dqcoeff[idy * 2 + idx], - BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + BLOCK_OFFSET(pd->dqcoeff, block, 16), sizeof(best_dqcoeff[0])); } } @@ -743,18 +750,19 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, block = ib + idy * 2 + idx; xd->mode_info_context->bmi[block].as_mode.first = *best_mode; dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); + pd->dst.buf, + pd->dst.stride); vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4, - *best_mode, dst, xd->plane[0].dst.stride); + *best_mode, dst, pd->dst.stride, + dst, pd->dst.stride); // inverse transform if (best_tx_type != DCT_DCT) vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst, - xd->plane[0].dst.stride, best_tx_type); + pd->dst.stride, best_tx_type); else xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst, - xd->plane[0].dst.stride); + pd->dst.stride); } } @@ -1093,25 +1101,22 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, int k; MACROBLOCKD *xd = &x->e_mbd; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; - int bwl = b_width_log2(bsize), bw = 1 << bwl; - int bhl = b_height_log2(bsize), bh = 1 << bhl; + const int bw = plane_block_width(bsize, &xd->plane[0]); + const int bh = plane_block_height(bsize, &xd->plane[0]); int idx, idy; const int src_stride = x->plane[0].src.stride; - uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - x->plane[0].src.buf, src_stride); - int16_t* src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, - x->plane[0].src_diff); + uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, + src_stride); + int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src_diff); int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); - uint8_t* const pre = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[0].buf, - xd->plane[0].pre[0].stride); - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); + uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); + uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); int64_t thisdistortion = 0; int thisrate = 0; @@ -1124,7 +1129,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[0], &xd->scale_factor[0], - 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix, + bw, bh, 0 /* no avg */, &xd->subpix, MV_PRECISION_Q3); // TODO(debargha): Make this work properly with the @@ -1138,17 +1143,17 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, dst, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[1], - &xd->scale_factor[1], 4 * bw, 4 * bh, 1, + &xd->scale_factor[1], bw, bh, 1, &xd->subpix, MV_PRECISION_Q3); } - vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, + vp9_subtract_block(bh, bw, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); k = i; - for (idy = 0; idy < bh; ++idy) { - for (idx = 0; idx < bw; ++idx) { + for (idy = 0; idy < bh / 4; ++idy) { + for (idx = 0; idx < bw / 4; ++idx) { k += (idy * 2 + idx); src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, x->plane[0].src_diff); @@ -2231,13 +2236,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int_mv *frame_mv, int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES]) { - const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize); - VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); - const enum BlockSize uv_block_size = get_plane_block_size(bsize, - &xd->plane[1]); MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; const int is_comp_pred = (mbmi->ref_frame[1] > 0); const int num_refs = is_comp_pred ? 2 : 1; @@ -2368,13 +2368,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int p; for (p = 0; p < MAX_MB_PLANE; p++) { - const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y; - const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x; + struct macroblockd_plane *pd = &xd->plane[p]; + const int bw = plane_block_width(bsize, pd); + const int bh = plane_block_height(bsize, pd); int i; - for (i = 0; i < y; i++) - vpx_memcpy(&tmp_buf[p][64 * i], - xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x); + for (i = 0; i < bh; i++) + vpx_memcpy(&tmp_buf[p][64 * i], pd->dst.buf + i * pd->dst.stride, + bw); } pred_exists = 1; } @@ -2392,13 +2393,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int p; for (p = 0; p < MAX_MB_PLANE; p++) { - const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y; - const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x; + struct macroblockd_plane *pd = &xd->plane[p]; + const int bw = plane_block_width(bsize, pd); + const int bh = plane_block_height(bsize, pd); int i; - for (i = 0; i < y; i++) - vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, - &tmp_buf[p][64 * i], x); + for (i = 0; i < bh; i++) + vpx_memcpy(pd->dst.buf + i * pd->dst.stride, &tmp_buf[p][64 * i], bw); } } else { // Handles the special case when a filter that is not in the @@ -2412,36 +2413,37 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->active_map_enabled && x->active_ptr[0] == 0) x->skip = 1; else if (x->encode_breakout) { + const enum BlockSize y_size = get_plane_block_size(bsize, &xd->plane[0]); + const enum BlockSize uv_size = get_plane_block_size(bsize, &xd->plane[1]); + unsigned int var, sse; - int threshold = (xd->plane[0].dequant[1] - * xd->plane[0].dequant[1] >> 4); + int threshold = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1] >> 4); + if (threshold < x->encode_breakout) threshold = x->encode_breakout; - var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf, - x->plane[0].src.stride, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride, - &sse); + var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, xd->plane[0].dst.stride, + &sse); if ((int)sse < threshold) { unsigned int q2dc = xd->plane[0].dequant[0]; - /* If there is no codeable 2nd order dc - or a very small uniform pixel change change */ + // If there is no codeable 2nd order dc + // or a very small uniform pixel change change if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) { // Check u and v to make sure skip is ok int sse2; unsigned int sse2u, sse2v; - var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf, - x->plane[1].src.stride, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride, &sse2u); - var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf, - x->plane[1].src.stride, - xd->plane[2].dst.buf, - xd->plane[1].dst.stride, &sse2v); + var = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, + x->plane[1].src.stride, + xd->plane[1].dst.buf, + xd->plane[1].dst.stride, &sse2u); + var = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, + x->plane[2].src.stride, + xd->plane[2].dst.buf, + xd->plane[2].dst.stride, &sse2v); sse2 = sse2u + sse2v; if (sse2 * 2 < threshold) { @@ -2449,7 +2451,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *distortion = sse + sse2; *rate2 = 500; - /* for best_yrd calculation */ + // for best_yrd calculation *rate_uv = 0; *distortion_uv = sse2; diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index 484afce73..cc7d45243 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -375,7 +375,7 @@ void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) { } // load 8x8 array -static INLINE void load_buffer_8x8(int16_t *input, __m128i in[8], int stride) { +static INLINE void load_buffer_8x8(int16_t *input, __m128i *in, int stride) { in[0] = _mm_load_si128((__m128i *)(input + 0 * stride)); in[1] = _mm_load_si128((__m128i *)(input + 1 * stride)); in[2] = _mm_load_si128((__m128i *)(input + 2 * stride)); @@ -396,7 +396,7 @@ static INLINE void load_buffer_8x8(int16_t *input, __m128i in[8], int stride) { } // write 8x8 array -static INLINE void write_buffer_8x8(int16_t *output, __m128i res[8]) { +static INLINE void write_buffer_8x8(int16_t *output, __m128i *res) { __m128i sign0 = _mm_srai_epi16(res[0], 15); __m128i sign1 = _mm_srai_epi16(res[1], 15); __m128i sign2 = _mm_srai_epi16(res[2], 15); @@ -435,7 +435,7 @@ static INLINE void write_buffer_8x8(int16_t *output, __m128i res[8]) { } // perform in-place transpose -static INLINE void array_transpose_8x8(__m128i res[8]) { +static INLINE void array_transpose_8x8(__m128i *res) { const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); const __m128i tr0_1 = _mm_unpacklo_epi16(res[2], res[3]); const __m128i tr0_2 = _mm_unpackhi_epi16(res[0], res[1]); @@ -486,7 +486,7 @@ static INLINE void array_transpose_8x8(__m128i res[8]) { // 07 17 27 37 47 57 67 77 } -void fdct8_1d_sse2(__m128i in[8]) { +void fdct8_1d_sse2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -626,7 +626,7 @@ void fdct8_1d_sse2(__m128i in[8]) { array_transpose_8x8(in); } -void fadst8_1d_sse2(__m128i in[8]) { +void fadst8_1d_sse2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); |