diff options
-rw-r--r-- | build/make/Makefile | 12 | ||||
-rw-r--r-- | test/tile_independence_test.cc | 8 | ||||
-rw-r--r-- | vp8/vp8cx.mk | 10 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 193 | ||||
-rw-r--r-- | vp9/vp9_common.mk | 8 | ||||
-rw-r--r-- | vp9/vp9cx.mk | 5 | ||||
-rw-r--r-- | vp9/vp9dx.mk | 4 |
9 files changed, 149 insertions, 106 deletions
diff --git a/build/make/Makefile b/build/make/Makefile index 4ac5bcf1f..de71c6133 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -103,6 +103,18 @@ test:: .PHONY: testdata testdata:: +# Add compiler flags for intrinsic files +$(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx +$(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx +$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 +$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 +$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 +$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 +$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 +$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 +$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 +$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 + $(BUILD_PFX)%.c.d: %.c $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc index 711d0bd45..9633ed756 100644 --- a/test/tile_independence_test.cc +++ b/test/tile_independence_test.cc @@ -56,7 +56,13 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest, void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt, ::libvpx_test::MD5 *md5) { - dec->DecodeFrame((uint8_t *) pkt->data.frame.buf, pkt->data.frame.sz); + const vpx_codec_err_t res = + dec->DecodeFrame(reinterpret_cast<uint8_t*>(pkt->data.frame.buf), + pkt->data.frame.sz); + if (res != VPX_CODEC_OK) { + abort_ = true; + ASSERT_EQ(VPX_CODEC_OK, res); + } const vpx_image_t *img = dec->GetDxData().Next(); md5->Add(img); } diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 7d1904aaf..cd091f39a 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -91,18 +91,8 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c -# TODO(johann) make this generic -ifeq ($(HAVE_SSE2),yes) -vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2 -vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2 -endif - ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 -vp8/encoder/x86/denoising_sse2.c.d: CFLAGS += -msse2 -endif endif VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 2f713d3ad..2457f79e1 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -1246,8 +1246,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { setup_loopfilter(pc, xd, &header_bc); - vp9_read_literal(&header_bc, 2); // unused - setup_quantization(pbi, &header_bc); // Determine if the golden frame or ARF buffer should be updated and how. @@ -1343,11 +1341,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { vp9_setup_block_dptrs(xd); // clear out the coeff buffer - vpx_memset(xd->plane[0].qcoeff, 0, sizeof(xd->plane[0].qcoeff)); - vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff)); - vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff)); - - vp9_read_bit(&header_bc); // unused + for (i = 0; i < MAX_MB_PLANE; ++i) + vp9_zero(xd->plane[i].qcoeff); vp9_decode_mode_mvs_init(pbi, &header_bc); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 7152ac955..0d5de648e 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1930,9 +1930,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { } } - // TODO(jkoleszar): remove these unused bits - vp9_write_literal(&header_bc, 0, 2); - // Frame Q baseline quantizer index vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS); @@ -2178,9 +2175,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { active_section = 2; #endif - // TODO(jkoleszar): remove this unused bit - vp9_write_bit(&header_bc, 1); - vp9_update_skip_probs(cpi); for (i = 0; i < MBSKIP_CONTEXTS; ++i) { vp9_write_prob(&header_bc, pc->mbskip_pred_probs[i]); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f2cee7fff..dcbdef3ba 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3089,6 +3089,93 @@ static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) { } } +static enum BlockSize get_block_size(int bw, int bh) { +#if CONFIG_SB8X8 + if (bw == 4 && bh == 4) + return BLOCK_4X4; + + if (bw == 4 && bh == 8) + return BLOCK_4X8; + + if (bw == 8 && bh == 4) + return BLOCK_8X4; + + if (bw == 8 && bh == 8) + return BLOCK_8X8; + + if (bw == 8 && bh == 16) + return BLOCK_8X16; + + if (bw == 16 && bh == 8) + return BLOCK_16X8; +#else + if (bw == 16 && bh == 8) + return BLOCK_16X8; + + if (bw == 8 && bh == 16) + return BLOCK_8X16; + + if (bw == 8 && bh == 8) + return BLOCK_8X8; + + if (bw == 4 && bh == 4) + return BLOCK_4X4; +#endif + if (bw == 16 && bh == 16) + return BLOCK_16X16; + + if (bw == 32 && bh == 32) + return BLOCK_32X32; + + if (bw == 32 && bh == 16) + return BLOCK_32X16; + + if (bw == 16 && bh == 32) + return BLOCK_16X32; + + if (bw == 64 && bh == 32) + return BLOCK_64X32; + + if (bw == 32 && bh == 64) + return BLOCK_32X64; + + if (bw == 64 && bh == 64) + return BLOCK_64X64; + + assert(0); + return -1; +} + +static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int *out_dist_sum) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. + unsigned int sse, var; + int i, rate_sum = 0, dist_sum = 0; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; + + const int bwl = b_width_log2(bsize) - pd->subsampling_x; + const int bhl = b_height_log2(bsize) - pd->subsampling_y; + const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl); + int rate, dist; + var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse); + model_rd_from_var_lapndz(var, 16 << (bwl + bhl), + pd->dequant[1] >> 3, &rate, &dist); + + rate_sum += rate; + dist_sum += dist; + } + + *out_rate_sum = rate_sum; + *out_dist_sum = dist_sum; +} + static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int mdcounts[4], int64_t txfm_cache[], @@ -3283,76 +3370,40 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used if (1) { - int switchable_filter_index, newbest; - int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; - int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { + int i, newbest; + int tmp_rate_sum = 0, tmp_dist_sum = 0; + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { int rs = 0; - mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i]; + const int is_intpel_interp = intpel_mv && + vp9_is_interpolating_filter[filter]; + mbmi->interp_filter = filter; + vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { + if (cm->mcomp_filter_type == SWITCHABLE) { const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); const int m = vp9_switchable_interp_map[mbmi->interp_filter]; rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; } - if (interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i, - tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i); + + if (interpolating_intpel_seen && is_intpel_interp) { + rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum); } else { - unsigned int sse, var; - int tmp_rate_y, tmp_rate_u, tmp_rate_v; - int tmp_dist_y, tmp_dist_u, tmp_dist_v; + int rate_sum = 0, dist_sum = 0; vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf, - x->plane[0].src.stride, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride, - &sse); - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, MI_SIZE * bw * MI_SIZE * bh, - xd->plane[0].dequant[1] >> 3, - &tmp_rate_y, &tmp_dist_y); - var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf, - x->plane[1].src.stride, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride, - &sse); - model_rd_from_var_lapndz(var, MI_UV_SIZE * bw * MI_UV_SIZE * bh, - xd->plane[1].dequant[1] >> 3, - &tmp_rate_u, &tmp_dist_u); - var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf, - x->plane[1].src.stride, - xd->plane[2].dst.buf, - xd->plane[1].dst.stride, - &sse); - model_rd_from_var_lapndz(var, MI_UV_SIZE * bw * MI_UV_SIZE * bh, - xd->plane[2].dequant[1] >> 3, - &tmp_rate_v, &tmp_dist_v); - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, - tmp_dist_y + tmp_dist_u + tmp_dist_v); - if (!interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - tmp_rate_y_i = tmp_rate_y; - tmp_rate_u_i = tmp_rate_u; - tmp_rate_v_i = tmp_rate_v; - tmp_dist_y_i = tmp_dist_y; - tmp_dist_u_i = tmp_dist_u; - tmp_dist_v_i = tmp_dist_v; + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum); + if (!interpolating_intpel_seen && is_intpel_interp) { + tmp_rate_sum = rate_sum; + tmp_dist_sum = dist_sum; } } - newbest = (switchable_filter_index == 0 || rd < best_rd); + newbest = i == 0 || rd < best_rd; if (newbest) { best_rd = rd; *best_filter = mbmi->interp_filter; } + if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || (cm->mcomp_filter_type != SWITCHABLE && cm->mcomp_filter_type == mbmi->interp_filter)) { @@ -3367,21 +3418,18 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, sizeof(unsigned char) * MI_UV_SIZE * bw); for (i = 0; i < MI_UV_SIZE * bh; ++i) vpx_memcpy(tmp_vbuf + i * MI_UV_SIZE * bw, - xd->plane[2].dst.buf + i * xd->plane[1].dst.stride, + xd->plane[2].dst.buf + i * xd->plane[2].dst.stride, sizeof(unsigned char) * MI_UV_SIZE * bw); pred_exists = 1; } - interpolating_intpel_seen |= - intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter]; + interpolating_intpel_seen |= is_intpel_interp; } } // Set the appripriate filter - if (cm->mcomp_filter_type != SWITCHABLE) - mbmi->interp_filter = cm->mcomp_filter_type; - else - mbmi->interp_filter = *best_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? + cm->mcomp_filter_type : *best_filter; + vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); if (pred_exists) { // FIXME(rbultje): mb code still predicts into xd->predictor @@ -3394,7 +3442,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, tmp_ubuf + i * bw * MI_UV_SIZE, sizeof(unsigned char) * bw * MI_UV_SIZE); for (i = 0; i < bh * MI_UV_SIZE; ++i) - vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[1].dst.stride, + vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[2].dst.stride, tmp_vbuf + i * bw * MI_UV_SIZE, sizeof(unsigned char) * bw * MI_UV_SIZE); } else { @@ -4756,6 +4804,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame = ref_frame; mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; + + // TODO(jingning): scaling not supported in SPLITMV mode. + if (mbmi->ref_frame > 0 && + (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 || + yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) && + this_mode == SPLITMV) + continue; + + if (mbmi->second_ref_frame > 0 && + (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 || + yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) && + this_mode == SPLITMV) + continue; + + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, scale_factor); comp_pred = mbmi->second_ref_frame > INTRA_FRAME; diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 1ddd4f057..cbe3aa367 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -113,14 +113,6 @@ endif VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp9/common/x86/vp9_idct_intrin_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_loopfilter_intrin_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_sadmxn_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_idct_intrin_sse2.c.d: CFLAGS += -msse2 -vp9/common/x86/vp9_loopfilter_intrin_sse2.c.d: CFLAGS += -msse2 -vp9/common/x86/vp9_sadmxn_sse2.c.d: CFLAGS += -msse2 -endif $(eval $(call asm_offsets_template,\ vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/vp9_asm_com_offsets.c)) diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 39f836fd4..42ab02d31 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -95,10 +95,5 @@ VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp9/encoder/x86/vp9_dct_sse2.c.d: CFLAGS += -msse2 -vp9/encoder/x86/vp9_dct_sse2.c.o: CFLAGS += -msse2 -endif - VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index babdebb86..72cdfebf4 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -38,10 +38,6 @@ VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_dequantize_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp9/decoder/x86/vp9_dequantize_sse2.c.o: CFLAGS += -msse2 -vp9/decoder/x86/vp9_dequantize_sse2.c.d: CFLAGS += -msse2 -endif $(eval $(call asm_offsets_template,\ vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/vp9_asm_dec_offsets.c)) |