diff options
44 files changed, 800 insertions, 607 deletions
diff --git a/build/make/thumb.pm b/build/make/thumb.pm index e1f34c1ec..d8d04aa85 100644 --- a/build/make/thumb.pm +++ b/build/make/thumb.pm @@ -24,7 +24,7 @@ sub FixThumbInstructions($$) # with left shift, addition and a right shift (to restore the # register to the original value). Currently the right shift # isn't necessary in the code base since the values in these - # registers aren't used, but doing the shift for consitency. + # registers aren't used, but doing the shift for consistency. # This converts instructions such as "add r12, r12, r5, lsl r4" # into the sequence "lsl r5, r4", "add r12, r12, r5", "lsr r5, r4". s/^(\s*)(add)(\s+)(r\d+),\s*(r\d+),\s*(r\d+),\s*lsl (r\d+)/$1lsl$3$6, $7\n$1$2$3$4, $5, $6\n$1lsr$3$6, $7/g; diff --git a/examples.mk b/examples.mk index 2600a9d45..fe36c4f82 100644 --- a/examples.mk +++ b/examples.mk @@ -41,6 +41,7 @@ vpxenc.SRCS += args.c args.h y4minput.c y4minput.h vpxenc.h vpxenc.SRCS += ivfdec.c ivfdec.h vpxenc.SRCS += ivfenc.c ivfenc.h vpxenc.SRCS += tools_common.c tools_common.h +vpxenc.SRCS += warnings.c warnings.h vpxenc.SRCS += webmenc.c webmenc.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h diff --git a/test/test-data.sha1 b/test/test-data.sha1 index 827ae3182..bb09b7539 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -542,6 +542,8 @@ c9b237dfcc01c1b414fbcaa481d014a906ef7998 vp90-2-08-tile_1x4.webm.md5 ae7451810247fd13975cc257aa0301ff17102255 vp90-2-08-tile-4x4.webm.md5 2ec6e15422ac7a61af072dc5f27fcaf1942ce116 vp90-2-08-tile-4x1.webm 0094f5ee5e46345017c30e0aa4835b550212d853 vp90-2-08-tile-4x1.webm.md5 +edea45dac4a3c2e5372339f8851d24c9bef803d6 vp90-2-09-subpixel-00.ivf +5428efc4bf92191faedf4a727fcd1d94966a7abc vp90-2-09-subpixel-00.ivf.md5 8cdd435d89029987ee196896e21520e5f879f04d vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm 091b373aa2ecb59aa5c647affd5bcafcc7547364 vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm 87ee28032b0963a44b73a850fcc816a6dc83efbb vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm diff --git a/test/test.mk b/test/test.mk index 32601c569..4f877f48f 100644 --- a/test/test.mk +++ b/test/test.mk @@ -654,6 +654,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5 diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc index 08449a56a..433242037 100644 --- a/test/test_vector_test.cc +++ b/test/test_vector_test.cc @@ -165,6 +165,7 @@ const char *kVP9TestVectors[] = { "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm", "vp90-2-08-tile_1x4.webm", "vp90-2-08-tile-4x4.webm", "vp90-2-08-tile-4x1.webm", + "vp90-2-09-subpixel-00.ivf", #if CONFIG_NON420 "vp91-2-04-yv444.webm" #endif diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm index e559272cd..751bc74bc 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm +++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm @@ -112,27 +112,27 @@ vabd.u8 q4, q10, q9 ; m6 = abs(q3 - q2) ; only compare the largest value to limit - vmax.u8 q11, q11, q12 ; m1 = max(m1, m2) - vmax.u8 q12, q13, q14 ; m2 = max(m3, m4) + vmax.u8 q11, q11, q12 ; m7 = max(m1, m2) + vmax.u8 q12, q13, q14 ; m8 = max(m3, m4) vabd.u8 q9, q6, q7 ; abs(p0 - q0) - vmax.u8 q3, q3, q4 ; m3 = max(m5, m6) + vmax.u8 q3, q3, q4 ; m9 = max(m5, m6) vmov.u8 q10, #0x80 - vmax.u8 q15, q11, q12 ; m1 = max(m1, m2) + vmax.u8 q15, q11, q12 ; m10 = max(m7, m8) vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - vmax.u8 q15, q15, q3 ; m1 = max(m1, m3) + vmax.u8 q15, q15, q3 ; m11 = max(m10, m9) vabd.u8 q2, q5, q8 ; a = abs(p1 - q1) vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2 veor q7, q7, q10 ; qs0 - vcge.u8 q15, q1, q15 ; abs(m1) > limit + vcge.u8 q15, q1, q15 ; abs(m11) > limit vshr.u8 q2, q2, #1 ; a = a / 2 veor q6, q6, q10 ; ps0 @@ -142,7 +142,7 @@ veor q8, q8, q10 ; qs1 - vmov.u8 q4, #3 + vmov.u16 q4, #3 vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) vsubl.s8 q11, d15, d13 @@ -150,13 +150,15 @@ vcge.u8 q9, q0, q9 ; a > blimit vqsub.s8 q1, q5, q8 ; filter = clamp(ps1-qs1) - vorr q14, q13, q14 ; hevmask + vorr q14, q13, q14 ; hev vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) vmul.i16 q11, q11, q4 vand q1, q1, q14 ; filter &= hev - vand q15, q15, q9 ; filter_mask + vand q15, q15, q9 ; mask + + vmov.u8 q4, #3 vaddw.s8 q2, q2, d2 ; filter + 3 * (qs0 - ps0) vaddw.s8 q11, q11, d3 @@ -180,15 +182,14 @@ ; outer tap adjustments vrshr.s8 q1, q1, #1 ; filter = ++filter1 >> 1 - veor q6, q11, q10 ; *op0 = u^0x80 + veor q7, q0, q10 ; *oq0 = u^0x80 vbic q1, q1, q14 ; filter &= ~hev vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + filter) vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - filter) - - veor q7, q0, q10 ; *oq0 = u^0x80 + veor q6, q11, q10 ; *op0 = u^0x80 veor q5, q13, q10 ; *op1 = u^0x80 veor q8, q12, q10 ; *oq1 = u^0x80 diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c index 2f022dc1d..b97e7aa4a 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c +++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c @@ -10,17 +10,6 @@ #include "./vp9_rtcd.h" -void vp9_loop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_loop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1); - vp9_loop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1); -} - void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */, const uint8_t *blimit0, const uint8_t *limit0, @@ -31,3 +20,34 @@ void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */, vp9_mbloop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1); vp9_mbloop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1); } + +void vp9_loop_filter_vertical_edge_16_neon(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_loop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1); + vp9_loop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); +} + +void vp9_mbloop_filter_vertical_edge_16_neon(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_mbloop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1); + vp9_mbloop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, + 1); +} + +void vp9_mb_lpf_vertical_edge_w_16_neon(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + vp9_mb_lpf_vertical_edge_w_neon(s, p, blimit, limit, thresh); + vp9_mb_lpf_vertical_edge_w_neon(s + 8 * p, p, blimit, limit, thresh); +} diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c index 36cfc83c4..0c0f155ae 100644 --- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c @@ -306,4 +306,59 @@ void vp9_loop_filter_vertical_edge_dspr2(unsigned char *s, } } } + +void vp9_loop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_loop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_loop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); +} + +void vp9_mbloop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_mbloop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_mbloop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, + 1); +} + +void vp9_loop_filter_vertical_edge_16_dspr2(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_loop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_loop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, + 1); +} + +void vp9_mbloop_filter_vertical_edge_16_dspr2(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_mbloop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_mbloop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, + 1); +} + +void vp9_mb_lpf_vertical_edge_w_16_dspr2(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + vp9_mb_lpf_vertical_edge_w_dspr2(s, p, blimit, limit, thresh); + vp9_mb_lpf_vertical_edge_w_dspr2(s + 8 * p, p, blimit, limit, thresh); +} #endif // #if HAVE_DSPR2 diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index d2981601b..6e12638e3 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -202,7 +202,7 @@ void vp9_create_common(VP9_COMMON *cm) { vp9_machine_specific_config(cm); cm->tx_mode = ONLY_4X4; - cm->comp_pred_mode = HYBRID_PREDICTION; + cm->comp_pred_mode = REFERENCE_MODE_SELECT; } void vp9_remove_common(VP9_COMMON *cm) { @@ -213,7 +213,6 @@ void vp9_initialize_common() { vp9_init_neighbors(); vp9_coef_tree_initialize(); vp9_entropy_mode_init(); - vp9_entropy_mv_init(); } void vp9_update_frame_size(VP9_COMMON *cm) { diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index df963d1cc..37da92bd3 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -153,6 +153,34 @@ static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { return mbmi->ref_frame[1] > INTRA_FRAME; } +static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *left_mi, int b) { + if (b == 0 || b == 2) { + if (!left_mi || is_inter_block(&left_mi->mbmi)) + return DC_PRED; + + return left_mi->mbmi.sb_type < BLOCK_8X8 ? left_mi->bmi[b + 1].as_mode + : left_mi->mbmi.mode; + } else { + assert(b == 1 || b == 3); + return cur_mi->bmi[b - 1].as_mode; + } +} + +static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *above_mi, int b) { + if (b == 0 || b == 1) { + if (!above_mi || is_inter_block(&above_mi->mbmi)) + return DC_PRED; + + return above_mi->mbmi.sb_type < BLOCK_8X8 ? above_mi->bmi[b + 2].as_mode + : above_mi->mbmi.mode; + } else { + assert(b == 2 || b == 3); + return cur_mi->bmi[b - 2].as_mode; + } +} + enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 3b2510dcd..5d74c6967 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -329,6 +329,7 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) { vp9_copy(cm->fc.single_ref_prob, default_single_ref_p); cm->fc.tx_probs = default_tx_probs; vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs); + vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs); } const vp9_tree_index vp9_switchable_interp_tree @@ -466,7 +467,6 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { vp9_default_coef_probs(cm); vp9_init_mbmode_probs(cm); vp9_init_mv_probs(cm); - vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs); if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode || cm->reset_frame_context == 3) { diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index 290dcdd17..60ae79fdc 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -23,7 +23,6 @@ const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ }; -struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { -MV_CLASS_0, 2, @@ -37,19 +36,16 @@ const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, }; -struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1, }; -struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; -const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(4)] = { +const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1, 4, -2, -3 }; -struct vp9_token vp9_mv_fp_encodings[4]; static const nmv_context default_nmv_context = { {32, 64, 96}, @@ -235,13 +231,6 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { } } -void vp9_entropy_mv_init() { - vp9_tokens_from_tree(vp9_mv_joint_encodings, vp9_mv_joint_tree); - vp9_tokens_from_tree(vp9_mv_class_encodings, vp9_mv_class_tree); - vp9_tokens_from_tree(vp9_mv_class0_encodings, vp9_mv_class0_tree); - vp9_tokens_from_tree(vp9_mv_fp_encodings, vp9_mv_fp_tree); -} - void vp9_init_mv_probs(VP9_COMMON *cm) { cm->fc.nmvc = default_nmv_context; } diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index b62f7c42f..3175a1e49 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -18,7 +18,6 @@ struct VP9Common; -void vp9_entropy_mv_init(); void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); @@ -72,17 +71,10 @@ typedef enum { #define MV_UPP ((1 << MV_IN_USE_BITS) - 1) #define MV_LOW (-(1 << MV_IN_USE_BITS)) -extern const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)]; -extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; - -extern const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)]; -extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; - -extern const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)]; -extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; - -extern const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)]; -extern struct vp9_token vp9_mv_fp_encodings[4]; +extern const vp9_tree_index vp9_mv_joint_tree[]; +extern const vp9_tree_index vp9_mv_class_tree[]; +extern const vp9_tree_index vp9_mv_class0_tree[]; +extern const vp9_tree_index vp9_mv_fp_tree[]; typedef struct { vp9_prob sign; diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index 2362caa41..e9d4e1171 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -41,32 +41,4 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block_idx, int ref_idx, int mi_row, int mi_col); -static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b) { - if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(&left_mi->mbmi)) - return DC_PRED; - - return left_mi->mbmi.sb_type < BLOCK_8X8 ? left_mi->bmi[b + 1].as_mode - : left_mi->mbmi.mode; - } else { - assert(b == 1 || b == 3); - return cur_mi->bmi[b - 1].as_mode; - } -} - -static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b) { - if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(&above_mi->mbmi)) - return DC_PRED; - - return above_mi->mbmi.sb_type < BLOCK_8X8 ? above_mi->bmi[b + 2].as_mode - : above_mi->mbmi.mode; - } else { - assert(b == 2 || b == 3); - return cur_mi->bmi[b - 2].as_mode; - } -} - #endif // VP9_COMMON_VP9_FINDNEARMV_H_ diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 0b48de2cb..85dd7d8f0 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -354,12 +354,11 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, // TODO(yunqingwang): count in loopfilter functions should be removed. if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { + // TODO(yunqingwang): if (mask_16x16_0 & 1), then (mask_16x16_0 & 1) + // is always 1. Same is true for horizontal lf. if ((mask_16x16_0 & mask_16x16_1) & 1) { - // TODO(yunqingwang): Combine 2 calls as 1 wide filtering. - vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim, + vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); - vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); } else if (mask_16x16_0 & 1) { vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); @@ -371,11 +370,9 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, if ((mask_8x8_0 | mask_8x8_1) & 1) { if ((mask_8x8_0 & mask_8x8_1) & 1) { - // TODO(yunqingwang): Combine 2 calls as 1 wide filtering. - vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); - vp9_mbloop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1); + vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, + lfi1->lim, lfi1->hev_thr); } else if (mask_8x8_0 & 1) { vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1); @@ -387,11 +384,9 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, if ((mask_4x4_0 | mask_4x4_1) & 1) { if ((mask_4x4_0 & mask_4x4_1) & 1) { - // TODO(yunqingwang): Combine 2 calls as 1 wide filtering. - vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); - vp9_loop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1); + vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, + lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_0 & 1) { vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1); @@ -403,11 +398,9 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - // TODO(yunqingwang): Combine 2 calls as 1 wide filtering. - vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); - vp9_loop_filter_vertical_edge(s + 8 *pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1); + vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, + lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1); diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index 9edf8701f..ef8de2010 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -169,6 +169,34 @@ void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch, } } +void vp9_loop_filter_vertical_edge_16_c(uint8_t *s, int pitch, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + int i, j; + const uint8_t *blimit = blimit0; + const uint8_t *limit = limit0; + const uint8_t *thresh = thresh0; + + for (i = 0; i < 2; ++i) { + for (j = 0; j < 8; ++j) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1); + filter4(mask, hev, s - 2, s - 1, s, s + 1); + s += pitch; + } + blimit = blimit1; + limit = limit1; + thresh = thresh1; + } +} + static INLINE void filter8(int8_t mask, uint8_t hev, uint8_t flat, uint8_t *op3, uint8_t *op2, uint8_t *op1, uint8_t *op0, @@ -264,6 +292,36 @@ void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch, } } +void vp9_mbloop_filter_vertical_edge_16_c(uint8_t *s, int pitch, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + int i, j; + const uint8_t *blimit = blimit0; + const uint8_t *limit = limit0; + const uint8_t *thresh = thresh0; + + for (i = 0; i < 2; ++i) { + for (j = 0; j < 8; ++j) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hev_mask(thresh[0], p1, p0, q0, q1); + const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + filter8(mask, hev, flat, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3); + s += pitch; + } + blimit = blimit1; + limit = limit1; + thresh = thresh1; + } +} + static INLINE void filter16(int8_t mask, uint8_t hev, uint8_t flat, uint8_t flat2, uint8_t *op7, uint8_t *op6, @@ -366,3 +424,26 @@ void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p, s += p; } } + +void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + int i; + + for (i = 0; i < 16; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1); + const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, + q0, s[4], s[5], s[6], s[7]); + + filter16(mask, hev, flat, flat2, + s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); + s += p; + } +} diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index fb959cb36..751accf02 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -81,11 +81,11 @@ typedef struct { typedef enum { - SINGLE_PREDICTION_ONLY = 0, - COMP_PREDICTION_ONLY = 1, - HYBRID_PREDICTION = 2, - NB_PREDICTION_TYPES = 3, -} COMPPREDMODE_TYPE; + SINGLE_REFERENCE = 0, + COMPOUND_REFERENCE = 1, + REFERENCE_MODE_SELECT = 2, + REFERENCE_MODES = 3, +} REFERENCE_MODE; typedef struct VP9Common { struct vpx_internal_error_info error; @@ -195,7 +195,7 @@ typedef struct VP9Common { int allow_comp_inter_inter; MV_REFERENCE_FRAME comp_fixed_ref; MV_REFERENCE_FRAME comp_var_ref[2]; - COMPPREDMODE_TYPE comp_pred_mode; + REFERENCE_MODE comp_pred_mode; FRAME_CONTEXT fc; /* this frame entropy */ FRAME_CONTEXT frame_contexts[NUM_FRAME_CONTEXTS]; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index e18e757c1..28d24179c 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -193,12 +193,21 @@ specialize vp9_dc_128_predictor_32x32 prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" specialize vp9_mb_lpf_vertical_edge_w sse2 neon dspr2 +prototype void vp9_mb_lpf_vertical_edge_w_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" +specialize vp9_mb_lpf_vertical_edge_w_16 sse2 neon dspr2 + prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_mbloop_filter_vertical_edge sse2 neon dspr2 +prototype void vp9_mbloop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" +specialize vp9_mbloop_filter_vertical_edge_16 sse2 neon dspr2 + prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_loop_filter_vertical_edge mmx neon dspr2 +prototype void vp9_loop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" +specialize vp9_loop_filter_vertical_edge_16 sse2 neon dspr2 + prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_mb_lpf_horizontal_edge_w sse2 avx2 neon dspr2 @@ -206,13 +215,13 @@ prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const u specialize vp9_mbloop_filter_horizontal_edge sse2 neon dspr2 prototype void vp9_mbloop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon +specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon dspr2 prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_loop_filter_horizontal_edge mmx neon dspr2 prototype void vp9_loop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_loop_filter_horizontal_edge_16 sse2 neon +specialize vp9_loop_filter_horizontal_edge_16 sse2 neon dspr2 # # post proc diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c index 925f74d19..3ca55cfc3 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c +++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include <emmintrin.h> /* SSE2 */ +#include <emmintrin.h> // SSE2 #include "vp9/common/vp9_loopfilter.h" #include "vpx_ports/emmintrin_compat.h" @@ -99,7 +99,7 @@ static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + // (vp9_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -110,11 +110,11 @@ static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s, filter2 = _mm_unpacklo_epi8(zero, filter2); filter2 = _mm_srai_epi16(filter2, 0xB); - /* Filter1 >> 3 */ + // Filter1 >> 3 filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); - /* filt >> 1 */ + // filt >> 1 filt = _mm_adds_epi16(filter1, t1); filt = _mm_srai_epi16(filt, 1); filt = _mm_andnot_si128(_mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), @@ -473,13 +473,13 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + // (vp9_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); - /* Filter1 >> 3 */ + // Filter1 >> 3 work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); @@ -487,7 +487,7 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, filter1 = _mm_or_si128(filter1, work_a); qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - /* Filter2 >> 3 */ + // Filter2 >> 3 work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); @@ -495,7 +495,7 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, filter2 = _mm_or_si128(filter2, work_a); ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - /* filt >> 1 */ + // filt >> 1 filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); @@ -1014,23 +1014,23 @@ void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + // (vp9_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); - /* Filter1 >> 3 */ + // Filter1 >> 3 filter1 = _mm_unpacklo_epi8(zero, filter1); filter1 = _mm_srai_epi16(filter1, 11); filter1 = _mm_packs_epi16(filter1, filter1); - /* Filter2 >> 3 */ + // Filter2 >> 3 filter2 = _mm_unpacklo_epi8(zero, filter2); filter2 = _mm_srai_epi16(filter2, 11); filter2 = _mm_packs_epi16(filter2, zero); - /* filt >> 1 */ + // filt >> 1 filt = _mm_adds_epi8(filter1, t1); filt = _mm_unpacklo_epi8(zero, filt); filt = _mm_srai_epi16(filt, 9); @@ -1083,7 +1083,7 @@ void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s, } } -void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p /* pitch */, +void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0, const uint8_t *_thresh0, @@ -1255,27 +1255,27 @@ void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p /* pitch */, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + // (vp9_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); - /* Filter1 >> 3 */ + // Filter1 >> 3 work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); filter1 = _mm_and_si128(filter1, t1f); filter1 = _mm_or_si128(filter1, work_a); - /* Filter2 >> 3 */ + // Filter2 >> 3 work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); - /* filt >> 1 */ + // filt >> 1 filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); @@ -1427,27 +1427,27 @@ void vp9_loop_filter_horizontal_edge_16_sse2(unsigned char *s, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + // (vp9_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); - /* Filter1 >> 3 */ + // Filter1 >> 3 work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); filter1 = _mm_and_si128(filter1, t1f); filter1 = _mm_or_si128(filter1, work_a); - /* Filter2 >> 3 */ + // Filter2 >> 3 work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); - /* filt >> 1 */ + // filt >> 1 filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); @@ -1474,7 +1474,7 @@ static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, __m128i x0, x1, x2, x3, x4, x5, x6, x7; __m128i x8, x9, x10, x11, x12, x13, x14, x15; - /* Read in 16 lines */ + // Read in 16 lines x0 = _mm_loadl_epi64((__m128i *)in0); x8 = _mm_loadl_epi64((__m128i *)in1); x1 = _mm_loadl_epi64((__m128i *)(in0 + in_p)); @@ -1512,7 +1512,7 @@ static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, x14 = _mm_unpacklo_epi32(x12, x13); x15 = _mm_unpackhi_epi32(x12, x13); - /* Store first 4-line result */ + // Store first 4-line result _mm_storeu_si128((__m128i *)out, _mm_unpacklo_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + out_p), _mm_unpackhi_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + 2 * out_p), _mm_unpacklo_epi64(x7, x15)); @@ -1528,7 +1528,7 @@ static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, x14 = _mm_unpacklo_epi32(x12, x13); x15 = _mm_unpackhi_epi32(x12, x13); - /* Store second 4-line result */ + // Store second 4-line result _mm_storeu_si128((__m128i *)(out + 4 * out_p), _mm_unpacklo_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + 5 * out_p), _mm_unpackhi_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + 6 * out_p), _mm_unpacklo_epi64(x7, x15)); @@ -1598,61 +1598,129 @@ static INLINE void transpose(unsigned char *src[], int in_p, } while (++idx8x8 < num_8x8_to_transpose); } -void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, - int p, +void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8); + unsigned char *src[2]; + unsigned char *dst[2]; + + // Transpose 8x16 + transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + + // Loop filtering + vp9_loop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0, + thresh0, blimit1, limit1, thresh1); + src[0] = t_dst; + src[1] = t_dst + 8; + dst[0] = s - 4; + dst[1] = s - 4 + p * 8; + + // Transpose back + transpose(src, 16, dst, p, 2); +} + +void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256); + DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 8); + unsigned char *src[1]; + unsigned char *dst[1]; + (void)count; + + // Transpose 8x8 + src[0] = s - 4; + dst[0] = t_dst; + + transpose(src, p, dst, 8, 1); + + // Loop filtering + vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 4 * 8, 8, blimit, limit, + thresh, 1); + + src[0] = t_dst; + dst[0] = s - 4; + + // Transpose back + transpose(src, 8, dst, p, 1); +} + +void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8); unsigned char *src[2]; unsigned char *dst[2]; - (void)count; - /* Transpose 16x16 */ - transpose8x16(s - 8, s - 8 + p * 8, p, t_dst, 16); - transpose8x16(s, s + p * 8, p, t_dst + 16 * 8, 16); + // Transpose 8x16 + transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - /* Loop filtering */ - vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 8 * 16, 16, blimit, limit, - thresh, 1); - src[0] = t_dst + 3 * 16; - src[1] = t_dst + 3 * 16 + 8; + // Loop filtering + vp9_mbloop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0, + thresh0, blimit1, limit1, thresh1); + src[0] = t_dst; + src[1] = t_dst + 8; - dst[0] = s - 5; - dst[1] = s - 5 + p * 8; + dst[0] = s - 4; + dst[1] = s - 4 + p * 8; - /* Transpose 16x8 */ + // Transpose back transpose(src, 16, dst, p, 2); } -void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, - int p, +void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256); - unsigned char *src[4]; - unsigned char *dst[4]; - - dst[0] = t_dst; - dst[1] = t_dst + 8 * 16; + DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 16); + unsigned char *src[2]; + unsigned char *dst[2]; src[0] = s - 8; - src[1] = s - 8 + 8; + src[1] = s; + dst[0] = t_dst; + dst[1] = t_dst + 8 * 8; - /* Transpose 16x16 */ - transpose(src, p, dst, 16, 2); + // Transpose 16x8 + transpose(src, p, dst, 8, 2); - /* Loop filtering */ - vp9_mb_lpf_horizontal_edge_w_sse2(t_dst + 8 * 16, 16, blimit, limit, - thresh, 1); + // Loop filtering + mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh); src[0] = t_dst; - src[1] = t_dst + 8 * 16; - + src[1] = t_dst + 8 * 8; dst[0] = s - 8; - dst[1] = s - 8 + 8; + dst[1] = s; - transpose(src, 16, dst, p, 2); + // Transpose back + transpose(src, 8, dst, p, 2); +} + +void vp9_mb_lpf_vertical_edge_w_16_sse2(unsigned char *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256); + + // Transpose 16x16 + transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); + transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); + + // Loop filtering + mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, + thresh); + + // Transpose back + transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); + transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); } diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c index 06acec4db..4f16e95b0 100644 --- a/vp9/decoder/vp9_dboolhuff.c +++ b/vp9/decoder/vp9_dboolhuff.c @@ -18,32 +18,28 @@ // Even relatively modest values like 100 would work fine. #define LOTS_OF_BITS 0x40000000 - int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size) { - int marker_bit; - - r->buffer_end = buffer + size; - r->buffer = buffer; - r->value = 0; - r->count = -8; - r->range = 255; - - if (size && !buffer) + if (size && !buffer) { return 1; - - vp9_reader_fill(r); - marker_bit = vp9_read_bit(r); - return marker_bit != 0; + } else { + r->buffer_end = buffer + size; + r->buffer = buffer; + r->value = 0; + r->count = -8; + r->range = 255; + vp9_reader_fill(r); + return vp9_read_bit(r) != 0; // marker bit + } } void vp9_reader_fill(vp9_reader *r) { const uint8_t *const buffer_end = r->buffer_end; const uint8_t *buffer = r->buffer; - VP9_BD_VALUE value = r->value; + BD_VALUE value = r->value; int count = r->count; - int shift = BD_VALUE_SIZE - 8 - (count + 8); + int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); int loop_end = 0; - const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT); + const int bits_left = (int)((buffer_end - buffer) * CHAR_BIT); const int x = shift + CHAR_BIT - bits_left; if (x >= 0) { @@ -54,7 +50,7 @@ void vp9_reader_fill(vp9_reader *r) { if (x < 0 || bits_left) { while (shift >= loop_end) { count += CHAR_BIT; - value |= (VP9_BD_VALUE)*buffer++ << shift; + value |= (BD_VALUE)*buffer++ << shift; shift -= CHAR_BIT; } } diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index fd8e74ca4..8339c2701 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -18,46 +18,50 @@ #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" -typedef size_t VP9_BD_VALUE; +#include "vp9/common/vp9_treecoder.h" -#define BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) +typedef size_t BD_VALUE; + +#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT) + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); typedef struct { const uint8_t *buffer_end; const uint8_t *buffer; - VP9_BD_VALUE value; + BD_VALUE value; int count; unsigned int range; } vp9_reader; -DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); - int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size); void vp9_reader_fill(vp9_reader *r); +int vp9_reader_has_error(vp9_reader *r); + const uint8_t *vp9_reader_find_end(vp9_reader *r); -static int vp9_read(vp9_reader *br, int probability) { +static int vp9_read(vp9_reader *r, int prob) { unsigned int bit = 0; - VP9_BD_VALUE value; - VP9_BD_VALUE bigsplit; + BD_VALUE value; + BD_VALUE bigsplit; int count; unsigned int range; - unsigned int split = ((br->range * probability) + (256 - probability)) >> 8; + unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT; - if (br->count < 0) - vp9_reader_fill(br); + if (r->count < 0) + vp9_reader_fill(r); - value = br->value; - count = br->count; + value = r->value; + count = r->count; - bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8); + bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); range = split; if (value >= bigsplit) { - range = br->range - split; + range = r->range - split; value = value - bigsplit; bit = 1; } @@ -68,9 +72,9 @@ static int vp9_read(vp9_reader *br, int probability) { value <<= shift; count -= shift; } - br->value = value; - br->count = count; - br->range = range; + r->value = value; + r->count = count; + r->range = range; return bit; } @@ -79,15 +83,23 @@ static int vp9_read_bit(vp9_reader *r) { return vp9_read(r, 128); // vp9_prob_half } -static int vp9_read_literal(vp9_reader *br, int bits) { - int z = 0, bit; +static int vp9_read_literal(vp9_reader *r, int bits) { + int literal = 0, bit; for (bit = bits - 1; bit >= 0; bit--) - z |= vp9_read_bit(br) << bit; + literal |= vp9_read_bit(r) << bit; - return z; + return literal; } -int vp9_reader_has_error(vp9_reader *r); +static int vp9_read_tree(vp9_reader *r, const vp9_tree_index *tree, + const vp9_prob *probs) { + vp9_tree_index i = 0; + + while ((i = tree[i + vp9_read(r, probs[i >> 1])]) > 0) + continue; + + return -i; +} #endif // VP9_DECODER_VP9_DBOOLHUFF_H_ diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index ea185703c..097ffb1da 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -36,7 +36,6 @@ #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_thread.h" -#include "vp9/decoder/vp9_treereader.h" typedef struct TileWorkerData { VP9_COMMON *cm; @@ -126,8 +125,8 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); } -static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { - COMPPREDMODE_TYPE mode = vp9_read_bit(r); +static INLINE REFERENCE_MODE read_comp_pred_mode(vp9_reader *r) { + REFERENCE_MODE mode = vp9_read_bit(r); if (mode) mode += vp9_read_bit(r); return mode; @@ -138,21 +137,21 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { const int compound_allowed = is_compound_prediction_allowed(cm); cm->comp_pred_mode = compound_allowed ? read_comp_pred_mode(r) - : SINGLE_PREDICTION_ONLY; + : SINGLE_REFERENCE; if (compound_allowed) setup_compound_prediction(cm); - if (cm->comp_pred_mode == HYBRID_PREDICTION) + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); - if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) + if (cm->comp_pred_mode != COMPOUND_REFERENCE) for (i = 0; i < REF_CONTEXTS; i++) { vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]); vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]); } - if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) + if (cm->comp_pred_mode != SINGLE_REFERENCE) for (i = 0; i < REF_CONTEXTS; i++) vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); } @@ -473,7 +472,7 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, PARTITION_TYPE p; if (has_rows && has_cols) - p = treed_read(r, vp9_partition_tree, probs); + p = vp9_read_tree(r, vp9_partition_tree, probs); else if (!has_rows && has_cols) p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; else if (has_rows && !has_cols) diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 75f0ae865..327a9166c 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -20,13 +20,13 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodeframe.h" #include "vp9/decoder/vp9_onyxd_int.h" -#include "vp9/decoder/vp9_treereader.h" static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p); + return (MB_PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p); } static MB_PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, vp9_reader *r, @@ -49,8 +49,8 @@ static MB_PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, vp9_reader *r, static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, int ctx) { - const int mode = treed_read(r, vp9_inter_mode_tree, - cm->fc.inter_mode_probs[ctx]); + const int mode = vp9_read_tree(r, vp9_inter_mode_tree, + cm->fc.inter_mode_probs[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.inter_mode[ctx][mode]; @@ -58,7 +58,7 @@ static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, } static int read_segment_id(vp9_reader *r, const struct segmentation *seg) { - return treed_read(r, vp9_segment_tree, seg->tree_probs); + return vp9_read_tree(r, vp9_segment_tree, seg->tree_probs); } static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, @@ -210,12 +210,12 @@ static int read_mv_component(vp9_reader *r, const nmv_component *mvcomp, int usehp) { int mag, d, fr, hp; const int sign = vp9_read(r, mvcomp->sign); - const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes); + const int mv_class = vp9_read_tree(r, vp9_mv_class_tree, mvcomp->classes); const int class0 = mv_class == MV_CLASS_0; // Integer part if (class0) { - d = treed_read(r, vp9_mv_class0_tree, mvcomp->class0); + d = vp9_read_tree(r, vp9_mv_class0_tree, mvcomp->class0); } else { int i; const int n = mv_class + CLASS0_BITS - 1; // number of bits @@ -226,8 +226,8 @@ static int read_mv_component(vp9_reader *r, } // Fractional part - fr = treed_read(r, vp9_mv_fp_tree, - class0 ? mvcomp->class0_fp[d] : mvcomp->fp); + fr = vp9_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d] + : mvcomp->fp); // High precision part (if hp is not used, the default value of the hp is 1) @@ -242,7 +242,7 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints); + const MV_JOINT_TYPE j = vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; @@ -258,14 +258,14 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, mv->col = ref->col + diff.col; } -static COMPPREDMODE_TYPE read_reference_mode(VP9_COMMON *cm, +static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_comp_inter_inter(cm, xd); const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.comp_inter[ctx][mode]; - return mode; // SINGLE_PREDICTION_ONLY or COMP_PREDICTION_ONLY + return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE } // Read the referncence frame @@ -279,12 +279,12 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { - const COMPPREDMODE_TYPE mode = (cm->comp_pred_mode == HYBRID_PREDICTION) + const REFERENCE_MODE mode = (cm->comp_pred_mode == REFERENCE_MODE_SELECT) ? read_reference_mode(cm, xd, r) : cm->comp_pred_mode; // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding - if (mode == COMP_PREDICTION_ONLY) { + if (mode == COMPOUND_REFERENCE) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); const int bit = vp9_read(r, fc->comp_ref_prob[ctx]); @@ -292,7 +292,7 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, ++counts->comp_ref[ctx][bit]; ref_frame[idx] = cm->comp_fixed_ref; ref_frame[!idx] = cm->comp_var_ref[bit]; - } else if (mode == SINGLE_PREDICTION_ONLY) { + } else if (mode == SINGLE_REFERENCE) { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); if (!cm->frame_parallel_decoding_mode) @@ -318,8 +318,8 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, static INLINE INTERPOLATION_TYPE read_switchable_filter_type( VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - const int type = treed_read(r, vp9_switchable_interp_tree, - cm->fc.switchable_interp_prob[ctx]); + const int type = vp9_read_tree(r, vp9_switchable_interp_tree, + cm->fc.switchable_interp_prob[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.switchable_interp[ctx][type]; return type; diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 214c1c198..fb6e52b74 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -18,7 +18,6 @@ #include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_onyxd_int.h" -#include "vp9/decoder/vp9_treereader.h" #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h deleted file mode 100644 index 41680d245..000000000 --- a/vp9/decoder/vp9_treereader.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_TREEREADER_H_ -#define VP9_DECODER_VP9_TREEREADER_H_ - -#include "vp9/common/vp9_treecoder.h" -#include "vp9/decoder/vp9_dboolhuff.h" - -// Intent of tree data structure is to make decoding trivial. -static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */ - vp9_tree t, - const vp9_prob *const p) { - register vp9_tree_index i = 0; - - while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0) - continue; - - return -i; -} - -#endif // VP9_DECODER_VP9_TREEREADER_H_ diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index a0fced576..146636469 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -169,6 +169,8 @@ static void prob_diff_update(const vp9_tree_index *tree, int n, vp9_writer *w) { int i; unsigned int branch_ct[32][2]; + + // Assuming max number of probabilities <= 32 assert(n <= 32); vp9_tree_probs_from_distribution(tree, branch_ct, counts); @@ -319,12 +321,12 @@ static void encode_ref_frame(VP9_COMP *cpi, vp9_writer *bc) { if (!seg_ref_active) { // does the feature use compound prediction or not // (if not specified at the frame/segment level) - if (cm->comp_pred_mode == HYBRID_PREDICTION) { + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) { vp9_write(bc, mi->ref_frame[1] > INTRA_FRAME, vp9_get_pred_prob_comp_inter_inter(cm, xd)); } else { assert((mi->ref_frame[1] <= INTRA_FRAME) == - (cm->comp_pred_mode == SINGLE_PREDICTION_ONLY)); + (cm->comp_pred_mode == SINGLE_REFERENCE)); } if (mi->ref_frame[1] > INTRA_FRAME) { @@ -1357,8 +1359,8 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (cm->allow_comp_inter_inter) { const int comp_pred_mode = cpi->common.comp_pred_mode; - const int use_compound_pred = comp_pred_mode != SINGLE_PREDICTION_ONLY; - const int use_hybrid_pred = comp_pred_mode == HYBRID_PREDICTION; + const int use_compound_pred = comp_pred_mode != SINGLE_REFERENCE; + const int use_hybrid_pred = comp_pred_mode == REFERENCE_MODE_SELECT; vp9_write_bit(&header_bc, use_compound_pred); if (use_compound_pred) { @@ -1370,7 +1372,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { } } - if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { + if (cm->comp_pred_mode != COMPOUND_REFERENCE) { for (i = 0; i < REF_CONTEXTS; i++) { vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0], cpi->single_ref_count[i][0]); @@ -1379,7 +1381,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { } } - if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) + if (cm->comp_pred_mode != SINGLE_REFERENCE) for (i = 0; i < REF_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i], cpi->comp_ref_count[i]); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index cd02aadb0..18cfddd55 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -531,9 +531,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; } - cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff; - cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff; - cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff; + cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) cpi->rd_filter_diff[i] += ctx->best_filter_diff[i]; @@ -758,7 +758,7 @@ static void update_stats(VP9_COMP *cpi) { // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. if (is_inter_block(mbmi) && !seg_ref_active) { - if (cm->comp_pred_mode == HYBRID_PREDICTION) + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) cpi->comp_inter_count[vp9_get_pred_context_comp_inter_inter(cm, xd)] [has_second_ref(mbmi)]++; @@ -1949,10 +1949,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { xd->mode_info_stride = cm->mode_info_stride; - // reset intra mode contexts - if (frame_is_intra_only(cm)) - vp9_init_mbmode_probs(cm); - // Copy data over into macro block data structures. vp9_setup_src_planes(x, cpi->Source, 0, 0); @@ -2315,18 +2311,18 @@ void vp9_encode_frame(VP9_COMP *cpi) { /* prediction (compound, single or hybrid) mode selection */ if (frame_type == 3 || !cm->allow_comp_inter_inter) - pred_type = SINGLE_PREDICTION_ONLY; + pred_type = SINGLE_REFERENCE; else if (cpi->rd_prediction_type_threshes[frame_type][1] > cpi->rd_prediction_type_threshes[frame_type][0] && cpi->rd_prediction_type_threshes[frame_type][1] > cpi->rd_prediction_type_threshes[frame_type][2] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - pred_type = COMP_PREDICTION_ONLY; + pred_type = COMPOUND_REFERENCE; else if (cpi->rd_prediction_type_threshes[frame_type][0] > cpi->rd_prediction_type_threshes[frame_type][2]) - pred_type = SINGLE_PREDICTION_ONLY; + pred_type = SINGLE_REFERENCE; else - pred_type = HYBRID_PREDICTION; + pred_type = REFERENCE_MODE_SELECT; /* filter type selection */ // FIXME(rbultje) for some odd reason, we often select smooth_filter @@ -2363,7 +2359,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { cpi->common.mcomp_filter_type = filter_type; encode_frame_internal(cpi); - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { + for (i = 0; i < REFERENCE_MODES; ++i) { const int diff = (int) (cpi->rd_comp_pred_diff[i] / cpi->common.MBs); cpi->rd_prediction_type_threshes[frame_type][i] += diff; cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; @@ -2386,7 +2382,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { cpi->rd_tx_select_threshes[frame_type][i] /= 2; } - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) { int single_count_zero = 0; int comp_count_zero = 0; @@ -2396,10 +2392,10 @@ void vp9_encode_frame(VP9_COMP *cpi) { } if (comp_count_zero == 0) { - cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY; + cpi->common.comp_pred_mode = SINGLE_REFERENCE; vp9_zero(cpi->comp_inter_count); } else if (single_count_zero == 0) { - cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY; + cpi->common.comp_pred_mode = COMPOUND_REFERENCE; vp9_zero(cpi->comp_inter_count); } } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 88cf11214..bd9678afb 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -372,22 +372,19 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const scan_order *so; uint16_t *eob = &pd->eobs[block]; - const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; - const int twl = bwl - tx_size, twmask = (1 << twl) - 1; - int xoff, yoff; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + int i, j; int16_t *src_diff; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + src_diff = &p->src_diff[4 * (j * diff_stride + i)]; switch (tx_size) { case TX_32X32: so = &vp9_default_scan_orders[TX_32X32]; - block >>= 6; - xoff = 32 * (block & twmask); - yoff = 32 * (block >> twl); - src_diff = p->src_diff + 4 * bw * yoff + xoff; if (x->use_lp32x32fdct) - vp9_fdct32x32_rd(src_diff, coeff, bw * 4); + vp9_fdct32x32_rd(src_diff, coeff, diff_stride); else - vp9_fdct32x32(src_diff, coeff, bw * 4); + vp9_fdct32x32(src_diff, coeff, diff_stride); vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, so->scan, @@ -395,32 +392,21 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, break; case TX_16X16: so = &vp9_default_scan_orders[TX_16X16]; - block >>= 4; - xoff = 16 * (block & twmask); - yoff = 16 * (block >> twl); - src_diff = p->src_diff + 4 * bw * yoff + xoff; - vp9_fdct16x16(src_diff, coeff, bw * 4); + vp9_fdct16x16(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, so->scan, so->iscan); break; case TX_8X8: so = &vp9_default_scan_orders[TX_8X8]; - block >>= 2; - xoff = 8 * (block & twmask); - yoff = 8 * (block >> twl); - src_diff = p->src_diff + 4 * bw * yoff + xoff; - vp9_fdct8x8(src_diff, coeff, bw * 4); + vp9_fdct8x8(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, so->scan, so->iscan); break; case TX_4X4: so = &vp9_default_scan_orders[TX_4X4]; - xoff = 4 * (block & twmask); - yoff = 4 * (block >> twl); - src_diff = p->src_diff + 4 * bw * yoff + xoff; - x->fwd_txm4x4(src_diff, coeff, bw * 4); + x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, so->scan, so->iscan); diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index cc4e347a3..3f01c778f 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -15,11 +15,22 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_encodemv.h" - #ifdef ENTROPY_STATS extern unsigned int active_section; #endif +static struct vp9_token mv_joint_encodings[MV_JOINTS]; +static struct vp9_token mv_class_encodings[MV_CLASSES]; +static struct vp9_token mv_fp_encodings[MV_FP_SIZE]; +static struct vp9_token mv_class0_encodings[CLASS0_SIZE]; + +void vp9_entropy_mv_init() { + vp9_tokens_from_tree(mv_joint_encodings, vp9_mv_joint_tree); + vp9_tokens_from_tree(mv_class_encodings, vp9_mv_class_tree); + vp9_tokens_from_tree(mv_class0_encodings, vp9_mv_class0_tree); + vp9_tokens_from_tree(mv_fp_encodings, vp9_mv_fp_tree); +} + static void encode_mv_component(vp9_writer* w, int comp, const nmv_component* mvcomp, int usehp) { int offset; @@ -37,12 +48,12 @@ static void encode_mv_component(vp9_writer* w, int comp, // Class write_token(w, vp9_mv_class_tree, mvcomp->classes, - &vp9_mv_class_encodings[mv_class]); + &mv_class_encodings[mv_class]); // Integer bits if (mv_class == MV_CLASS_0) { write_token(w, vp9_mv_class0_tree, mvcomp->class0, - &vp9_mv_class0_encodings[d]); + &mv_class0_encodings[d]); } else { int i; const int n = mv_class + CLASS0_BITS - 1; // number of bits @@ -53,7 +64,7 @@ static void encode_mv_component(vp9_writer* w, int comp, // Fractional bits write_token(w, vp9_mv_fp_tree, mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp, - &vp9_mv_fp_encodings[fr]); + &mv_fp_encodings[fr]); // High precision bit if (usehp) @@ -137,111 +148,55 @@ static int update_mv(vp9_writer *w, const unsigned int ct[2], vp9_prob *cur_p, return update; } -static void counts_to_nmv_context( - nmv_context_counts *nmv_count, - int usehp, - unsigned int (*branch_ct_joint)[2], - unsigned int (*branch_ct_sign)[2], - unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], - unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], - unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], - unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][MV_FP_SIZE - 1][2], - unsigned int (*branch_ct_fp)[MV_FP_SIZE - 1][2], - unsigned int (*branch_ct_class0_hp)[2], - unsigned int (*branch_ct_hp)[2]) { - int i, j, k; - vp9_tree_probs_from_distribution(vp9_mv_joint_tree, branch_ct_joint, - nmv_count->joints); - for (i = 0; i < 2; ++i) { - branch_ct_sign[i][0] = nmv_count->comps[i].sign[0]; - branch_ct_sign[i][1] = nmv_count->comps[i].sign[1]; - vp9_tree_probs_from_distribution(vp9_mv_class_tree, - branch_ct_classes[i], - nmv_count->comps[i].classes); - vp9_tree_probs_from_distribution(vp9_mv_class0_tree, - branch_ct_class0[i], - nmv_count->comps[i].class0); - for (j = 0; j < MV_OFFSET_BITS; ++j) { - branch_ct_bits[i][j][0] = nmv_count->comps[i].bits[j][0]; - branch_ct_bits[i][j][1] = nmv_count->comps[i].bits[j][1]; - } - } - for (i = 0; i < 2; ++i) { - for (k = 0; k < CLASS0_SIZE; ++k) { - vp9_tree_probs_from_distribution(vp9_mv_fp_tree, - branch_ct_class0_fp[i][k], - nmv_count->comps[i].class0_fp[k]); - } - vp9_tree_probs_from_distribution(vp9_mv_fp_tree, - branch_ct_fp[i], - nmv_count->comps[i].fp); - } - if (usehp) { - for (i = 0; i < 2; ++i) { - branch_ct_class0_hp[i][0] = nmv_count->comps[i].class0_hp[0]; - branch_ct_class0_hp[i][1] = nmv_count->comps[i].class0_hp[1]; +static void write_mv_update(const vp9_tree_index *tree, + vp9_prob probs[/*n - 1*/], + const unsigned int counts[/*n - 1*/], + int n, vp9_writer *w) { + int i; + unsigned int branch_ct[32][2]; - branch_ct_hp[i][0] = nmv_count->comps[i].hp[0]; - branch_ct_hp[i][1] = nmv_count->comps[i].hp[1]; - } - } + // Assuming max number of probabilities <= 32 + assert(n <= 32); + + vp9_tree_probs_from_distribution(tree, branch_ct, counts); + for (i = 0; i < n - 1; ++i) + update_mv(w, branch_ct[i], &probs[i], NMV_UPDATE_PROB); } -void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) { +void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer *w) { int i, j; - unsigned int branch_ct_joint[MV_JOINTS - 1][2]; - unsigned int branch_ct_sign[2][2]; - unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; - unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; - unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; - unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][MV_FP_SIZE - 1][2]; - unsigned int branch_ct_fp[2][MV_FP_SIZE - 1][2]; - unsigned int branch_ct_class0_hp[2][2]; - unsigned int branch_ct_hp[2][2]; nmv_context *mvc = &cpi->common.fc.nmvc; + nmv_context_counts *counts = &cpi->NMVcount; - counts_to_nmv_context(&cpi->NMVcount, usehp, - branch_ct_joint, branch_ct_sign, branch_ct_classes, - branch_ct_class0, branch_ct_bits, - branch_ct_class0_fp, branch_ct_fp, - branch_ct_class0_hp, branch_ct_hp); - - for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(bc, branch_ct_joint[j], &mvc->joints[j], NMV_UPDATE_PROB); + write_mv_update(vp9_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w); for (i = 0; i < 2; ++i) { - update_mv(bc, branch_ct_sign[i], &mvc->comps[i].sign, NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(bc, branch_ct_classes[i][j], &mvc->comps[i].classes[j], - NMV_UPDATE_PROB); - - for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_mv(bc, branch_ct_class0[i][j], &mvc->comps[i].class0[j], - NMV_UPDATE_PROB); - + nmv_component *comp = &mvc->comps[i]; + nmv_component_counts *comp_counts = &counts->comps[i]; + + update_mv(w, comp_counts->sign, &comp->sign, NMV_UPDATE_PROB); + write_mv_update(vp9_mv_class_tree, comp->classes, comp_counts->classes, + MV_CLASSES, w); + write_mv_update(vp9_mv_class0_tree, comp->class0, comp_counts->class0, + CLASS0_SIZE, w); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(bc, branch_ct_bits[i][j], &mvc->comps[i].bits[j], - NMV_UPDATE_PROB); + update_mv(w, comp_counts->bits[j], &comp->bits[j], NMV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - int k; - for (k = 0; k < MV_FP_SIZE - 1; ++k) - update_mv(bc, branch_ct_class0_fp[i][j][k], - &mvc->comps[i].class0_fp[j][k], NMV_UPDATE_PROB); - } + for (j = 0; j < CLASS0_SIZE; ++j) + write_mv_update(vp9_mv_fp_tree, mvc->comps[i].class0_fp[j], + counts->comps[i].class0_fp[j], MV_FP_SIZE, w); - for (j = 0; j < MV_FP_SIZE - 1; ++j) - update_mv(bc, branch_ct_fp[i][j], &mvc->comps[i].fp[j], NMV_UPDATE_PROB); + write_mv_update(vp9_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp, + MV_FP_SIZE, w); } if (usehp) { for (i = 0; i < 2; ++i) { - update_mv(bc, branch_ct_class0_hp[i], &mvc->comps[i].class0_hp, - NMV_UPDATE_PROB); - update_mv(bc, branch_ct_hp[i], &mvc->comps[i].hp, + update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp, NMV_UPDATE_PROB); + update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, NMV_UPDATE_PROB); } } } @@ -254,7 +209,7 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w, const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff); usehp = usehp && vp9_use_mv_hp(ref); - write_token(w, vp9_mv_joint_tree, mvctx->joints, &vp9_mv_joint_encodings[j]); + write_token(w, vp9_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]); if (mv_joint_vertical(j)) encode_mv_component(w, diff.row, &mvctx->comps[0], usehp); @@ -314,3 +269,4 @@ void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]) { inc_mvs(mbmi->mv, best_ref_mv, is_compound, &cpi->NMVcount); } } + diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index 633177885..4cc10da73 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -14,6 +14,8 @@ #include "vp9/encoder/vp9_onyx_int.h" +void vp9_entropy_mv_init(); + void vp9_write_nmv_probs(VP9_COMP* const, int usehp, vp9_writer* const); void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index df2841020..924f9f324 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -336,9 +336,11 @@ static int frame_max_bits(VP9_COMP *cpi) { const double max_bits = (1.0 * cpi->twopass.bits_left / (cpi->twopass.total_stats.count - cpi->common.current_video_frame)) * (cpi->oxcf.two_pass_vbrmax_section / 100.0); - - // Trap case where we are out of bits. - return MAX((int)max_bits, 0); + if (max_bits < 0) + return 0; + if (max_bits >= INT_MAX) + return INT_MAX; + return (int)max_bits; } void vp9_init_first_pass(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c deleted file mode 100644 index 7eb659232..000000000 --- a/vp9/encoder/vp9_modecosts.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/common/vp9_blockd.h" -#include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_treewriter.h" -#include "vp9/common/vp9_entropymode.h" - - -void vp9_init_mode_costs(VP9_COMP *c) { - VP9_COMMON *const cm = &c->common; - const vp9_tree_index *KT = vp9_intra_mode_tree; - int i, j; - - for (i = 0; i < INTRA_MODES; i++) { - for (j = 0; j < INTRA_MODES; j++) { - vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], - KT); - } - } - - // TODO(rbultje) separate tables for superblock costing? - vp9_cost_tokens(c->mb.mbmode_cost, cm->fc.y_mode_prob[1], - vp9_intra_mode_tree); - vp9_cost_tokens(c->mb.intra_uv_mode_cost[1], - cm->fc.uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); - vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], - vp9_kf_uv_mode_prob[INTRA_MODES - 1], - vp9_intra_mode_tree); - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i], - cm->fc.switchable_interp_prob[i], - vp9_switchable_interp_tree); -} diff --git a/vp9/encoder/vp9_modecosts.h b/vp9/encoder/vp9_modecosts.h deleted file mode 100644 index f43033e5f..000000000 --- a/vp9/encoder/vp9_modecosts.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_MODECOSTS_H_ -#define VP9_ENCODER_VP9_MODECOSTS_H_ - -void vp9_init_mode_costs(VP9_COMP *x); - -#endif // VP9_ENCODER_VP9_MODECOSTS_H_ diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 8b2765104..b28939120 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -24,6 +24,8 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" + +#include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" #include "vp9/encoder/vp9_onyx_int.h" @@ -159,6 +161,7 @@ void vp9_initialize_enc() { vp9_init_me_luts(); vp9_init_minq_luts(); // init_base_skip_probs(); + vp9_entropy_mv_init(); init_done = 1; } } @@ -2690,7 +2693,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, int loop = 0; int overshoot_seen = 0; int undershoot_seen = 0; - int active_worst_qchanged = 0; int q_low = bottom_index, q_high = top_index; do { vp9_clear_system_state(); // __asm emms; @@ -2742,7 +2744,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; - active_worst_qchanged = 0; if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { loop = 0; @@ -2803,16 +2804,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (undershoot_seen || loop_count > 1) { // Update rate_correction_factor unless - // cpi->rc.active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 1); + vp9_update_rate_correction_factors(cpi, 1); q = (q_high + q_low + 1) / 2; } else { // Update rate_correction_factor unless - // cpi->rc.active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 0); + vp9_update_rate_correction_factors(cpi, 0); q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); @@ -2831,15 +2828,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (overshoot_seen || loop_count > 1) { // Update rate_correction_factor unless // cpi->rc.active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 1); + vp9_update_rate_correction_factors(cpi, 1); q = (q_high + q_low) / 2; } else { // Update rate_correction_factor unless // cpi->rc.active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 0); + vp9_update_rate_correction_factors(cpi, 0); q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); @@ -2881,7 +2876,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, #endif } } while (loop); - cpi->rc.active_worst_qchanged = active_worst_qchanged; } static void encode_frame_to_data_rate(VP9_COMP *cpi, @@ -3164,9 +3158,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->rc.projected_frame_size = (*size) << 3; // Post encode loop adjustment of Q prediction. - if (!cpi->rc.active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, (cpi->sf.recode_loop || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + vp9_update_rate_correction_factors( + cpi, (cpi->sf.recode_loop || + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); cpi->rc.last_q[cm->frame_type] = cm->base_qindex; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 52ad1e1c5..03002ef4b 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -340,7 +340,6 @@ typedef struct { int active_worst_quality; int best_quality; int active_best_quality; - int active_worst_qchanged; } RATE_CONTROL; typedef struct VP9_COMP { @@ -432,8 +431,8 @@ typedef struct VP9_COMP { int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; - int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES]; - int64_t rd_prediction_type_threshes[4][NB_PREDICTION_TYPES]; + int64_t rd_comp_pred_diff[REFERENCE_MODES]; + int64_t rd_prediction_type_threshes[4][REFERENCE_MODES]; unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2]; unsigned int single_ref_count[REF_CONTEXTS][2][2]; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 6e4c56c1a..42372e56c 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -244,7 +244,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { cpi->rc.this_frame_target = target; // Target rate per SB64 (including partial SB64s. - cpi->rc.sb64_target_rate = (cpi->rc.this_frame_target * 64 * 64) / + cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) / (cpi->common.width * cpi->common.height); } @@ -274,7 +274,7 @@ static void calc_pframe_target_size(VP9_COMP *cpi) { } // Target rate per SB64 (including partial SB64s. - cpi->rc.sb64_target_rate = (cpi->rc.this_frame_target * 64 * 64) / + cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) / (cpi->common.width * cpi->common.height); @@ -390,7 +390,7 @@ void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { } -int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) { +int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) { int q = cpi->rc.active_worst_quality; int i; diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 57dcd3f15..13357447a 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -21,7 +21,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi); void vp9_setup_key_frame(VP9_COMP *cpi); void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var); -int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame); +int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame); void vp9_adjust_key_frame_context(VP9_COMP *cpi); void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index cea1f292e..43b7d6b7c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -17,7 +17,6 @@ #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_modecosts.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" @@ -114,6 +113,30 @@ static int rd_thresh_block_size_factor[BLOCK_SIZES] = #define MV_COST_WEIGHT 108 #define MV_COST_WEIGHT_SUB 120 +static void fill_mode_costs(VP9_COMP *c) { + VP9_COMMON *const cm = &c->common; + int i, j; + + for (i = 0; i < INTRA_MODES; i++) + for (j = 0; j < INTRA_MODES; j++) + vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], + vp9_intra_mode_tree); + + // TODO(rbultje) separate tables for superblock costing? + vp9_cost_tokens(c->mb.mbmode_cost, cm->fc.y_mode_prob[1], + vp9_intra_mode_tree); + vp9_cost_tokens(c->mb.intra_uv_mode_cost[1], + cm->fc.uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); + vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], + vp9_kf_uv_mode_prob[INTRA_MODES - 1], + vp9_intra_mode_tree); + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i], + cm->fc.switchable_interp_prob[i], + vp9_switchable_interp_tree); +} + static void fill_token_costs(vp9_coeff_cost *c, vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { int i, j, k, l; @@ -258,7 +281,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_partition_tree); /*rough estimate for costing*/ - vp9_init_mode_costs(cpi); + fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { vp9_build_nmv_cost_table( @@ -731,6 +754,32 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, cpi->tx_stepdown_count[0]++; } +static TX_SIZE select_tx_size(TX_MODE tx_mode, TX_SIZE max_tx_size, + int64_t rd[][2]) { + if (max_tx_size == TX_32X32 && + (tx_mode == ALLOW_32X32 || + (tx_mode == TX_MODE_SELECT && + rd[TX_32X32][1] < rd[TX_16X16][1] && + rd[TX_32X32][1] < rd[TX_8X8][1] && + rd[TX_32X32][1] < rd[TX_4X4][1]))) { + return TX_32X32; + } else if (max_tx_size >= TX_16X16 && + (tx_mode == ALLOW_16X16 || + tx_mode == ALLOW_32X32 || + (tx_mode == TX_MODE_SELECT && + rd[TX_16X16][1] < rd[TX_8X8][1] && + rd[TX_16X16][1] < rd[TX_4X4][1]))) { + return TX_16X16; + } else if (tx_mode == ALLOW_8X8 || + tx_mode == ALLOW_16X16 || + tx_mode == ALLOW_32X32 || + (tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { + return TX_8X8; + } else { + return TX_4X4; + } +} + static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int64_t *d, int64_t *distortion, @@ -777,27 +826,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } } - if (max_tx_size == TX_32X32 && - (cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]))) { - mbmi->tx_size = TX_32X32; - } else if (max_tx_size >= TX_16X16 && - (cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1]))) { - mbmi->tx_size = TX_16X16; - } else if (cm->tx_mode == ALLOW_8X8 || - cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { - mbmi->tx_size = TX_8X8; - } else { - mbmi->tx_size = TX_4X4; - } + mbmi->tx_size = select_tx_size(cm->tx_mode, max_tx_size, rd); *distortion = d[mbmi->tx_size]; *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT]; @@ -883,29 +912,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]); } - if (max_tx_size == TX_32X32 && - (cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_32X32][1] <= rd[TX_16X16][1] && - rd[TX_32X32][1] <= rd[TX_8X8][1] && - rd[TX_32X32][1] <= rd[TX_4X4][1]))) { - mbmi->tx_size = TX_32X32; - } else if (max_tx_size >= TX_16X16 && - (cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_16X16][1] <= rd[TX_8X8][1] && - rd[TX_16X16][1] <= rd[TX_4X4][1]))) { - mbmi->tx_size = TX_16X16; - } else if (cm->tx_mode == ALLOW_8X8 || - cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_8X8][1] <= rd[TX_4X4][1])) { - mbmi->tx_size = TX_8X8; - } else { - mbmi->tx_size = TX_4X4; - } + mbmi->tx_size = select_tx_size(cm->tx_mode, max_tx_size, rd); // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs @@ -2194,7 +2201,7 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd); vp9_prob comp_inter_p = 128; - if (cm->comp_pred_mode == HYBRID_PREDICTION) { + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) { comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd); *comp_mode_p = comp_inter_p; } else { @@ -2203,12 +2210,12 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); - if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { + if (cm->comp_pred_mode != COMPOUND_REFERENCE) { vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); - if (cm->comp_pred_mode == HYBRID_PREDICTION) + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) base_cost += vp9_cost_bit(comp_inter_p, 0); ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = @@ -2223,11 +2230,11 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, ref_costs_single[GOLDEN_FRAME] = 512; ref_costs_single[ALTREF_FRAME] = 512; } - if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) { + if (cm->comp_pred_mode != SINGLE_REFERENCE) { vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); - if (cm->comp_pred_mode == HYBRID_PREDICTION) + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) base_cost += vp9_cost_bit(comp_inter_p, 1); ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); @@ -2243,7 +2250,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, int_mv *ref_mv, int_mv *second_ref_mv, - int64_t comp_pred_diff[NB_PREDICTION_TYPES], + int64_t comp_pred_diff[REFERENCE_MODES], int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { MACROBLOCKD *const xd = &x->e_mbd; @@ -2257,9 +2264,9 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->best_ref_mv.as_int = ref_mv->as_int; ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; - ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; - ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; - ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; + ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; + ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; + ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); vpx_memcpy(ctx->best_filter_diff, best_filter_diff, @@ -2782,9 +2789,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(*mode_excluded)) { if (is_comp_pred) { - *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); + *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_REFERENCE); } else { - *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); + *mode_excluded = (cpi->common.comp_pred_mode == COMPOUND_REFERENCE); } } @@ -3149,8 +3156,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd = best_rd_so_far; int64_t best_tx_rd[TX_MODES]; int64_t best_tx_diff[TX_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; + int64_t best_pred_diff[REFERENCE_MODES]; + int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; @@ -3186,7 +3193,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < TX_MODES; i++) best_tx_rd[i] = INT64_MAX; @@ -3363,12 +3370,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_excluded = mode_excluded ? mode_excluded - : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; + : cm->comp_pred_mode == SINGLE_REFERENCE; } else { if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { mode_excluded = mode_excluded ? - mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; + mode_excluded : cm->comp_pred_mode == COMPOUND_REFERENCE; } } @@ -3491,7 +3498,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } - if (cm->comp_pred_mode == HYBRID_PREDICTION) { + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) { rate2 += compmode_cost; } @@ -3576,7 +3583,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip && ref_frame == INTRA_FRAME) { - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); @@ -3638,7 +3645,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!disable_skip && ref_frame != INTRA_FRAME) { int single_rd, hybrid_rd, single_rate, hybrid_rate; - if (cm->comp_pred_mode == HYBRID_PREDICTION) { + if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; hybrid_rate = rate2; } else { @@ -3650,14 +3657,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; + single_rd < best_pred_rd[SINGLE_REFERENCE]) { + best_pred_rd[SINGLE_REFERENCE] = single_rd; } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; + single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + best_pred_rd[COMPOUND_REFERENCE] = single_rd; } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; + if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) + best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; } /* keep record of best filter type */ @@ -3779,7 +3786,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *mbmi = best_mbmode; x->skip |= best_skip2; - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { + for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else @@ -3850,8 +3857,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise int64_t best_tx_rd[TX_MODES]; int64_t best_tx_diff[TX_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; + int64_t best_pred_diff[REFERENCE_MODES]; + int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; @@ -3886,7 +3893,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < TX_MODES; i++) best_tx_rd[i] = INT64_MAX; @@ -4030,12 +4037,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mode_excluded = mode_excluded ? mode_excluded - : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; + : cm->comp_pred_mode == SINGLE_REFERENCE; } else { if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { mode_excluded = mode_excluded ? - mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; + mode_excluded : cm->comp_pred_mode == COMPOUND_REFERENCE; } } @@ -4241,9 +4248,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (!mode_excluded) { if (comp_pred) - mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; + mode_excluded = cpi->common.comp_pred_mode == SINGLE_REFERENCE; else - mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; + mode_excluded = cpi->common.comp_pred_mode == COMPOUND_REFERENCE; } compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); @@ -4271,7 +4278,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) { rate2 += compmode_cost; } @@ -4332,7 +4339,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip && ref_frame == INTRA_FRAME) { - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); @@ -4389,7 +4396,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (!disable_skip && ref_frame != INTRA_FRAME) { int single_rd, hybrid_rd, single_rate, hybrid_rate; - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; hybrid_rate = rate2; } else { @@ -4401,14 +4408,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; + single_rd < best_pred_rd[SINGLE_REFERENCE]) { + best_pred_rd[SINGLE_REFERENCE] = single_rd; } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; + single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + best_pred_rd[COMPOUND_REFERENCE] = single_rd; } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; + if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) + best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; } /* keep record of best filter type */ @@ -4524,7 +4531,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int; } - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { + for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 6e4a498cb..eefbd1ac9 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -124,7 +124,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM) -#VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_1_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_add_neon$(ASM) diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index bd13518f5..74727848e 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -38,7 +38,6 @@ VP9_CX_SRCS-yes += encoder/vp9_firstpass.h VP9_CX_SRCS-yes += encoder/vp9_lookahead.c VP9_CX_SRCS-yes += encoder/vp9_lookahead.h VP9_CX_SRCS-yes += encoder/vp9_mcomp.h -VP9_CX_SRCS-yes += encoder/vp9_modecosts.h VP9_CX_SRCS-yes += encoder/vp9_onyx_int.h VP9_CX_SRCS-yes += encoder/vp9_psnr.h VP9_CX_SRCS-yes += encoder/vp9_quantize.h @@ -49,7 +48,6 @@ VP9_CX_SRCS-yes += encoder/vp9_tokenize.h VP9_CX_SRCS-yes += encoder/vp9_treewriter.h VP9_CX_SRCS-yes += encoder/vp9_variance.h VP9_CX_SRCS-yes += encoder/vp9_mcomp.c -VP9_CX_SRCS-yes += encoder/vp9_modecosts.c VP9_CX_SRCS-yes += encoder/vp9_onyx_if.c VP9_CX_SRCS-yes += encoder/vp9_picklpf.c VP9_CX_SRCS-yes += encoder/vp9_picklpf.h diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 7e76682d4..f43172170 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -30,7 +30,6 @@ VP9_DX_SRCS-yes += decoder/vp9_onyxd.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h VP9_DX_SRCS-yes += decoder/vp9_thread.c VP9_DX_SRCS-yes += decoder/vp9_thread.h -VP9_DX_SRCS-yes += decoder/vp9_treereader.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h @@ -40,6 +40,7 @@ #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" #include "./vpxstats.h" +#include "./warnings.h" #include "./webmenc.h" #include "./y4minput.h" @@ -238,13 +239,16 @@ static const arg_def_t rate_hist_n = ARG_DEF(NULL, "rate-hist", 1, static const arg_def_t disable_warnings = ARG_DEF(NULL, "disable-warnings", 0, "Disable warnings about potentially incorrect encode settings."); +static const arg_def_t disable_warning_prompt = + ARG_DEF("y", "disable-warning-prompt", 0, + "Display warnings, but do not prompt user to continue."); static const arg_def_t *main_args[] = { &debugmode, &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &skip, &deadline, &best_dl, &good_dl, &rt_dl, &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, - &rate_hist_n, &disable_warnings, + &rate_hist_n, &disable_warnings, &disable_warning_prompt, NULL }; @@ -987,6 +991,10 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { global->show_q_hist_buckets = arg_parse_uint(&arg); else if (arg_match(&arg, &rate_hist_n, argi)) global->show_rate_hist_buckets = arg_parse_uint(&arg); + else if (arg_match(&arg, &disable_warnings, argi)) + global->disable_warnings = 1; + else if (arg_match(&arg, &disable_warning_prompt, argi)) + global->disable_warning_prompt = 1; else argj++; } @@ -1259,7 +1267,7 @@ static int parse_stream_params(struct VpxEncoderConfig *global, #define FOREACH_STREAM(func) \ do { \ struct stream_state *stream; \ - for(stream = streams; stream; stream = stream->next) { \ + for (stream = streams; stream; stream = stream->next) { \ func; \ } \ } while (0) @@ -1729,31 +1737,6 @@ static void print_time(const char *label, int64_t etl) { } } -int continue_prompt() { - int c; - fprintf(stderr, "Continue? (y to continue) "); - c = getchar(); - return c == 'y'; -} - -void check_quantizer(struct VpxEncoderConfig* config, int min_q, int max_q) { - int check_failed = 0; - - if (config->disable_warnings) - return; - - if (min_q == max_q || abs(max_q - min_q) < 8) { - check_failed = 1; - } - - if (check_failed) { - warn("Bad quantizer values. Quantizer values must not be equal, and " - "should differ by at least 8."); - - if (!continue_prompt()) - exit(EXIT_FAILURE); - } -} int main(int argc, const char **argv_) { int pass; @@ -1807,10 +1790,9 @@ int main(int argc, const char **argv_) { if (argi[0][0] == '-' && argi[0][1]) die("Error: Unrecognized option %s\n", *argi); - FOREACH_STREAM( - check_quantizer(&global, - stream->config.cfg.rc_min_quantizer, - stream->config.cfg.rc_max_quantizer);); + FOREACH_STREAM(check_encoder_config(global.disable_warning_prompt, + &global, &stream->config.cfg);); + /* Handle non-option arguments */ input.filename = argv[0]; diff --git a/warnings.c b/warnings.c new file mode 100644 index 000000000..96400db2d --- /dev/null +++ b/warnings.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./warnings.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vpx_encoder.h" + +#include "./tools_common.h" +#include "./vpxenc.h" + +static const char quantizer_warning_string[] = + "Bad quantizer values. Quantizer values should not be equal, and should " + "differ by at least 8."; +static const char lag_in_frames_with_realtime[] = + "Lag in frames is ignored when deadline is set to realtime."; + +struct WarningListNode { + const char *warning_string; + struct WarningListNode *next_warning; +}; + +struct WarningList { + struct WarningListNode *warning_node; +}; + +static void add_warning(const char *warning_string, + struct WarningList *warning_list) { + struct WarningListNode **node = &warning_list->warning_node; + + struct WarningListNode *new_node = malloc(sizeof(*new_node)); + if (new_node == NULL) { + fatal("Unable to allocate warning node."); + } + + new_node->warning_string = warning_string; + new_node->next_warning = NULL; + + while (*node != NULL) + node = &(*node)->next_warning; + + *node = new_node; +} + +static void free_warning_list(struct WarningList *warning_list) { + struct WarningListNode *node = warning_list->warning_node; + while (warning_list->warning_node != NULL) { + node = warning_list->warning_node->next_warning; + free(warning_list->warning_node); + warning_list->warning_node = node; + } +} + +static int continue_prompt(int num_warnings) { + int c; + fprintf(stderr, + "%d encoder configuration warning(s). Continue? (y to continue) ", + num_warnings); + c = getchar(); + return c == 'y'; +} + +static void check_lag_in_frames_realtime_deadline( + int lag_in_frames, + int deadline, + struct WarningList *warning_list) { + if (deadline == VPX_DL_REALTIME && lag_in_frames != 0) + add_warning(lag_in_frames_with_realtime, warning_list); +} + +static void check_quantizer(int min_q, int max_q, + struct WarningList *warning_list) { + if (min_q == max_q || abs(max_q - min_q) < 8) + add_warning(quantizer_warning_string, warning_list); +} + +void check_encoder_config(int disable_prompt, + const struct VpxEncoderConfig *global_config, + const struct vpx_codec_enc_cfg *stream_config) { + int num_warnings = 0; + struct WarningListNode *warning = NULL; + struct WarningList warning_list = {0}; + + check_quantizer(stream_config->rc_min_quantizer, + stream_config->rc_max_quantizer, + &warning_list); + check_lag_in_frames_realtime_deadline(stream_config->g_lag_in_frames, + global_config->deadline, + &warning_list); + + /* Count and print warnings. */ + for (warning = warning_list.warning_node; + warning != NULL; + warning = warning->next_warning, + ++num_warnings) { + warn(warning->warning_string); + } + + free_warning_list(&warning_list); + + if (num_warnings) { + if (!disable_prompt && !continue_prompt(num_warnings)) + exit(EXIT_FAILURE); + } +} diff --git a/warnings.h b/warnings.h new file mode 100644 index 000000000..ac3a4b63e --- /dev/null +++ b/warnings.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef WARNINGS_H_ +#define WARNINGS_H_ + +struct vpx_codec_enc_cfg; +struct VpxEncoderConfig; + +/* + * Checks config for improperly used settings. Warns user upon encountering + * settings that will lead to poor output quality. Prompts user to continue + * when warnings are issued. + */ +void check_encoder_config(int disable_prompt, + const struct VpxEncoderConfig *global_config, + const struct vpx_codec_enc_cfg *stream_config); + +#endif // WARNINGS_H_ |