author    Jingning Han <jingning@google.com>  2015-03-09 18:55:38 -0700
committer Jingning Han <jingning@google.com>  2015-03-11 16:03:49 -0700
commit    54eda13f8df587fe0a5a202f232f66863aff445a (patch)
tree      fbcc16b3f514cd3304c620574ce894318e6b043d /vp9
parent    1ca4d51b2e1a370b807919e567f6d7796885c40f (diff)
Apply fast motion search to golden reference frame
This commit enables the RTC coding mode to run the integral-projection-based motion search on the golden reference frame. It improves speed -6 compression performance by 1.1% on average: 3.46% for jimred_vga, 6.46% for tacomascmvvga, and 0.5% for the vidyo clips. Speed -6 encoding is about 6% slower.

Change-Id: I0fe402ad2edf0149d0349ad304ab9b2abdf0c804
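For context, the integral-projection search referenced above collapses the 2-D block match into two 1-D matches: source and reference blocks are reduced to row-sum and column-sum (integral projection) vectors, and candidate offsets are scored by SAD over those short vectors instead of over full pixel blocks. Below is a minimal sketch of the idea with hypothetical helper names; the library's actual entry point is vp9_int_pro_motion_estimation (called in the diff below), with the projection step vectorized by the SSE2 routines this commit also touches.

#include <limits.h>
#include <stdint.h>
#include <stdlib.h>

/* Collapse a w x h block into a width-w vector of column sums. */
static void col_projection(const uint8_t *p, int stride, int w, int h,
                           int *proj) {
  for (int x = 0; x < w; ++x) {
    int sum = 0;
    for (int y = 0; y < h; ++y) sum += p[y * stride + x];
    proj[x] = sum;
  }
}

/* 1-D SAD between two projection vectors. */
static int proj_sad(const int *a, const int *b, int n) {
  int sad = 0;
  for (int i = 0; i < n; ++i) sad += abs(a[i] - b[i]);
  return sad;
}

/* Find the horizontal offset in [-range, range] whose column projection
 * best matches the source block's; rows are handled symmetrically, so a
 * 2-D search becomes two 1-D searches.  The caller must guarantee
 * `range` pixels of margin around `ref`, and w <= 64 in this sketch. */
static int best_col_offset(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           int w, int h, int range) {
  int src_proj[64], ref_proj[64];
  int best_sad = INT_MAX, best_off = 0;
  col_projection(src, src_stride, w, h, src_proj);
  for (int off = -range; off <= range; ++off) {
    col_projection(ref + off, ref_stride, w, h, ref_proj);
    const int sad = proj_sad(src_proj, ref_proj, w);
    if (sad < best_sad) {
      best_sad = sad;
      best_off = off;
    }
  }
  return best_off;
}

Matching two length-w and length-h vectors instead of a w x h block is what makes this search cheap enough to run at RTC speed levels.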
Diffstat (limited to 'vp9')
-rw-r--r--  vp9/encoder/vp9_pickmode.c             40
-rw-r--r--  vp9/encoder/x86/vp9_avg_intrin_sse2.c  12
2 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 6c2576add..23a2569c8 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -784,15 +784,43 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (this_mode == NEWMV) {
- if (ref_frame > LAST_FRAME)
- continue;
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
- if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rdc.rdcost))
+
+ if (ref_frame > LAST_FRAME) {
+ int tmp_sad;
+ int dis, cost_list[5];
+
+ if (bsize < BLOCK_16X16)
+ continue;
+
+ tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+ if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
+ continue;
+
+ frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
+ rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
+ frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
+
+ cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis,
+ &x->pred_sse[ref_frame], NULL, 0, 0);
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame],
+ &rate_mv, best_rdc.rdcost)) {
continue;
+ }
}
if (this_mode != NEARESTMV &&
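The new golden-frame branch above swaps the full combined motion search for a cheaper pipeline: a coarse integral-projection search, a SAD gate, then sub-pel refinement. Two details are easy to miss. The gate skips blocks below 16x16 (presumably because 1-D projections become too noisy there) and rejects the golden frame whenever its coarse SAD exceeds the SAD already achieved with the last frame's predicted MV. And because VP9 stores motion vectors in 1/8-pel units, the >>= 3 shifts convert the coarse vector to the full-pel starting point that find_fractional_mv_step expects. A standalone sketch of those two details, not the libvpx code:

#include <stdint.h>

typedef struct { int16_t row, col; } MV;  /* VP9 MVs are in 1/8-pel units */

/* Gate sketch: golden-frame NEWMV is attempted only for blocks of at
 * least 16x16, and only when the coarse SAD does not exceed the SAD
 * already achieved against the last frame's predicted MV. */
static int golden_newmv_gate(int block_wide, int block_high,
                             int coarse_sad, int last_pred_mv_sad) {
  if (block_wide < 16 || block_high < 16) return 0;
  return coarse_sad <= last_pred_mv_sad;
}

/* The fractional search expects a full-pel starting point, hence the
 * >>= 3 in the diff; it then writes back a refined 1/8-pel vector. */
static MV mv_to_fullpel(MV v) {
  MV out = { (int16_t)(v.row >> 3), (int16_t)(v.col >> 3) };
  return out;
}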
@@ -817,7 +845,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
- pred_filter_search &&
+ pred_filter_search && (ref_frame == LAST_FRAME) &&
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
(mbmi->mv[0].as_mv.col & 0x07) != 0)) {
int pf_rate[3];
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index f49949940..618b5f73d 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -61,7 +61,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
const int ref_stride, const int height) {
int idx;
__m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_load_si128((const __m128i *)ref);
+ __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_unpacklo_epi8(src_line, zero);
__m128i s1 = _mm_unpackhi_epi8(src_line, zero);
__m128i t0, t1;
@@ -69,14 +69,14 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
ref += ref_stride;
for (idx = 1; idx < height_1; idx += 2) {
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
ref += ref_stride;
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
@@ -84,7 +84,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
ref += ref_stride;
}
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
@@ -101,9 +101,9 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
s1 = _mm_srai_epi16(s1, 3);
}
- _mm_store_si128((__m128i *)hbuf, s0);
+ _mm_storeu_si128((__m128i *)hbuf, s0);
hbuf += 8;
- _mm_store_si128((__m128i *)hbuf, s1);
+ _mm_storeu_si128((__m128i *)hbuf, s1);
}
int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
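Every change in this file is the same fix: the aligned load/store intrinsics (_mm_load_si128, _mm_store_si128) become their unaligned counterparts (_mm_loadu_si128, _mm_storeu_si128), presumably because the projection routines are now fed reference pointers at arbitrary offsets inside the golden frame, so 16-byte alignment of ref (and of the caller's hbuf) can no longer be assumed and an aligned access would fault. A standalone illustration of the distinction, not libvpx code:

#include <emmintrin.h>  /* SSE2 */
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  alignas(16) uint8_t buf[32];
  uint8_t out[32];
  for (int i = 0; i < 32; ++i) buf[i] = (uint8_t)i;

  /* Aligned load: valid only at 16-byte boundaries such as buf + 0.
   * At buf + 1 it would be undefined behavior (a fault on most CPUs). */
  const __m128i a = _mm_load_si128((const __m128i *)buf);

  /* Unaligned load: valid at any address, e.g. the odd offset buf + 1. */
  const __m128i u = _mm_loadu_si128((const __m128i *)(buf + 1));

  _mm_storeu_si128((__m128i *)out, a);
  _mm_storeu_si128((__m128i *)(out + 16), u);
  printf("%d %d\n", out[0], out[16]);  /* prints "0 1" */
  return 0;
}

On current x86 cores the unaligned forms cost essentially nothing when the address happens to be aligned (the penalty appears mainly on cache-line splits), which is why the swap is safe to apply unconditionally.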