summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com> 2015-03-12 09:23:41 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2015-03-12 09:23:41 -0700
commit: 594890a534bc1beb4fb7f00e9bf134ca43ed6729 (patch)
tree: 0aefbbb58b2603824571d615b6da8247a9021d12
parent: 8fdddd5c0152301494babe8b0a220529c8f28ef1 (diff)
parent: 54eda13f8df587fe0a5a202f232f66863aff445a (diff)
download: libvpx-594890a534bc1beb4fb7f00e9bf134ca43ed6729.tar
libvpx-594890a534bc1beb4fb7f00e9bf134ca43ed6729.tar.gz
libvpx-594890a534bc1beb4fb7f00e9bf134ca43ed6729.tar.bz2
libvpx-594890a534bc1beb4fb7f00e9bf134ca43ed6729.zip
Merge "Apply fast motion search to golden reference frame"
-rw-r--r-- vp9/encoder/vp9_pickmode.c 40
-rw-r--r-- vp9/encoder/x86/vp9_avg_intrin_sse2.c 12
2 files changed, 40 insertions, 12 deletions
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 6c2576add..23a2569c8 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -784,15 +784,43 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (this_mode == NEWMV) {
- if (ref_frame > LAST_FRAME)
- continue;
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
- if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rdc.rdcost))
+
+ if (ref_frame > LAST_FRAME) {
+ int tmp_sad;
+ int dis, cost_list[5];
+
+ if (bsize < BLOCK_16X16)
+ continue;
+
+ tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+ if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
+ continue;
+
+ frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
+ rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
+ frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
+
+ cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis,
+ &x->pred_sse[ref_frame], NULL, 0, 0);
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame],
+ &rate_mv, best_rdc.rdcost)) {
continue;
+ }
}
if (this_mode != NEARESTMV &&
@@ -817,7 +845,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
- pred_filter_search &&
+ pred_filter_search && (ref_frame == LAST_FRAME) &&
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
(mbmi->mv[0].as_mv.col & 0x07) != 0)) {
int pf_rate[3];
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index f49949940..618b5f73d 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -61,7 +61,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
const int ref_stride, const int height) {
int idx;
__m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_load_si128((const __m128i *)ref);
+ __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_unpacklo_epi8(src_line, zero);
__m128i s1 = _mm_unpackhi_epi8(src_line, zero);
__m128i t0, t1;
@@ -69,14 +69,14 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
ref += ref_stride;
for (idx = 1; idx < height_1; idx += 2) {
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
ref += ref_stride;
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
@@ -84,7 +84,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
ref += ref_stride;
}
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
@@ -101,9 +101,9 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
s1 = _mm_srai_epi16(s1, 3);
}
- _mm_store_si128((__m128i *)hbuf, s0);
+ _mm_storeu_si128((__m128i *)hbuf, s0);
hbuf += 8;
- _mm_store_si128((__m128i *)hbuf, s1);
+ _mm_storeu_si128((__m128i *)hbuf, s1);
}
int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {