diff options
author | Jingning Han <jingning@google.com> | 2015-02-27 09:49:30 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-02-27 09:49:30 -0800 |
commit | 89ee460ee4cebcef775109d8267d5475d03408db (patch) | |
tree | a741f7d4cfd2e79a1f0693359de047c002b43a37 /vp9/encoder | |
parent | c4cb8059ff921e5318d018557ac557348e76a64f (diff) | |
parent | 73a00d3219b474279c169f779d29dd7f5ec5cf24 (diff) | |
download | libvpx-89ee460ee4cebcef775109d8267d5475d03408db.tar libvpx-89ee460ee4cebcef775109d8267d5475d03408db.tar.gz libvpx-89ee460ee4cebcef775109d8267d5475d03408db.tar.bz2 libvpx-89ee460ee4cebcef775109d8267d5475d03408db.zip |
Merge "Refactor integral projection based motion estimation"
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 62 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_avg_intrin_sse2.c | 26 |
2 files changed, 40 insertions, 48 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index ff12bf8a2..a5e9ed616 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -519,13 +519,14 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) { #endif #if GLOBAL_MOTION -static int vector_match(int16_t *ref, int16_t *src) { +static int vector_match(int16_t *ref, int16_t *src, int length) { int best_sad = INT_MAX; int this_sad; int d; int center, offset = 0; - for (d = 0; d <= 64; d += 16) { - this_sad = vp9_vector_sad(&ref[d], src, 64); + int bw = length; // redundant variable, to be changed in the experiments. + for (d = 0; d <= bw; d += 16) { + this_sad = vp9_vector_sad(&ref[d], src, length); if (this_sad < best_sad) { best_sad = this_sad; offset = d; @@ -536,9 +537,9 @@ static int vector_match(int16_t *ref, int16_t *src) { for (d = -8; d <= 8; d += 16) { int this_pos = offset + d; // check limit - if (this_pos < 0 || this_pos > 64) + if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_sad(&ref[this_pos], src, 64); + this_sad = vp9_vector_sad(&ref[this_pos], src, length); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -549,9 +550,9 @@ static int vector_match(int16_t *ref, int16_t *src) { for (d = -4; d <= 4; d += 8) { int this_pos = offset + d; // check limit - if (this_pos < 0 || this_pos > 64) + if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_sad(&ref[this_pos], src, 64); + this_sad = vp9_vector_sad(&ref[this_pos], src, length); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -562,9 +563,9 @@ static int vector_match(int16_t *ref, int16_t *src) { for (d = -2; d <= 2; d += 4) { int this_pos = offset + d; // check limit - if (this_pos < 0 || this_pos > 64) + if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_sad(&ref[this_pos], src, 64); + this_sad = vp9_vector_sad(&ref[this_pos], src, length); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -575,16 +576,16 @@ static int vector_match(int16_t *ref, int16_t *src) { for (d = -1; d <= 1; d += 2) { int this_pos = offset + d; // check limit - if (this_pos < 0 || this_pos > 64) + if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_sad(&ref[this_pos], src, 64); + this_sad = vp9_vector_sad(&ref[this_pos], src, length); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; } } - return (center - 32); + return (center - (bw >> 1)); } static const MV search_pos[9] = { @@ -592,16 +593,18 @@ static const MV search_pos[9] = { {1, -1}, {1, 0}, {1, 1}, }; -static void motion_estimation(VP9_COMP *cpi, MACROBLOCK *x) { +static void motion_estimation(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, int16_t, hbuf[128]); DECLARE_ALIGNED(16, int16_t, vbuf[128]); DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); int idx; - const int stride = 64; - const int search_width = 128; - const int search_height = 128; + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int search_width = bw << 1; + const int search_height = bh << 1; const int src_stride = x->plane[0].src.stride; const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; @@ -610,34 +613,33 @@ static void motion_estimation(VP9_COMP *cpi, MACROBLOCK *x) { MV this_mv; // Set up prediction 1-D reference set - ref_buf = xd->plane[0].pre[0].buf + (-32); + ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); for (idx = 0; idx < search_width; idx += 16) { - vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, 64); + vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); ref_buf += 16; } - ref_buf = xd->plane[0].pre[0].buf + (-32) * ref_stride; + ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; for (idx = 0; idx < search_height; ++idx) { - vbuf[idx] = vp9_int_pro_col(ref_buf, 64); + vbuf[idx] = vp9_int_pro_col(ref_buf, bw); ref_buf += ref_stride; } // Set up src 1-D reference set - for (idx = 0; idx < stride; idx += 16) { + for (idx = 0; idx < bw; idx += 16) { src_buf = x->plane[0].src.buf + idx; - vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, 64); + vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); } src_buf = x->plane[0].src.buf; - for (idx = 0; idx < stride; ++idx) { - src_vbuf[idx] = vp9_int_pro_col(src_buf, 64); + for (idx = 0; idx < bh; ++idx) { + src_vbuf[idx] = vp9_int_pro_col(src_buf, bw); src_buf += src_stride; } // Find the best match per 1-D search - - tmp_mv->col = vector_match(hbuf, src_hbuf); - tmp_mv->row = vector_match(vbuf, src_vbuf); + tmp_mv->col = vector_match(hbuf, src_hbuf, bw); + tmp_mv->row = vector_match(vbuf, src_vbuf, bh); best_sad = INT_MAX; this_mv = *tmp_mv; @@ -648,8 +650,8 @@ static void motion_estimation(VP9_COMP *cpi, MACROBLOCK *x) { (search_pos[idx].row + this_mv.row) * ref_stride + (search_pos[idx].col + this_mv.col); - this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src_buf, src_stride, - ref_buf, ref_stride); + this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, + ref_buf, ref_stride); if (this_sad < best_sad) { best_sad = this_sad; tmp_mv->row = search_pos[idx].row + this_mv.row; @@ -717,7 +719,7 @@ static void choose_partitioning(VP9_COMP *cpi, mbmi->interp_filter = BILINEAR; #if GLOBAL_MOTION - motion_estimation(cpi, x); + motion_estimation(cpi, x, BLOCK_64X64); #endif vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c index 0a105629f..123255a72 100644 --- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c +++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c @@ -100,22 +100,14 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { __m128i src_line = _mm_load_si128((const __m128i *)ref); __m128i s0 = _mm_sad_epu8(src_line, zero); __m128i s1; - (void) width; // width = 64 + int i; - ref += 16; - src_line = _mm_load_si128((const __m128i *)ref); - s1 = _mm_sad_epu8(src_line, zero); - s0 = _mm_adds_epu16(s0, s1); - - ref += 16; - src_line = _mm_load_si128((const __m128i *)ref); - s1 = _mm_sad_epu8(src_line, zero); - s0 = _mm_adds_epu16(s0, s1); - - ref += 16; - src_line = _mm_load_si128((const __m128i *)ref); - s1 = _mm_sad_epu8(src_line, zero); - s0 = _mm_adds_epu16(s0, s1); + for (i = 16; i < width; i += 16) { + ref += 16; + src_line = _mm_load_si128((const __m128i *)ref); + s1 = _mm_sad_epu8(src_line, zero); + s0 = _mm_adds_epu16(s0, s1); + } s1 = _mm_srli_si128(s0, 8); s0 = _mm_adds_epu16(s0, s1); @@ -136,8 +128,6 @@ int vp9_vector_sad_sse2(int16_t const *ref, int16_t const *src, diff = _mm_xor_si128(diff, sign); sum = _mm_sub_epi16(diff, sign); - (void) width; // width = 64; - ref += 8; src += 8; @@ -145,7 +135,7 @@ int vp9_vector_sad_sse2(int16_t const *ref, int16_t const *src, v1 = _mm_unpackhi_epi16(sum, zero); sum = _mm_add_epi32(v0, v1); - for (idx = 1; idx < 8; ++idx) { + for (idx = 8; idx < width; idx += 8) { v0 = _mm_loadu_si128((const __m128i *)ref); v1 = _mm_load_si128((const __m128i *)src); diff = _mm_subs_epi16(v0, v1); |