diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_alloccommon.c | 15 | ||||
-rw-r--r-- | vp9/common/vp9_mvref_common.c | 44 | ||||
-rw-r--r-- | vp9/common/vp9_onyxc_int.h | 15 | ||||
-rw-r--r-- | vp9/common/x86/vp9_loopfilter_intrin_avx2.c | 111 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 21 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodemv.c | 20 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 93 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 21 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 152 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 38 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 6 |
16 files changed, 437 insertions, 137 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 8b3b9dbe0..5b8b2a9ec 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -87,13 +87,14 @@ void vp9_free_ref_frame_buffers(VP9_COMMON *cm) { int i; for (i = 0; i < FRAME_BUFFERS; ++i) { - vp9_free_frame_buffer(&cm->frame_bufs[i].buf); - if (cm->frame_bufs[i].ref_count > 0 && cm->frame_bufs[i].raw_frame_buffer.data != NULL) { cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer); cm->frame_bufs[i].ref_count = 0; } + vpx_free(cm->frame_bufs[i].mvs); + cm->frame_bufs[i].mvs = NULL; + vp9_free_frame_buffer(&cm->frame_bufs[i].buf); } vp9_free_frame_buffer(&cm->post_proc_buffer); @@ -166,6 +167,16 @@ int vp9_alloc_ref_frame_buffers(VP9_COMMON *cm, int width, int height) { #endif VP9_ENC_BORDER_IN_PIXELS) < 0) goto fail; + if (cm->frame_bufs[i].mvs == NULL) { + cm->frame_bufs[i].mvs = + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cm->frame_bufs[i].mvs)); + if (cm->frame_bufs[i].mvs == NULL) + goto fail; + + cm->frame_bufs[i].mi_rows = cm->mi_rows; + cm->frame_bufs[i].mi_cols = cm->mi_cols; + } } init_frame_bufs(cm); diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index b310eb44d..561201ffe 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -20,13 +20,11 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, int block, int mi_row, int mi_col) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const MODE_INFO *prev_mi = !cm->error_resilient_mode && cm->prev_mi - ? cm->prev_mi[mi_row * xd->mi_stride + mi_col].src_mi - : NULL; - const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->src_mi->mbmi : NULL; const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; int different_ref_found = 0; int context_counter = 0; + const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? + cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; // Blank the reference vector list vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); @@ -71,11 +69,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, } // Check the last frame's mode and mv info. - if (prev_mbmi) { - if (prev_mbmi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(prev_mbmi->mv[0], refmv_count, mv_ref_list, Done); - else if (prev_mbmi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(prev_mbmi->mv[1], refmv_count, mv_ref_list, Done); + if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } } // Since we couldn't find 2 mvs from the same reference frame @@ -96,9 +95,30 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, } // Since we still don't have a candidate we'll try the last frame. - if (prev_mbmi) - IF_DIFF_REF_FRAME_ADD_MV(prev_mbmi, ref_frame, ref_sign_bias, refmv_count, - mv_ref_list, Done); + if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + } Done: diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index b818ae818..b3a6590b2 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -56,9 +56,16 @@ typedef enum { REFERENCE_MODES = 3, } REFERENCE_MODE; +typedef struct { + int_mv mv[2]; + MV_REFERENCE_FRAME ref_frame[2]; +} MV_REF; typedef struct { int ref_count; + MV_REF *mvs; + int mi_rows; + int mi_cols; vpx_codec_frame_buffer_t raw_frame_buffer; YV12_BUFFER_CONFIG buf; } RefCntBuffer; @@ -91,6 +98,10 @@ typedef struct VP9Common { YV12_BUFFER_CONFIG *frame_to_show; RefCntBuffer frame_bufs[FRAME_BUFFERS]; + RefCntBuffer *prev_frame; + + // TODO(hkuang): Combine this with cur_buf in macroblockd. + RefCntBuffer *cur_frame; int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ @@ -149,6 +160,10 @@ typedef struct VP9Common { MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ + + // Whether to use previous frame's motion vectors for prediction. + int use_prev_frame_mvs; + // Persistent mb segment id map used in prediction. unsigned char *last_frame_seg_map; diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c index 439c028f2..0cb0912ad 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c +++ b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c @@ -9,6 +9,7 @@ */ #include <immintrin.h> /* AVX2 */ +#include "vpx_ports/mem.h" static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, @@ -392,6 +393,11 @@ static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, } } +DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { + 0, 128, 1, 128, 2, 128, 3, 128, 4, 128, 5, 128, 6, 128, 7, 128, + 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 +}; + static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, const unsigned char *_thresh) { @@ -401,6 +407,9 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, __m128i p7, p6, p5; __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; __m128i q5, q6, q7; + __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, + q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, + p256_0, q256_0; const __m128i thresh = _mm_broadcastb_epi8( _mm_cvtsi32_si128((int) _thresh[0])); @@ -409,16 +418,37 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, const __m128i blimit = _mm_broadcastb_epi8( _mm_cvtsi32_si128((int) _blimit[0])); - p4 = _mm_loadu_si128((__m128i *) (s - 5 * p)); - p3 = _mm_loadu_si128((__m128i *) (s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *) (s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *) (s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *) (s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *) (s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *) (s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *) (s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *) (s + 3 * p)); - q4 = _mm_loadu_si128((__m128i *) (s + 4 * p)); + p256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 5 * p))); + p256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 4 * p))); + p256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 3 * p))); + p256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 2 * p))); + p256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 1 * p))); + q256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 0 * p))); + q256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 1 * p))); + q256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 2 * p))); + q256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 3 * p))); + q256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 4 * p))); + + p4 = _mm256_castsi256_si128(p256_4); + p3 = _mm256_castsi256_si128(p256_3); + p2 = _mm256_castsi256_si128(p256_2); + p1 = _mm256_castsi256_si128(p256_1); + p0 = _mm256_castsi256_si128(p256_0); + q0 = _mm256_castsi256_si128(q256_0); + q1 = _mm256_castsi256_si128(q256_1); + q2 = _mm256_castsi256_si128(q256_2); + q3 = _mm256_castsi256_si128(q256_3); + q4 = _mm256_castsi256_si128(q256_4); { const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), @@ -534,23 +564,35 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); - p5 = _mm_loadu_si128((__m128i *) (s - 6 * p)); - q5 = _mm_loadu_si128((__m128i *) (s + 5 * p)); + p256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 6 * p))); + q256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 5 * p))); + p5 = _mm256_castsi256_si128(p256_5); + q5 = _mm256_castsi256_si128(q256_5); flat2 = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p5, p0), _mm_subs_epu8(p0, p5)), _mm_or_si128(_mm_subs_epu8(q5, q0), _mm_subs_epu8(q0, q5))); flat2 = _mm_max_epu8(work, flat2); - p6 = _mm_loadu_si128((__m128i *) (s - 7 * p)); - q6 = _mm_loadu_si128((__m128i *) (s + 6 * p)); + p256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 7 * p))); + q256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 6 * p))); + p6 = _mm256_castsi256_si128(p256_6); + q6 = _mm256_castsi256_si128(q256_6); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p6, p0), _mm_subs_epu8(p0, p6)), _mm_or_si128(_mm_subs_epu8(q6, q0), _mm_subs_epu8(q0, q6))); flat2 = _mm_max_epu8(work, flat2); - p7 = _mm_loadu_si128((__m128i *) (s - 8 * p)); - q7 = _mm_loadu_si128((__m128i *) (s + 7 * p)); + p256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 8 * p))); + q256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 7 * p))); + p7 = _mm256_castsi256_si128(p256_7); + q7 = _mm256_castsi256_si128(q256_7); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p7, p0), _mm_subs_epu8(p0, p7)), _mm_or_si128(_mm_subs_epu8(q7, q0), _mm_subs_epu8(q0, q7))); @@ -566,29 +608,28 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, { const __m256i eight = _mm256_set1_epi16(8); const __m256i four = _mm256_set1_epi16(4); - __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, - q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, - p256_0, q256_0; __m256i pixelFilter_p, pixelFilter_q, pixetFilter_p2p1p0, pixetFilter_q2q1q0, sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; - p256_7 = _mm256_cvtepu8_epi16(p7); - p256_6 = _mm256_cvtepu8_epi16(p6); - p256_5 = _mm256_cvtepu8_epi16(p5); - p256_4 = _mm256_cvtepu8_epi16(p4); - p256_3 = _mm256_cvtepu8_epi16(p3); - p256_2 = _mm256_cvtepu8_epi16(p2); - p256_1 = _mm256_cvtepu8_epi16(p1); - p256_0 = _mm256_cvtepu8_epi16(p0); - q256_0 = _mm256_cvtepu8_epi16(q0); - q256_1 = _mm256_cvtepu8_epi16(q1); - q256_2 = _mm256_cvtepu8_epi16(q2); - q256_3 = _mm256_cvtepu8_epi16(q3); - q256_4 = _mm256_cvtepu8_epi16(q4); - q256_5 = _mm256_cvtepu8_epi16(q5); - q256_6 = _mm256_cvtepu8_epi16(q6); - q256_7 = _mm256_cvtepu8_epi16(q7); + const __m256i filter = _mm256_load_si256( + (__m256i const *)filt_loopfilter_avx2); + p256_7 = _mm256_shuffle_epi8(p256_7, filter); + p256_6 = _mm256_shuffle_epi8(p256_6, filter); + p256_5 = _mm256_shuffle_epi8(p256_5, filter); + p256_4 = _mm256_shuffle_epi8(p256_4, filter); + p256_3 = _mm256_shuffle_epi8(p256_3, filter); + p256_2 = _mm256_shuffle_epi8(p256_2, filter); + p256_1 = _mm256_shuffle_epi8(p256_1, filter); + p256_0 = _mm256_shuffle_epi8(p256_0, filter); + q256_0 = _mm256_shuffle_epi8(q256_0, filter); + q256_1 = _mm256_shuffle_epi8(q256_1, filter); + q256_2 = _mm256_shuffle_epi8(q256_2, filter); + q256_3 = _mm256_shuffle_epi8(q256_3, filter); + q256_4 = _mm256_shuffle_epi8(q256_4, filter); + q256_5 = _mm256_shuffle_epi8(q256_5, filter); + q256_6 = _mm256_shuffle_epi8(q256_6, filter); + q256_7 = _mm256_shuffle_epi8(q256_7, filter); pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5), _mm256_add_epi16(p256_4, p256_3)); diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 66da63ac6..a088325df 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -667,6 +667,14 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { vp9_read_frame_size(rb, &cm->display_width, &cm->display_height); } +static void resize_mv_buffer(VP9_COMMON *cm) { + vpx_free(cm->cur_frame->mvs); + cm->cur_frame->mi_rows = cm->mi_rows; + cm->cur_frame->mi_cols = cm->mi_cols; + cm->cur_frame->mvs = (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cm->cur_frame->mvs)); +} + static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { #if CONFIG_SIZE_LIMIT if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) @@ -692,6 +700,10 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { cm->width = width; cm->height = height; } + if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows || + cm->mi_cols > cm->cur_frame->mi_cols) { + resize_mv_buffer(cm); + } } static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { @@ -1537,10 +1549,11 @@ void vp9_decode_frame(VP9Decoder *pbi, init_macroblockd(cm, &pbi->mb); - if (!cm->error_resilient_mode) - set_prev_mi(cm); - else - cm->prev_mi = NULL; + cm->use_prev_frame_mvs = !cm->error_resilient_mode && + cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->intra_only && + cm->last_show_frame; setup_plane_dequants(cm, xd, cm->base_qindex); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index d0e0b76da..7ca812f40 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -425,7 +425,6 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; const int allow_hp = cm->allow_high_precision_mv; - int_mv nearestmv[2], nearmv[2]; int inter_mode_ctx, ref, is_compound; @@ -544,8 +543,27 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { + MODE_INFO *const mi = xd->mi[0].src_mi; + const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; + const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; + const int x_mis = MIN(bw, cm->mi_cols - mi_col); + const int y_mis = MIN(bh, cm->mi_rows - mi_row); + MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + int w, h; + if (frame_is_intra_only(cm)) read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); else read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r); + + for (h = 0; h < y_mis; ++h) { + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int; + } + } } diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index fa2f01041..196816531 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -252,6 +252,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer); cm->new_fb_idx = get_free_fb(cm); + // Assign a MV array to the frame buffer. + cm->cur_frame = &cm->frame_bufs[cm->new_fb_idx]; + if (setjmp(cm->error.jmp)) { pbi->need_resync = 1; cm->error.setjmp = 0; @@ -284,14 +287,13 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, cm->last_width = cm->width; cm->last_height = cm->height; - if (!cm->show_existing_frame) + if (!cm->show_existing_frame) { cm->last_show_frame = cm->show_frame; - if (cm->show_frame) { - if (!cm->show_existing_frame) - vp9_swap_mi_and_prev_mi(cm); + cm->prev_frame = cm->cur_frame; + } + if (cm->show_frame) cm->current_video_frame++; - } cm->error.setjmp = 0; return retcode; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index baa4908d4..b87a28332 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -611,6 +611,13 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; MODE_INFO *mi_addr = &xd->mi[0]; const struct segmentation *const seg = &cm->seg; + const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; + const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; + const int x_mis = MIN(bw, cm->mi_cols - mi_col); + const int y_mis = MIN(bh, cm->mi_rows - mi_row); + MV_REF *const frame_mvs = + cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + int w, h; const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; @@ -728,6 +735,17 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) rd_opt->filter_diff[i] += ctx->best_filter_diff[i]; } + + for (h = 0; h < y_mis; ++h) { + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int; + } + } } void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, @@ -1293,8 +1311,16 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0].src_mi; MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; const struct segmentation *const seg = &cm->seg; + const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; + const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; + const int x_mis = MIN(bw, cm->mi_cols - mi_col); + const int y_mis = MIN(bh, cm->mi_rows - mi_row); + MV_REF *const frame_mvs = + cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + int w, h; *(xd->mi[0].src_mi) = ctx->mic; xd->mi[0].src_mi = &xd->mi[0]; @@ -1323,6 +1349,17 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } } + for (h = 0; h < y_mis; ++h) { + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int; + } + } + x->skip = ctx->skip; x->skip_txfm[0] = mbmi->segment_id ? 0 : ctx->skip_txfm[0]; } @@ -2673,6 +2710,22 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, } } +// Reset the prediction pixel ready flag recursively. +static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { + pc_tree->none.pred_pixel_ready = 0; + pc_tree->horizontal[0].pred_pixel_ready = 0; + pc_tree->horizontal[1].pred_pixel_ready = 0; + pc_tree->vertical[0].pred_pixel_ready = 0; + pc_tree->vertical[1].pred_pixel_ready = 0; + + if (bsize > BLOCK_8X8) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); + int i; + for (i = 0; i < 4; ++i) + pred_pixel_ready_reset(pc_tree->split[i], subsize); + } +} + static void nonrd_pick_partition(VP9_COMP *cpi, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, @@ -2731,6 +2784,10 @@ static void nonrd_pick_partition(VP9_COMP *cpi, partition_vert_allowed &= force_vert_split; } + ctx->pred_pixel_ready = !(partition_vert_allowed || + partition_horz_allowed || + do_split); + // PARTITION_NONE if (partition_none_allowed) { nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, @@ -2738,7 +2795,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ctx->mic.mbmi = xd->mi[0].src_mi->mbmi; ctx->skip_txfm[0] = x->skip_txfm[0]; ctx->skip = x->skip; - ctx->pred_pixel_ready = 0; if (this_rdc.rate != INT_MAX) { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -2814,17 +2870,17 @@ static void nonrd_pick_partition(VP9_COMP *cpi, subsize = get_subsize(bsize, PARTITION_HORZ); if (sf->adaptive_motion_search) load_pred_mv(x, ctx); - + pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; - pc_tree->horizontal[0].pred_pixel_ready = 0; if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { load_pred_mv(x, ctx); + pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row + ms, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1]); @@ -2832,7 +2888,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; - pc_tree->horizontal[1].pred_pixel_ready = 0; if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); @@ -2849,32 +2904,32 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; + } else { + pred_pixel_ready_reset(pc_tree, bsize); } } // PARTITION_VERT if (partition_vert_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_VERT); - if (sf->adaptive_motion_search) load_pred_mv(x, ctx); - + pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; - pc_tree->vertical[0].pred_pixel_ready = 0; if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { load_pred_mv(x, ctx); + pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + ms, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; - pc_tree->vertical[1].pred_pixel_ready = 0; if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); @@ -2891,6 +2946,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_VERT; + } else { + pred_pixel_ready_reset(pc_tree, bsize); } } @@ -2972,27 +3029,27 @@ static void nonrd_select_partition(VP9_COMP *cpi, } else { switch (partition) { case PARTITION_NONE: + pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; - pc_tree->none.pred_pixel_ready = 1; break; case PARTITION_VERT: + pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; - pc_tree->vertical[0].pred_pixel_ready = 1; if (mi_col + hbs < cm->mi_cols) { + pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; - pc_tree->vertical[1].pred_pixel_ready = 1; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; @@ -3001,19 +3058,19 @@ static void nonrd_select_partition(VP9_COMP *cpi, } break; case PARTITION_HORZ: + pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; - pc_tree->horizontal[0].pred_pixel_ready = 1; if (mi_row + hbs < cm->mi_rows) { + pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; - pc_tree->horizontal[1].pred_pixel_ready = 1; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; @@ -3091,6 +3148,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, switch (partition) { case PARTITION_NONE: + pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -3098,12 +3156,14 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->none.skip = x->skip; break; case PARTITION_VERT: + pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; if (mi_col + hbs < cm->mi_cols) { + pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -3117,12 +3177,14 @@ static void nonrd_use_partition(VP9_COMP *cpi, } break; case PARTITION_HORZ: + pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; if (mi_row + hbs < cm->mi_rows) { + pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -3504,6 +3566,11 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_initialize_me_consts(cpi, cm->base_qindex); init_encode_frame_mb_context(cpi); set_prev_mi(cm); + cm->use_prev_frame_mvs = !cm->error_resilient_mode && + cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->intra_only && + cm->last_show_frame; x->quant_fp = cpi->sf.use_quant_fp; vp9_zero(x->skip_txfm); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index dfc636a41..1d9fe5e92 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -584,7 +584,7 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && - cpi->oxcf.pass == 2)) { + cpi->oxcf.pass != 1)) { vp9_init_layer_context(cpi); } @@ -1285,7 +1285,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->oxcf.rc_mode == VPX_CBR) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && - cpi->oxcf.pass == 2)) { + cpi->oxcf.pass != 1)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); } @@ -2420,6 +2420,7 @@ void vp9_scale_references(VP9_COMP *cpi) { #if CONFIG_VP9_HIGHBITDEPTH if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); + cm->cur_frame = &cm->frame_bufs[new_fb]; vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -2437,6 +2438,15 @@ void vp9_scale_references(VP9_COMP *cpi) { scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf); #endif // CONFIG_VP9_HIGHBITDEPTH cpi->scaled_ref_idx[ref_frame - 1] = new_fb; + if (cm->frame_bufs[new_fb].mvs == NULL || + cm->frame_bufs[new_fb].mi_rows < cm->mi_rows || + cm->frame_bufs[new_fb].mi_cols < cm->mi_cols) { + cm->frame_bufs[new_fb].mvs = + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cm->frame_bufs[new_fb].mvs)); + cm->frame_bufs[new_fb].mi_rows = cm->mi_rows; + cm->frame_bufs[new_fb].mi_cols = cm->mi_cols; + } } else { cpi->scaled_ref_idx[ref_frame - 1] = idx; ++cm->frame_bufs[idx].ref_count; @@ -2973,7 +2983,9 @@ static int get_ref_frame_flags(const VP9_COMP *cpi) { if (gold_is_last) flags &= ~VP9_GOLD_FLAG; - if (cpi->rc.frames_till_gf_update_due == INT_MAX && !is_two_pass_svc(cpi)) + if (cpi->rc.frames_till_gf_update_due == INT_MAX && + (cpi->svc.number_temporal_layers == 1 && + cpi->svc.number_spatial_layers == 1)) flags &= ~VP9_GOLD_FLAG; if (alt_is_last) @@ -3279,13 +3291,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cm->show_frame) { vp9_swap_mi_and_prev_mi(cm); - // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; if (cpi->use_svc) vp9_inc_frame_in_layer(cpi); } + cm->prev_frame = cm->cur_frame; if (is_two_pass_svc(cpi)) cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = @@ -3630,6 +3642,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // held. cm->frame_bufs[cm->new_fb_idx].ref_count--; cm->new_fb_idx = get_free_fb(cm); + cm->cur_frame = &cm->frame_bufs[cm->new_fb_idx]; if (!cpi->use_svc && cpi->multi_arf_allowed) { if (cm->frame_type == KEY_FRAME) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 0e112f2ff..f0c05430a 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -531,9 +531,8 @@ void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { return cpi->use_svc && - (cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - (cpi->oxcf.pass == 1 || cpi->oxcf.pass == 2); + ((cpi->svc.number_spatial_layers > 1) || + (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0)); } static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 69b419384..28598f1aa 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -590,6 +590,13 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, return besterr; } +const MV search_step_table[12] = { + // left, right, up, down + {0, -4}, {0, 4}, {-4, 0}, {4, 0}, + {0, -2}, {0, 2}, {-2, 0}, {2, 0}, + {0, -1}, {0, 1}, {-1, 0}, {1, 0} +}; + int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, @@ -603,43 +610,134 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, unsigned int *sse1, const uint8_t *second_pred, int w, int h) { - SETUP_SUBPEL_SEARCH; - SETUP_CENTER_ERROR; - (void) cost_list; // to silence compiler warning + const uint8_t *const z = x->plane[0].src.buf; + const uint8_t *const src_address = z; + const int src_stride = x->plane[0].src.stride; + const MACROBLOCKD *xd = &x->e_mbd; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int whichdir = 0; + int thismse; + const int y_stride = xd->plane[0].pre[0].stride; + const int offset = bestmv->row * y_stride + bestmv->col; + const uint8_t *const y = xd->plane[0].pre[0].buf; + + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; + int hstep = 4; + int iter, round = 3 - forced_stop; + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); + int tr = br; + int tc = bc; + const MV *search_step = search_step_table; + int idx, best_idx = -1; + unsigned int cost_array[5]; + + if (!(allow_hp && vp9_use_mv_hp(ref_mv))) + if (round == 3) + round = 2; + + bestmv->row *= 8; + bestmv->col *= 8; - // Each subsequent iteration checks at least one point in - // common with the last iteration could be 2 ( if diag selected) - // 1/2 pel - FIRST_LEVEL_CHECKS; - if (halfiters > 1) { - SECOND_LEVEL_CHECKS; + if (second_pred != NULL) { + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); + besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse1); + } else { + besterr = vfp->vf(y + offset, y_stride, src_address, src_stride, sse1); } - tr = br; - tc = bc; + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel + (void) cost_list; // to silence compiler warning - // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only - if (forced_stop != 2) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (quarteriters > 1) { - SECOND_LEVEL_CHECKS; + for (iter = 0; iter < round; ++iter) { + // Check vertical and horizontal sub-pixel positions. + for (idx = 0; idx < 4; ++idx) { + tr = br + search_step[idx].row; + tc = bc + search_step[idx].col; + if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { + const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); + int row_offset = (tr & 0x07) << 1; + int col_offset = (tc & 0x07) << 1; + MV this_mv; + this_mv.row = tr; + this_mv.col = tc; + if (second_pred == NULL) + thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, + src_address, src_stride, &sse); + else + thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, + src_address, src_stride, &sse, second_pred); + cost_array[idx] = thismse + + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); + + if (cost_array[idx] < besterr) { + best_idx = idx; + besterr = cost_array[idx]; + *distortion = thismse; + *sse1 = sse; + } + } else { + cost_array[idx] = INT_MAX; + } } - tr = br; - tc = bc; - } - if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (eighthiters > 1) { - SECOND_LEVEL_CHECKS; + // Check diagonal sub-pixel position + tc = bc + (cost_array[0] < cost_array[1] ? -hstep : hstep); + tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep); + if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { + const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); + int row_offset = (tr & 0x07) << 1; + int col_offset = (tc & 0x07) << 1; + MV this_mv = {tr, tc}; + if (second_pred == NULL) + thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, + src_address, src_stride, &sse); + else + thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, + src_address, src_stride, &sse, second_pred); + cost_array[4] = thismse + + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); + + if (cost_array[4] < besterr) { + best_idx = 4; + besterr = cost_array[4]; + *distortion = thismse; + *sse1 = sse; + } + } else { + cost_array[idx] = INT_MAX; } + + if (best_idx < 4 && best_idx >= 0) { + br += search_step[best_idx].row; + bc += search_step[best_idx].col; + } else if (best_idx == 4) { + br = tr; + bc = tc; + } + + if (iters_per_step > 1) + SECOND_LEVEL_CHECKS; + tr = br; tc = bc; + + search_step += 4; + hstep >>= 1; + best_idx = -1; } + + // Each subsequent iteration checks at least one point in common with + // the last iteration could be 2 ( if diag selected) 1/4 pel + // These lines insure static analysis doesn't warn that // tr and tc aren't used after the above point. (void) tr; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 28f12916e..1943fdb28 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -453,7 +453,7 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, } static const THR_MODES mode_idx[MAX_REF_FRAMES][4] = { - {THR_DC, THR_H_PRED, THR_V_PRED}, + {THR_DC, THR_H_PRED, THR_V_PRED, THR_TM}, {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV}, {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG}, {THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA}, @@ -517,8 +517,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, PRED_BUFFER *best_pred = NULL; PRED_BUFFER *this_mode_pred = NULL; const int pixels_in_block = bh * bw; + int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready; - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { int i; for (i = 0; i < 3; i++) { #if CONFIG_VP9_HIGHBITDEPTH @@ -605,6 +606,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; int mode_rd_thresh; + int mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)]; if (const_motion[ref_frame] && this_mode == NEARMV) continue; @@ -612,10 +614,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; - mode_rd_thresh = - rd_threshes[mode_idx[ref_frame][INTER_OFFSET(this_mode)]]; + mode_rd_thresh = rd_threshes[mode_index]; if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, - rd_thresh_freq_fact[this_mode])) + rd_thresh_freq_fact[mode_index])) continue; if (this_mode == NEWMV) { @@ -641,7 +642,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for the best prediction filter type, when the resulting // motion vector is at sub-pixel accuracy level for luma component, i.e., // the last three bits are all zeros. - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { if (!this_mode_pred) { this_mode_pred = &tmp[3]; } else { @@ -679,7 +680,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_cost = cost; skip_txfm = x->skip_txfm[0]; - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { if (this_mode_pred != current_pred) { free_pred_buffer(this_mode_pred); this_mode_pred = current_pred; @@ -694,7 +695,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred) + if (reuse_inter_pred && this_mode_pred != current_pred) free_pred_buffer(current_pred); mbmi->interp_filter = best_filter; @@ -746,13 +747,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_ref_frame = ref_frame; skip_txfm = x->skip_txfm[0]; - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { free_pred_buffer(best_pred); - best_pred = this_mode_pred; } } else { - if (cpi->sf.reuse_inter_pred_sby) + if (reuse_inter_pred) free_pred_buffer(this_mode_pred); } @@ -766,7 +766,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // If best prediction is not in dst buf, then copy the prediction block from // temp buf to dst buf. - if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby && + if (best_pred != NULL && reuse_inter_pred && best_pred->data != orig_dst.buf) { pd->dst = orig_dst; #if CONFIG_VP9_HIGHBITDEPTH @@ -801,7 +801,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { pd->dst.buf = tmp[0].data; pd->dst.stride = bw; } @@ -833,16 +833,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip_txfm[0] = skip_txfm; } } - if (cpi->sf.reuse_inter_pred_sby) + if (reuse_inter_pred) pd->dst = orig_dst; } if (is_inter_block(mbmi)) - vp9_update_rd_thresh_fact(cpi, tile_data, bsize, - mode_idx[ref_frame][INTER_OFFSET(mbmi->mode)]); + vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, + cpi->sf.adaptive_rd_thresh, bsize, + mode_idx[best_ref_frame][INTER_OFFSET(mbmi->mode)]); else - vp9_update_rd_thresh_fact(cpi, tile_data, bsize, - mode_idx[ref_frame][mbmi->mode]); + vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, + cpi->sf.adaptive_rd_thresh, bsize, + mode_idx[INTRA_FRAME][mbmi->mode]); *rd_cost = best_rdc; } diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 13e317d6d..2f19d2942 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -605,10 +605,9 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { } } -// TODO(jingning) Refactor this function. Use targeted smaller struct as inputs. -void vp9_update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data, +void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, int bsize, int best_mode_index) { - if (cpi->sf.adaptive_rd_thresh > 0) { + if (rd_thresh > 0) { const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; int mode; for (mode = 0; mode < top_mode; ++mode) { @@ -616,12 +615,12 @@ void vp9_update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data, const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64); BLOCK_SIZE bs; for (bs = min_size; bs <= max_size; ++bs) { - int *const fact = &tile_data->thresh_freq_fact[bs][mode]; + int *const fact = &factor_buf[bs][mode]; if (mode == best_mode_index) { *fact -= (*fact >> 4); } else { *fact = MIN(*fact + RD_THRESH_INC, - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); + rd_thresh * RD_THRESH_MAX_FACT); } } } diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index aecca0b43..ebbe821d5 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -162,8 +162,7 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); -void vp9_update_rd_thresh_fact(struct VP9_COMP *cpi, - struct TileDataEnc *tile_data, +void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize, int best_mode_index); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e80f345e8..882bac105 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3398,7 +3398,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, !is_inter_block(&best_mbmode)); if (!cpi->rc.is_src_frame_alt_ref) - vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_mode_index); + vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, + sf->adaptive_rd_thresh, bsize, best_mode_index); // macroblock modes *mbmi = best_mbmode; @@ -3553,7 +3554,8 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, assert((cm->interp_filter == SWITCHABLE) || (cm->interp_filter == mbmi->interp_filter)); - vp9_update_rd_thresh_fact(cpi, tile_data, bsize, THR_ZEROMV); + vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, + cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); vp9_zero(best_pred_diff); vp9_zero(best_filter_diff); @@ -4128,7 +4130,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); - vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_ref_index); + vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, + sf->adaptive_rd_thresh, bsize, best_ref_index); // macroblock modes *mbmi = best_mbmode; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 3315aa6a1..7a1b0cc1f 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -295,16 +295,16 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, } if (speed >= 7) { + sf->adaptive_rd_thresh = 3; sf->mv.search_method = FAST_DIAMOND; sf->mv.fullpel_search_step_param = 10; sf->lpf_pick = LPF_PICK_MINIMAL_LPF; sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ? 800 : 300; - sf->elevate_newmv_thresh = 2500; } if (speed >= 12) { - sf->elevate_newmv_thresh = 4000; + sf->adaptive_rd_thresh = 4; sf->mv.subpel_force_stop = 2; } @@ -386,7 +386,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_type_check_frequency = 50; sf->encode_breakout_thresh = 0; sf->elevate_newmv_thresh = 0; - // Recode loop tolerence %. + // Recode loop tolerance %. sf->recode_tolerance = 25; sf->default_interp_filter = SWITCHABLE; sf->tx_size_search_breakout = 0; |