summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/vp9_alloccommon.c15
-rw-r--r--vp9/common/vp9_mvref_common.c44
-rw-r--r--vp9/common/vp9_onyxc_int.h15
-rw-r--r--vp9/common/x86/vp9_loopfilter_intrin_avx2.c111
-rw-r--r--vp9/decoder/vp9_decodeframe.c21
-rw-r--r--vp9/decoder/vp9_decodemv.c20
-rw-r--r--vp9/decoder/vp9_decoder.c12
-rw-r--r--vp9/encoder/vp9_encodeframe.c42
-rw-r--r--vp9/encoder/vp9_encoder.c21
-rw-r--r--vp9/encoder/vp9_encoder.h5
-rw-r--r--vp9/encoder/vp9_mcomp.c152
-rw-r--r--vp9/encoder/vp9_speed_features.c6
12 files changed, 368 insertions, 96 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 8b3b9dbe0..5b8b2a9ec 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -87,13 +87,14 @@ void vp9_free_ref_frame_buffers(VP9_COMMON *cm) {
int i;
for (i = 0; i < FRAME_BUFFERS; ++i) {
- vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
-
if (cm->frame_bufs[i].ref_count > 0 &&
cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
cm->frame_bufs[i].ref_count = 0;
}
+ vpx_free(cm->frame_bufs[i].mvs);
+ cm->frame_bufs[i].mvs = NULL;
+ vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
}
vp9_free_frame_buffer(&cm->post_proc_buffer);
@@ -166,6 +167,16 @@ int vp9_alloc_ref_frame_buffers(VP9_COMMON *cm, int width, int height) {
#endif
VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
+ if (cm->frame_bufs[i].mvs == NULL) {
+ cm->frame_bufs[i].mvs =
+ (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*cm->frame_bufs[i].mvs));
+ if (cm->frame_bufs[i].mvs == NULL)
+ goto fail;
+
+ cm->frame_bufs[i].mi_rows = cm->mi_rows;
+ cm->frame_bufs[i].mi_cols = cm->mi_cols;
+ }
}
init_frame_bufs(cm);
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index b310eb44d..561201ffe 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -20,13 +20,11 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int block, int mi_row, int mi_col) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
- const MODE_INFO *prev_mi = !cm->error_resilient_mode && cm->prev_mi
- ? cm->prev_mi[mi_row * xd->mi_stride + mi_col].src_mi
- : NULL;
- const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->src_mi->mbmi : NULL;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
int different_ref_found = 0;
int context_counter = 0;
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
// Blank the reference vector list
vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
@@ -71,11 +69,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
// Check the last frame's mode and mv info.
- if (prev_mbmi) {
- if (prev_mbmi->ref_frame[0] == ref_frame)
- ADD_MV_REF_LIST(prev_mbmi->mv[0], refmv_count, mv_ref_list, Done);
- else if (prev_mbmi->ref_frame[1] == ref_frame)
- ADD_MV_REF_LIST(prev_mbmi->mv[1], refmv_count, mv_ref_list, Done);
+ if (cm->use_prev_frame_mvs) {
+ if (prev_frame_mvs->ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
+ } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done);
+ }
}
// Since we couldn't find 2 mvs from the same reference frame
@@ -96,9 +95,30 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
// Since we still don't have a candidate we'll try the last frame.
- if (prev_mbmi)
- IF_DIFF_REF_FRAME_ADD_MV(prev_mbmi, ref_frame, ref_sign_bias, refmv_count,
- mv_ref_list, Done);
+ if (cm->use_prev_frame_mvs) {
+ if (prev_frame_mvs->ref_frame[0] != ref_frame &&
+ prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
+ int_mv mv = prev_frame_mvs->mv[0];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done);
+ }
+
+ if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
+ prev_frame_mvs->ref_frame[1] != ref_frame &&
+ prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) {
+ int_mv mv = prev_frame_mvs->mv[1];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done);
+ }
+ }
Done:
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index b818ae818..b3a6590b2 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -56,9 +56,16 @@ typedef enum {
REFERENCE_MODES = 3,
} REFERENCE_MODE;
+typedef struct {
+ int_mv mv[2];
+ MV_REFERENCE_FRAME ref_frame[2];
+} MV_REF;
typedef struct {
int ref_count;
+ MV_REF *mvs;
+ int mi_rows;
+ int mi_cols;
vpx_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
} RefCntBuffer;
@@ -91,6 +98,10 @@ typedef struct VP9Common {
YV12_BUFFER_CONFIG *frame_to_show;
RefCntBuffer frame_bufs[FRAME_BUFFERS];
+ RefCntBuffer *prev_frame;
+
+ // TODO(hkuang): Combine this with cur_buf in macroblockd.
+ RefCntBuffer *cur_frame;
int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
@@ -149,6 +160,10 @@ typedef struct VP9Common {
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
+
+ // Whether to use previous frame's motion vectors for prediction.
+ int use_prev_frame_mvs;
+
// Persistent mb segment id map used in prediction.
unsigned char *last_frame_seg_map;
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
index 439c028f2..0cb0912ad 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
@@ -9,6 +9,7 @@
*/
#include <immintrin.h> /* AVX2 */
+#include "vpx_ports/mem.h"
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
@@ -392,6 +393,11 @@ static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
}
}
+DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
+ 0, 128, 1, 128, 2, 128, 3, 128, 4, 128, 5, 128, 6, 128, 7, 128,
+ 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
+};
+
static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh) {
@@ -401,6 +407,9 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
__m128i p7, p6, p5;
__m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;
__m128i q5, q6, q7;
+ __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4,
+ q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1,
+ p256_0, q256_0;
const __m128i thresh = _mm_broadcastb_epi8(
_mm_cvtsi32_si128((int) _thresh[0]));
@@ -409,16 +418,37 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
const __m128i blimit = _mm_broadcastb_epi8(
_mm_cvtsi32_si128((int) _blimit[0]));
- p4 = _mm_loadu_si128((__m128i *) (s - 5 * p));
- p3 = _mm_loadu_si128((__m128i *) (s - 4 * p));
- p2 = _mm_loadu_si128((__m128i *) (s - 3 * p));
- p1 = _mm_loadu_si128((__m128i *) (s - 2 * p));
- p0 = _mm_loadu_si128((__m128i *) (s - 1 * p));
- q0 = _mm_loadu_si128((__m128i *) (s - 0 * p));
- q1 = _mm_loadu_si128((__m128i *) (s + 1 * p));
- q2 = _mm_loadu_si128((__m128i *) (s + 2 * p));
- q3 = _mm_loadu_si128((__m128i *) (s + 3 * p));
- q4 = _mm_loadu_si128((__m128i *) (s + 4 * p));
+ p256_4 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 5 * p)));
+ p256_3 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 4 * p)));
+ p256_2 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 3 * p)));
+ p256_1 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 2 * p)));
+ p256_0 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 1 * p)));
+ q256_0 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 0 * p)));
+ q256_1 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 1 * p)));
+ q256_2 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 2 * p)));
+ q256_3 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 3 * p)));
+ q256_4 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 4 * p)));
+
+ p4 = _mm256_castsi256_si128(p256_4);
+ p3 = _mm256_castsi256_si128(p256_3);
+ p2 = _mm256_castsi256_si128(p256_2);
+ p1 = _mm256_castsi256_si128(p256_1);
+ p0 = _mm256_castsi256_si128(p256_0);
+ q0 = _mm256_castsi256_si128(q256_0);
+ q1 = _mm256_castsi256_si128(q256_1);
+ q2 = _mm256_castsi256_si128(q256_2);
+ q3 = _mm256_castsi256_si128(q256_3);
+ q4 = _mm256_castsi256_si128(q256_4);
{
const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
@@ -534,23 +564,35 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
flat = _mm_cmpeq_epi8(flat, zero);
flat = _mm_and_si128(flat, mask);
- p5 = _mm_loadu_si128((__m128i *) (s - 6 * p));
- q5 = _mm_loadu_si128((__m128i *) (s + 5 * p));
+ p256_5 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 6 * p)));
+ q256_5 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 5 * p)));
+ p5 = _mm256_castsi256_si128(p256_5);
+ q5 = _mm256_castsi256_si128(q256_5);
flat2 = _mm_max_epu8(
_mm_or_si128(_mm_subs_epu8(p5, p0), _mm_subs_epu8(p0, p5)),
_mm_or_si128(_mm_subs_epu8(q5, q0), _mm_subs_epu8(q0, q5)));
flat2 = _mm_max_epu8(work, flat2);
- p6 = _mm_loadu_si128((__m128i *) (s - 7 * p));
- q6 = _mm_loadu_si128((__m128i *) (s + 6 * p));
+ p256_6 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 7 * p)));
+ q256_6 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 6 * p)));
+ p6 = _mm256_castsi256_si128(p256_6);
+ q6 = _mm256_castsi256_si128(q256_6);
work = _mm_max_epu8(
_mm_or_si128(_mm_subs_epu8(p6, p0), _mm_subs_epu8(p0, p6)),
_mm_or_si128(_mm_subs_epu8(q6, q0), _mm_subs_epu8(q0, q6)));
flat2 = _mm_max_epu8(work, flat2);
- p7 = _mm_loadu_si128((__m128i *) (s - 8 * p));
- q7 = _mm_loadu_si128((__m128i *) (s + 7 * p));
+ p256_7 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s - 8 * p)));
+ q256_7 = _mm256_castpd_si256(_mm256_broadcast_pd(
+ (__m128d const *)(s + 7 * p)));
+ p7 = _mm256_castsi256_si128(p256_7);
+ q7 = _mm256_castsi256_si128(q256_7);
work = _mm_max_epu8(
_mm_or_si128(_mm_subs_epu8(p7, p0), _mm_subs_epu8(p0, p7)),
_mm_or_si128(_mm_subs_epu8(q7, q0), _mm_subs_epu8(q0, q7)));
@@ -566,29 +608,28 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
{
const __m256i eight = _mm256_set1_epi16(8);
const __m256i four = _mm256_set1_epi16(4);
- __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4,
- q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1,
- p256_0, q256_0;
__m256i pixelFilter_p, pixelFilter_q, pixetFilter_p2p1p0,
pixetFilter_q2q1q0, sum_p7, sum_q7, sum_p3, sum_q3, res_p,
res_q;
- p256_7 = _mm256_cvtepu8_epi16(p7);
- p256_6 = _mm256_cvtepu8_epi16(p6);
- p256_5 = _mm256_cvtepu8_epi16(p5);
- p256_4 = _mm256_cvtepu8_epi16(p4);
- p256_3 = _mm256_cvtepu8_epi16(p3);
- p256_2 = _mm256_cvtepu8_epi16(p2);
- p256_1 = _mm256_cvtepu8_epi16(p1);
- p256_0 = _mm256_cvtepu8_epi16(p0);
- q256_0 = _mm256_cvtepu8_epi16(q0);
- q256_1 = _mm256_cvtepu8_epi16(q1);
- q256_2 = _mm256_cvtepu8_epi16(q2);
- q256_3 = _mm256_cvtepu8_epi16(q3);
- q256_4 = _mm256_cvtepu8_epi16(q4);
- q256_5 = _mm256_cvtepu8_epi16(q5);
- q256_6 = _mm256_cvtepu8_epi16(q6);
- q256_7 = _mm256_cvtepu8_epi16(q7);
+ const __m256i filter = _mm256_load_si256(
+ (__m256i const *)filt_loopfilter_avx2);
+ p256_7 = _mm256_shuffle_epi8(p256_7, filter);
+ p256_6 = _mm256_shuffle_epi8(p256_6, filter);
+ p256_5 = _mm256_shuffle_epi8(p256_5, filter);
+ p256_4 = _mm256_shuffle_epi8(p256_4, filter);
+ p256_3 = _mm256_shuffle_epi8(p256_3, filter);
+ p256_2 = _mm256_shuffle_epi8(p256_2, filter);
+ p256_1 = _mm256_shuffle_epi8(p256_1, filter);
+ p256_0 = _mm256_shuffle_epi8(p256_0, filter);
+ q256_0 = _mm256_shuffle_epi8(q256_0, filter);
+ q256_1 = _mm256_shuffle_epi8(q256_1, filter);
+ q256_2 = _mm256_shuffle_epi8(q256_2, filter);
+ q256_3 = _mm256_shuffle_epi8(q256_3, filter);
+ q256_4 = _mm256_shuffle_epi8(q256_4, filter);
+ q256_5 = _mm256_shuffle_epi8(q256_5, filter);
+ q256_6 = _mm256_shuffle_epi8(q256_6, filter);
+ q256_7 = _mm256_shuffle_epi8(q256_7, filter);
pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5),
_mm256_add_epi16(p256_4, p256_3));
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 66da63ac6..a088325df 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -667,6 +667,14 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
vp9_read_frame_size(rb, &cm->display_width, &cm->display_height);
}
+static void resize_mv_buffer(VP9_COMMON *cm) {
+ vpx_free(cm->cur_frame->mvs);
+ cm->cur_frame->mi_rows = cm->mi_rows;
+ cm->cur_frame->mi_cols = cm->mi_cols;
+ cm->cur_frame->mvs = (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*cm->cur_frame->mvs));
+}
+
static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
#if CONFIG_SIZE_LIMIT
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
@@ -692,6 +700,10 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
cm->width = width;
cm->height = height;
}
+ if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows ||
+ cm->mi_cols > cm->cur_frame->mi_cols) {
+ resize_mv_buffer(cm);
+ }
}
static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
@@ -1537,10 +1549,11 @@ void vp9_decode_frame(VP9Decoder *pbi,
init_macroblockd(cm, &pbi->mb);
- if (!cm->error_resilient_mode)
- set_prev_mi(cm);
- else
- cm->prev_mi = NULL;
+ cm->use_prev_frame_mvs = !cm->error_resilient_mode &&
+ cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+ !cm->intra_only &&
+ cm->last_show_frame;
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index d0e0b76da..7ca812f40 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -425,7 +425,6 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
-
int_mv nearestmv[2], nearmv[2];
int inter_mode_ctx, ref, is_compound;
@@ -544,8 +543,27 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm,
void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int mi_row, int mi_col, vp9_reader *r) {
+ MODE_INFO *const mi = xd->mi[0].src_mi;
+ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
+ const int x_mis = MIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+ MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+ int w, h;
+
if (frame_is_intra_only(cm))
read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
else
read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r);
+
+ for (h = 0; h < y_mis; ++h) {
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int;
+ }
+ }
}
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fa2f01041..196816531 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -252,6 +252,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer);
cm->new_fb_idx = get_free_fb(cm);
+ // Assign a MV array to the frame buffer.
+ cm->cur_frame = &cm->frame_bufs[cm->new_fb_idx];
+
if (setjmp(cm->error.jmp)) {
pbi->need_resync = 1;
cm->error.setjmp = 0;
@@ -284,14 +287,13 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
cm->last_width = cm->width;
cm->last_height = cm->height;
- if (!cm->show_existing_frame)
+ if (!cm->show_existing_frame) {
cm->last_show_frame = cm->show_frame;
- if (cm->show_frame) {
- if (!cm->show_existing_frame)
- vp9_swap_mi_and_prev_mi(cm);
+ cm->prev_frame = cm->cur_frame;
+ }
+ if (cm->show_frame)
cm->current_video_frame++;
- }
cm->error.setjmp = 0;
return retcode;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 917d91b26..b87a28332 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -611,6 +611,13 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
MODE_INFO *mi_addr = &xd->mi[0];
const struct segmentation *const seg = &cm->seg;
+ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
+ const int x_mis = MIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+ MV_REF *const frame_mvs =
+ cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+ int w, h;
const int mis = cm->mi_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
@@ -728,6 +735,17 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
rd_opt->filter_diff[i] += ctx->best_filter_diff[i];
}
+
+ for (h = 0; h < y_mis; ++h) {
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int;
+ }
+ }
}
void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
@@ -1293,8 +1311,16 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mi = xd->mi[0].src_mi;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
const struct segmentation *const seg = &cm->seg;
+ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
+ const int x_mis = MIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+ MV_REF *const frame_mvs =
+ cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+ int w, h;
*(xd->mi[0].src_mi) = ctx->mic;
xd->mi[0].src_mi = &xd->mi[0];
@@ -1323,6 +1349,17 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
}
}
+ for (h = 0; h < y_mis; ++h) {
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int;
+ }
+ }
+
x->skip = ctx->skip;
x->skip_txfm[0] = mbmi->segment_id ? 0 : ctx->skip_txfm[0];
}
@@ -3529,6 +3566,11 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_initialize_me_consts(cpi, cm->base_qindex);
init_encode_frame_mb_context(cpi);
set_prev_mi(cm);
+ cm->use_prev_frame_mvs = !cm->error_resilient_mode &&
+ cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+ !cm->intra_only &&
+ cm->last_show_frame;
x->quant_fp = cpi->sf.use_quant_fp;
vp9_zero(x->skip_txfm);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index dfc636a41..1d9fe5e92 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -584,7 +584,7 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
cpi->svc.number_spatial_layers > 1) &&
- cpi->oxcf.pass == 2)) {
+ cpi->oxcf.pass != 1)) {
vp9_init_layer_context(cpi);
}
@@ -1285,7 +1285,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
cpi->svc.number_spatial_layers > 1) &&
- cpi->oxcf.pass == 2)) {
+ cpi->oxcf.pass != 1)) {
vp9_update_layer_context_change_config(cpi,
(int)cpi->oxcf.target_bandwidth);
}
@@ -2420,6 +2420,7 @@ void vp9_scale_references(VP9_COMP *cpi) {
#if CONFIG_VP9_HIGHBITDEPTH
if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
const int new_fb = get_free_fb(cm);
+ cm->cur_frame = &cm->frame_bufs[new_fb];
vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
@@ -2437,6 +2438,15 @@ void vp9_scale_references(VP9_COMP *cpi) {
scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+ if (cm->frame_bufs[new_fb].mvs == NULL ||
+ cm->frame_bufs[new_fb].mi_rows < cm->mi_rows ||
+ cm->frame_bufs[new_fb].mi_cols < cm->mi_cols) {
+ cm->frame_bufs[new_fb].mvs =
+ (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*cm->frame_bufs[new_fb].mvs));
+ cm->frame_bufs[new_fb].mi_rows = cm->mi_rows;
+ cm->frame_bufs[new_fb].mi_cols = cm->mi_cols;
+ }
} else {
cpi->scaled_ref_idx[ref_frame - 1] = idx;
++cm->frame_bufs[idx].ref_count;
@@ -2973,7 +2983,9 @@ static int get_ref_frame_flags(const VP9_COMP *cpi) {
if (gold_is_last)
flags &= ~VP9_GOLD_FLAG;
- if (cpi->rc.frames_till_gf_update_due == INT_MAX && !is_two_pass_svc(cpi))
+ if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
+ (cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.number_spatial_layers == 1))
flags &= ~VP9_GOLD_FLAG;
if (alt_is_last)
@@ -3279,13 +3291,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cm->show_frame) {
vp9_swap_mi_and_prev_mi(cm);
-
// Don't increment frame counters if this was an altref buffer
// update not a real frame
++cm->current_video_frame;
if (cpi->use_svc)
vp9_inc_frame_in_layer(cpi);
}
+ cm->prev_frame = cm->cur_frame;
if (is_two_pass_svc(cpi))
cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
@@ -3630,6 +3642,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// held.
cm->frame_bufs[cm->new_fb_idx].ref_count--;
cm->new_fb_idx = get_free_fb(cm);
+ cm->cur_frame = &cm->frame_bufs[cm->new_fb_idx];
if (!cpi->use_svc && cpi->multi_arf_allowed) {
if (cm->frame_type == KEY_FRAME) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 0e112f2ff..f0c05430a 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -531,9 +531,8 @@ void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) {
return cpi->use_svc &&
- (cpi->svc.number_temporal_layers > 1 ||
- cpi->svc.number_spatial_layers > 1) &&
- (cpi->oxcf.pass == 1 || cpi->oxcf.pass == 2);
+ ((cpi->svc.number_spatial_layers > 1) ||
+ (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0));
}
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 69b419384..28598f1aa 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -590,6 +590,13 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
return besterr;
}
+const MV search_step_table[12] = {
+ // left, right, up, down
+ {0, -4}, {0, 4}, {-4, 0}, {4, 0},
+ {0, -2}, {0, 2}, {-2, 0}, {2, 0},
+ {0, -1}, {0, 1}, {-1, 0}, {1, 0}
+};
+
int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -603,43 +610,134 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
unsigned int *sse1,
const uint8_t *second_pred,
int w, int h) {
- SETUP_SUBPEL_SEARCH;
- SETUP_CENTER_ERROR;
- (void) cost_list; // to silence compiler warning
+ const uint8_t *const z = x->plane[0].src.buf;
+ const uint8_t *const src_address = z;
+ const int src_stride = x->plane[0].src.stride;
+ const MACROBLOCKD *xd = &x->e_mbd;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int whichdir = 0;
+ int thismse;
+ const int y_stride = xd->plane[0].pre[0].stride;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ const uint8_t *const y = xd->plane[0].pre[0].buf;
+
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ int iter, round = 3 - forced_stop;
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+ int tr = br;
+ int tc = bc;
+ const MV *search_step = search_step_table;
+ int idx, best_idx = -1;
+ unsigned int cost_array[5];
+
+ if (!(allow_hp && vp9_use_mv_hp(ref_mv)))
+ if (round == 3)
+ round = 2;
+
+ bestmv->row *= 8;
+ bestmv->col *= 8;
- // Each subsequent iteration checks at least one point in
- // common with the last iteration could be 2 ( if diag selected)
- // 1/2 pel
- FIRST_LEVEL_CHECKS;
- if (halfiters > 1) {
- SECOND_LEVEL_CHECKS;
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse1);
+ } else {
+ besterr = vfp->vf(y + offset, y_stride, src_address, src_stride, sse1);
}
- tr = br;
- tc = bc;
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- // Each subsequent iteration checks at least one point in common with
- // the last iteration could be 2 ( if diag selected) 1/4 pel
+ (void) cost_list; // to silence compiler warning
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
- if (forced_stop != 2) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (quarteriters > 1) {
- SECOND_LEVEL_CHECKS;
+ for (iter = 0; iter < round; ++iter) {
+ // Check vertical and horizontal sub-pixel positions.
+ for (idx = 0; idx < 4; ++idx) {
+ tr = br + search_step[idx].row;
+ tc = bc + search_step[idx].col;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+ int row_offset = (tr & 0x07) << 1;
+ int col_offset = (tc & 0x07) << 1;
+ MV this_mv;
+ this_mv.row = tr;
+ this_mv.col = tc;
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ src_address, src_stride, &sse, second_pred);
+ cost_array[idx] = thismse +
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ if (cost_array[idx] < besterr) {
+ best_idx = idx;
+ besterr = cost_array[idx];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
+ }
}
- tr = br;
- tc = bc;
- }
- if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (eighthiters > 1) {
- SECOND_LEVEL_CHECKS;
+ // Check diagonal sub-pixel position
+ tc = bc + (cost_array[0] < cost_array[1] ? -hstep : hstep);
+ tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+ int row_offset = (tr & 0x07) << 1;
+ int col_offset = (tc & 0x07) << 1;
+ MV this_mv = {tr, tc};
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ src_address, src_stride, &sse, second_pred);
+ cost_array[4] = thismse +
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ if (cost_array[4] < besterr) {
+ best_idx = 4;
+ besterr = cost_array[4];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
}
+
+ if (best_idx < 4 && best_idx >= 0) {
+ br += search_step[best_idx].row;
+ bc += search_step[best_idx].col;
+ } else if (best_idx == 4) {
+ br = tr;
+ bc = tc;
+ }
+
+ if (iters_per_step > 1)
+ SECOND_LEVEL_CHECKS;
+
tr = br;
tc = bc;
+
+ search_step += 4;
+ hstep >>= 1;
+ best_idx = -1;
}
+
+ // Each subsequent iteration checks at least one point in common with
+ // the last iteration could be 2 ( if diag selected) 1/4 pel
+
// These lines insure static analysis doesn't warn that
// tr and tc aren't used after the above point.
(void) tr;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 3315aa6a1..7a1b0cc1f 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -295,16 +295,16 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
}
if (speed >= 7) {
+ sf->adaptive_rd_thresh = 3;
sf->mv.search_method = FAST_DIAMOND;
sf->mv.fullpel_search_step_param = 10;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
800 : 300;
- sf->elevate_newmv_thresh = 2500;
}
if (speed >= 12) {
- sf->elevate_newmv_thresh = 4000;
+ sf->adaptive_rd_thresh = 4;
sf->mv.subpel_force_stop = 2;
}
@@ -386,7 +386,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->search_type_check_frequency = 50;
sf->encode_breakout_thresh = 0;
sf->elevate_newmv_thresh = 0;
- // Recode loop tolerence %.
+ // Recode loop tolerance %.
sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE;
sf->tx_size_search_breakout = 0;