summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples.mk1
-rw-r--r--libs.mk5
-rw-r--r--mainpage.dox2
-rw-r--r--tools/tiny_ssim.c38
-rw-r--r--usage_cx.dox2
-rw-r--r--usage_dx.dox2
-rw-r--r--vp8/common/mfqe.c2
-rw-r--r--vp8/common/postproc.c2
-rw-r--r--vp8/decoder/onyxd_if.c16
-rw-r--r--vp8/encoder/onyx_if.c8
-rw-r--r--vp8/vp8_cx_iface.c15
-rw-r--r--vp8/vp8_dx_iface.c19
-rw-r--r--vp9/encoder/vp9_encodeframe.c4
-rw-r--r--vp9/encoder/vp9_encoder.c266
-rw-r--r--vp9/encoder/vp9_encoder.h1
-rw-r--r--vp9/encoder/vp9_speed_features.c43
-rw-r--r--vp9/encoder/vp9_svc_layercontext.c5
-rw-r--r--vp9/encoder/vp9_svc_layercontext.h7
18 files changed, 230 insertions, 208 deletions
diff --git a/examples.mk b/examples.mk
index 1187f147a..a1d4eb68c 100644
--- a/examples.mk
+++ b/examples.mk
@@ -404,3 +404,4 @@ CLEAN-OBJS += examples.doxy samples.dox $(ALL_EXAMPLES:.c=.dox)
DOCS-yes += examples.doxy samples.dox
examples.doxy: samples.dox $(ALL_EXAMPLES:.c=.dox)
@echo "INPUT += $^" > $@
+ @echo "ENABLED_SECTIONS += samples" >> $@
diff --git a/libs.mk b/libs.mk
index 5dfb78ff1..7ec8c8756 100644
--- a/libs.mk
+++ b/libs.mk
@@ -112,11 +112,6 @@ ifeq ($(CONFIG_DECODERS),yes)
CODEC_DOC_SECTIONS += decoder
endif
-# Suppress -Wextra warnings in first party code pending investigation.
-# https://bugs.chromium.org/p/webm/issues/detail?id=1246
-$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
-$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
-
ifeq ($(CONFIG_MSVS),yes)
CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd)
diff --git a/mainpage.dox b/mainpage.dox
index ec202fa4f..4b0dff087 100644
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -25,8 +25,10 @@
release.
- The \ref readme contains instructions on recompiling the sample applications.
- Read the \ref usage "usage" for a narrative on codec usage.
+ \if samples
- Read the \ref samples "sample code" for examples of how to interact with the
codec.
+ \endif
- \ref codec reference
\if encoder
- \ref encoder reference
diff --git a/tools/tiny_ssim.c b/tools/tiny_ssim.c
index 1f73c73c1..67a6903b5 100644
--- a/tools/tiny_ssim.c
+++ b/tools/tiny_ssim.c
@@ -50,7 +50,8 @@ static uint64_t calc_plane_error16(uint16_t *orig, int orig_stride,
}
return total_sse;
}
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, uint8_t *recon,
int recon_stride, unsigned int cols,
unsigned int rows) {
@@ -269,6 +270,7 @@ static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) {
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8);
}
+#if CONFIG_VP9_HIGHBITDEPTH
static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,
int rp, uint32_t bd, uint32_t shift) {
uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
@@ -277,6 +279,7 @@ static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,
return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift),
sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd);
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
@@ -300,6 +303,7 @@ static double ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1,
return ssim_total;
}
+#if CONFIG_VP9_HIGHBITDEPTH
static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
int stride_img1, int stride_img2, int width,
int height, uint32_t bd, uint32_t shift) {
@@ -321,6 +325,7 @@ static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
ssim_total /= samples;
return ssim_total;
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity
//
@@ -565,35 +570,6 @@ double get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
return inconsistency_total;
}
-double highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight,
- uint32_t bd, uint32_t in_bd) {
- double a, b, c;
- double ssimv;
- uint32_t shift = 0;
-
- assert(bd >= in_bd);
- shift = bd - in_bd;
-
- a = highbd_ssim2(source->y_buffer, dest->y_buffer, source->y_stride,
- dest->y_stride, source->y_crop_width, source->y_crop_height,
- in_bd, shift);
-
- b = highbd_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride,
- dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height, in_bd, shift);
-
- c = highbd_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride,
- dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height, in_bd, shift);
-
- ssimv = a * .8 + .1 * (b + c);
-
- *weight = 1;
-
- return ssimv;
-}
-
int main(int argc, char *argv[]) {
FILE *framestats = NULL;
int bit_depth = 8;
@@ -711,7 +687,7 @@ int main(int argc, char *argv[]) {
#define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \
ssim = ssim2(buf0, buf1, w, w, w, h); \
psnr = calc_plane_error(buf0, w, buf1, w, w, h);
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
if (n_frames == allocated_frames) {
allocated_frames = allocated_frames == 0 ? 1024 : allocated_frames * 2;
diff --git a/usage_cx.dox b/usage_cx.dox
index 92b0d34ef..b2220cfdd 100644
--- a/usage_cx.dox
+++ b/usage_cx.dox
@@ -8,6 +8,8 @@
\ref usage_deadline.
+ \if samples
\ref samples
+ \endif
*/
diff --git a/usage_dx.dox b/usage_dx.dox
index 883ce2492..85063f705 100644
--- a/usage_dx.dox
+++ b/usage_dx.dox
@@ -11,7 +11,9 @@
\ref usage_postproc based on the amount of free CPU time. For more
information on the <code>deadline</code> parameter, see \ref usage_deadline.
+ \if samples
\ref samples
+ \endif
\section usage_cb Callback Based Decoding
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
index aad908572..1fe7363f1 100644
--- a/vp8/common/mfqe.c
+++ b/vp8/common/mfqe.c
@@ -235,7 +235,7 @@ void vp8_multiframe_quality_enhance(VP8_COMMON *cm) {
FRAME_TYPE frame_type = cm->frame_type;
/* Point at base of Mb MODE_INFO list has motion vectors etc */
- const MODE_INFO *mode_info_context = cm->show_frame_mi;
+ const MODE_INFO *mode_info_context = cm->mi;
int mb_row;
int mb_col;
int totmap, map[4];
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 8c292d616..1ff2e5cc5 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -325,7 +325,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest,
vpx_clear_system_state();
if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid &&
- oci->current_video_frame >= 2 &&
+ oci->current_video_frame > 10 &&
oci->postproc_state.last_base_qindex < 60 &&
oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) {
vp8_multiframe_quality_enhance(oci);
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index bb34821f3..918c68626 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -321,22 +321,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx];
pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx];
- if (setjmp(pbi->common.error.jmp)) {
- /* We do not know if the missing frame(s) was supposed to update
- * any of the reference buffers, but we act conservative and
- * mark only the last buffer as corrupted.
- */
- cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
-
- if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) {
- cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
- }
- pbi->common.error.setjmp = 0;
- goto decode_exit;
- }
-
- pbi->common.error.setjmp = 1;
-
retcode = vp8_decode_frame(pbi);
if (retcode < 0) {
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 8de8ca18e..adc25024c 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4813,14 +4813,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
cm = &cpi->common;
- if (setjmp(cpi->common.error.jmp)) {
- cpi->common.error.setjmp = 0;
- vpx_clear_system_state();
- return VPX_CODEC_CORRUPT_FRAME;
- }
-
- cpi->common.error.setjmp = 1;
-
vpx_usec_timer_start(&cmptimer);
cpi->source = NULL;
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 71eadeb08..d01d2095f 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -16,6 +16,7 @@
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/system_state.h"
#include "vpx_ports/vpx_once.h"
#include "vp8/encoder/onyx_int.h"
#include "vpx/vp8cx.h"
@@ -796,9 +797,11 @@ static vpx_codec_err_t set_reference_and_update(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
const vpx_image_t *img, vpx_codec_pts_t pts,
unsigned long duration,
- vpx_enc_frame_flags_t flags,
+ vpx_enc_frame_flags_t enc_flags,
unsigned long deadline) {
- vpx_codec_err_t res = VPX_CODEC_OK;
+ volatile vpx_codec_err_t res = VPX_CODEC_OK;
+ // Make a copy as volatile to avoid -Wclobbered with longjmp.
+ volatile vpx_enc_frame_flags_t flags = enc_flags;
if (!ctx->cfg.rc_target_bitrate) {
#if CONFIG_MULTI_RES_ENCODING
@@ -840,6 +843,12 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
}
}
+ if (setjmp(ctx->cpi->common.error.jmp)) {
+ ctx->cpi->common.error.setjmp = 0;
+ vpx_clear_system_state();
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
+
/* Initialize the encoder instance on the first frame*/
if (!res && ctx->cpi) {
unsigned int lib_flags;
@@ -886,6 +895,8 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
cx_data_end = ctx->cx_data + cx_data_sz;
lib_flags = 0;
+ ctx->cpi->common.error.setjmp = 1;
+
while (cx_data_sz >= ctx->cx_data_sz / 2) {
comp_data_state = vp8_get_compressed_data(
ctx->cpi, &lib_flags, &size, cx_data, cx_data_end, &dst_time_stamp,
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index a2008b903..6d1c5f595 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -335,8 +335,8 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
if (!res) {
VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0];
+ VP8_COMMON *const pc = &pbi->common;
if (resolution_change) {
- VP8_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
#if CONFIG_MULTITHREAD
int i;
@@ -428,6 +428,23 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
pbi->common.fb_idx_ref_cnt[0] = 0;
}
+ if (setjmp(pbi->common.error.jmp)) {
+ /* We do not know if the missing frame(s) was supposed to update
+ * any of the reference buffers, but we act conservative and
+ * mark only the last buffer as corrupted.
+ */
+ pc->yv12_fb[pc->lst_fb_idx].corrupted = 1;
+
+ if (pc->fb_idx_ref_cnt[pc->new_fb_idx] > 0) {
+ pc->fb_idx_ref_cnt[pc->new_fb_idx]--;
+ }
+ pc->error.setjmp = 0;
+ res = update_error_state(ctx, &pbi->common.error);
+ return res;
+ }
+
+ pbi->common.error.setjmp = 1;
+
/* update the pbi fragment data */
pbi->fragments = ctx->fragments;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 98343f0d2..9c82f2ebe 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1382,7 +1382,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
x->sb_mvrow_part = mi->mv[0].as_mv.row;
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
cpi->svc.spatial_layer_id == 0 &&
- cpi->rc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
+ cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
cm->width > 640 && cm->height > 480) {
// Disable split below 16x16 block size when scroll motion is detected.
// TODO(marpan/jianj): Improve this condition: issue is that search
@@ -5734,7 +5734,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
int64_t mc_dep_cost_base = 0;
int row, col;
- for (row = 0; row < cm->mi_rows; ++row) {
+ for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
for (col = 0; col < cm->mi_cols; ++col) {
TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
intra_cost_base += this_stats->intra_cost;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 0b1d74a59..b10b91c98 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2110,7 +2110,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
BufferPool *const pool) {
- unsigned int i, frame;
+ unsigned int i;
VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
@@ -2361,51 +2361,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_set_speed_features_framesize_independent(cpi);
vp9_set_speed_features_framesize_dependent(cpi);
- if (cpi->sf.enable_tpl_model) {
- const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_NON_GREEDY_MV
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_arr,
- vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_sort,
- vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_heap,
- vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
-#endif
- // TODO(jingning): Reduce the actual memory use for tpl model build up.
- for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
-#if CONFIG_NON_GREEDY_MV
- int sqr_bsize;
- int rf_idx;
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
- CHECK_MEM_ERROR(
- cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
- vpx_calloc(mi_rows * mi_cols,
- sizeof(*cpi->tpl_stats[frame]
- .pyramid_mv_arr[rf_idx][sqr_bsize])));
- }
- }
-#endif
- CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
- vpx_calloc(mi_rows * mi_cols,
- sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
- cpi->tpl_stats[frame].is_valid = 0;
- cpi->tpl_stats[frame].width = mi_cols;
- cpi->tpl_stats[frame].height = mi_rows;
- cpi->tpl_stats[frame].stride = mi_cols;
- cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
- cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
- }
-
- for (frame = 0; frame < REF_FRAMES; ++frame) {
- cpi->enc_frame_buf[frame].mem_valid = 0;
- cpi->enc_frame_buf[frame].released = 1;
- }
- }
+ cpi->feature_score_loc_alloc = 0;
+#endif // CONFIG_NON_GREEDY_MV
+ for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
@@ -3848,12 +3807,15 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
svc->high_source_sad_superframe = cpi->rc.high_source_sad;
+ svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
// On scene change reset temporal layer pattern to TL0.
// Note that if the base/lower spatial layers are skipped: instead of
// inserting base layer here, we force max-q for the next superframe
// with lower spatial layers: this is done in vp9_encodedframe_overshoot()
// when max-q is decided for the current layer.
- if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0) {
+ // Only do this reset for bypass/flexible mode.
+ if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
+ svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
// rc->high_source_sad will get reset so copy it to restore it.
int tmp_high_source_sad = cpi->rc.high_source_sad;
vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
@@ -6185,6 +6147,79 @@ static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
}
#endif // USE_PQSORT
#endif // CHANGE_MV_SEARCH_ORDER
+
+// Builds the motion field of the frame at |frame_idx| using |bsize| blocks.
+//
+// Pass 1 walks xd->cur_buf in |bsize| steps, computes a feature score per
+// block with get_feature_score(), stores it in the frame's TplDepStats entry
+// and in cpi->feature_score_loc_arr, and appends the location to
+// cpi->feature_score_loc_sort, which is then sorted with
+// compare_feature_score.
+//
+// Pass 2 calls do_motion_search() on the blocks. The visiting order is
+// selected at compile time:
+//   CHANGE_MV_SEARCH_ORDER && !USE_PQSORT: order of the sorted array.
+//   CHANGE_MV_SEARCH_ORDER && USE_PQSORT: max-heap order, seeded with the
+//     first sorted entry and extended by add_nb_blocks_to_heap().
+//   !CHANGE_MV_SEARCH_ORDER: raster order.
+static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx,
+                               YV12_BUFFER_CONFIG *ref_frame[3],
+                               BLOCK_SIZE bsize) {
+  VP9_COMMON *cm = &cpi->common;
+  ThreadData *td = &cpi->td;
+  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  int fs_loc_sort_size;
+  // Declare only the locals that the selected search order uses: the sorted
+  // walk needs |i|, the heap walk needs |fs_loc_heap_size|.
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+  int fs_loc_heap_size;
+#else
+  int i;
+#endif  // USE_PQSORT
+#endif  // CHANGE_MV_SEARCH_ORDER
+  int mi_row, mi_col;
+
+  tpl_frame->lambda = 250;
+
+  // Pass 1: score every block and collect its location for sorting.
+  fs_loc_sort_size = 0;
+  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+      const int mb_y_offset =
+          mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+      const int bw = 4 << b_width_log2_lookup[bsize];
+      const int bh = 4 << b_height_log2_lookup[bsize];
+      TplDepStats *tpl_stats =
+          &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+      FEATURE_SCORE_LOC *fs_loc =
+          &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
+      tpl_stats->feature_score = get_feature_score(
+          xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
+      fs_loc->visited = 0;
+      fs_loc->feature_score = tpl_stats->feature_score;
+      fs_loc->mi_row = mi_row;
+      fs_loc->mi_col = mi_col;
+      cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
+      ++fs_loc_sort_size;
+    }
+  }
+
+  qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
+        sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
+
+  // Pass 2: motion search in the configured order.
+#if CHANGE_MV_SEARCH_ORDER
+#if !USE_PQSORT
+  for (i = 0; i < fs_loc_sort_size; ++i) {
+    FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
+    do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+                     fs_loc->mi_col);
+  }
+#else   // !USE_PQSORT
+  fs_loc_heap_size = 0;
+  max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
+                cpi->feature_score_loc_sort[0]);
+
+  while (fs_loc_heap_size > 0) {
+    FEATURE_SCORE_LOC *fs_loc;
+    max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
+
+    fs_loc->visited = 1;
+
+    do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+                     fs_loc->mi_col);
+
+    add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
+                          &fs_loc_heap_size);
+  }
+#endif  // !USE_PQSORT
+#else   // CHANGE_MV_SEARCH_ORDER
+  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+      do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
+    }
+  }
+#endif  // CHANGE_MV_SEARCH_ORDER
+}
#endif // CONFIG_NON_GREEDY_MV
void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
@@ -6217,17 +6252,6 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
int64_t recon_error, sse;
-#if CONFIG_NON_GREEDY_MV
- int rf_idx;
- int fs_loc_sort_size;
-#if CHANGE_MV_SEARCH_ORDER
-#if USE_PQSORT
- int fs_loc_heap_size;
-#else
- int i;
-#endif // USE_PQSORT
-#endif // CHANGE_MV_SEARCH_ORDER
-#endif // CONFIG_NON_GREEDY_MV
// Setup scaling factor
#if CONFIG_VP9_HIGHBITDEPTH
@@ -6268,66 +6292,9 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
vp9_frame_init_quantizer(cpi);
#if CONFIG_NON_GREEDY_MV
- tpl_frame->lambda = 250;
- fs_loc_sort_size = 0;
-
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- const int mb_y_offset =
- mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
- const int bw = 4 << b_width_log2_lookup[bsize];
- const int bh = 4 << b_height_log2_lookup[bsize];
- TplDepStats *tpl_stats =
- &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
- FEATURE_SCORE_LOC *fs_loc =
- &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
- tpl_stats->feature_score = get_feature_score(
- xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
- fs_loc->visited = 0;
- fs_loc->feature_score = tpl_stats->feature_score;
- fs_loc->mi_row = mi_row;
- fs_loc->mi_col = mi_col;
- cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
- ++fs_loc_sort_size;
- }
- }
-
- qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
- sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
-
-#if CHANGE_MV_SEARCH_ORDER
-#if !USE_PQSORT
- for (i = 0; i < fs_loc_sort_size; ++i) {
- FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
- do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
- fs_loc->mi_col);
- }
-#else // !USE_PQSORT
- fs_loc_heap_size = 0;
- max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
- cpi->feature_score_loc_sort[0]);
-
- while (fs_loc_heap_size > 0) {
- FEATURE_SCORE_LOC *fs_loc;
- max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
-
- fs_loc->visited = 1;
-
- do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
- fs_loc->mi_col);
+ build_motion_field(cpi, xd, frame_idx, ref_frame, bsize);
+#endif
- add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
- &fs_loc_heap_size);
- }
-#endif // !USE_PQSORT
-#else // CHANGE_MV_SEARCH_ORDER
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
- }
- }
-#endif // CHANGE_MV_SEARCH_ORDER
-#endif // CONFIG_NON_GREEDY_MV
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
@@ -6341,6 +6308,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
bsize);
#if CONFIG_NON_GREEDY_MV
{
+ int rf_idx;
TplDepStats *this_tpl_stats =
&tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
@@ -6436,6 +6404,71 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
#endif // DUMP_TPL_STATS
#endif // CONFIG_NON_GREEDY_MV
+// Allocates (or re-allocates) the per-frame buffers used by the TPL model.
+//
+// The buffer dimensions are cm->mi_rows / cm->mi_cols rounded up with
+// mi_cols_aligned_to_sb(). A tpl_stats slot is left untouched when it already
+// owns a buffer at least that large; otherwise its old buffers are released
+// and new zeroed ones are allocated, and the slot is marked not valid.
+// Finally every enc_frame_buf entry is reset to "no memory, released".
+// Allocation failures are reported through CHECK_MEM_ERROR.
+static void init_tpl_buffer(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int frame;
+
+  const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+  const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+  int sqr_bsize;
+  int rf_idx;
+
+  // TODO(angiebird): This probably needs further modifications to support
+  // frame scaling later on.
+  // The feature-score arrays are allocated once, guarded by
+  // feature_score_loc_alloc, and are not resized afterwards.
+  if (cpi->feature_score_loc_alloc == 0) {
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_arr,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_sort,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_heap,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
+
+    cpi->feature_score_loc_alloc = 1;
+  }
+#endif
+
+  // TODO(jingning): Reduce the actual memory use for tpl model build up.
+  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+    // Keep the existing buffers when they are already large enough.
+    if (cpi->tpl_stats[frame].width >= mi_cols &&
+        cpi->tpl_stats[frame].height >= mi_rows &&
+        cpi->tpl_stats[frame].tpl_stats_ptr)
+      continue;
+
+#if CONFIG_NON_GREEDY_MV
+    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+      for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
+        // Release each per-(reference, block size) buffer before replacing
+        // it. Freeing pyramid_mv_arr itself and then storing into its slots
+        // is invalid and would also leak the old per-slot buffers.
+        // NOTE(review): assumes the slots are NULL before the first
+        // allocation - confirm cpi is zero-initialised at creation.
+        vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
+        CHECK_MEM_ERROR(
+            cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
+            vpx_calloc(
+                mi_rows * mi_cols,
+                sizeof(
+                    *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
+      }
+    }
+#endif
+    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+    CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
+                    vpx_calloc(mi_rows * mi_cols,
+                               sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
+    cpi->tpl_stats[frame].is_valid = 0;
+    cpi->tpl_stats[frame].width = mi_cols;
+    cpi->tpl_stats[frame].height = mi_rows;
+    cpi->tpl_stats[frame].stride = mi_cols;
+    cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
+    cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
+  }
+
+  for (frame = 0; frame < REF_FRAMES; ++frame) {
+    cpi->enc_frame_buf[frame].mem_valid = 0;
+    cpi->enc_frame_buf[frame].released = 1;
+  }
+}
+
static void setup_tpl_stats(VP9_COMP *cpi) {
GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
@@ -6673,6 +6706,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (gf_group_index == 1 &&
cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
cpi->sf.enable_tpl_model) {
+ init_tpl_buffer(cpi);
vp9_estimate_qp_gop(cpi);
setup_tpl_stats(cpi);
}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index c7c45e3a4..e4175f6ed 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -570,6 +570,7 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
EncFrameBuf enc_frame_buf[REF_FRAMES];
#if CONFIG_NON_GREEDY_MV
+ int feature_score_loc_alloc;
FEATURE_SCORE_LOC *feature_score_loc_arr;
FEATURE_SCORE_LOC **feature_score_loc_sort;
FEATURE_SCORE_LOC **feature_score_loc_heap;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index e4a5f3e18..602147421 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -424,6 +424,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
static void set_rt_speed_feature_framesize_independent(
VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) {
VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
const int is_keyframe = cm->frame_type == KEY_FRAME;
const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
@@ -479,7 +480,7 @@ static void set_rt_speed_feature_framesize_independent(
// Reference masking only enabled for 1 spatial layer, and if none of the
// references have been scaled. The latter condition needs to be checked
// for external or internal dynamic resize.
- sf->reference_masking = (cpi->svc.number_spatial_layers == 1);
+ sf->reference_masking = (svc->number_spatial_layers == 1);
if (sf->reference_masking == 1 &&
(cpi->external_resize == 1 ||
cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) {
@@ -615,7 +616,7 @@ static void set_rt_speed_feature_framesize_independent(
}
// Keep nonrd_keyframe = 1 for non-base spatial layers to prevent
// increase in encoding time.
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
+ if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1;
if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
cpi->oxcf.rc_mode == VPX_CBR)
sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ;
@@ -653,7 +654,7 @@ static void set_rt_speed_feature_framesize_independent(
(cm->width * cm->height <= 640 * 360) ? 40000 : 60000;
if (cpi->content_state_sb_fd == NULL &&
(!cpi->use_svc ||
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+ svc->spatial_layer_id == svc->number_spatial_layers - 1)) {
cpi->content_state_sb_fd = (uint8_t *)vpx_calloc(
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
}
@@ -662,7 +663,7 @@ static void set_rt_speed_feature_framesize_independent(
// Enable short circuit for low temporal variance.
sf->short_circuit_low_temp_var = 1;
}
- if (cpi->svc.temporal_layer_id > 0) {
+ if (svc->temporal_layer_id > 0) {
sf->adaptive_rd_thresh = 4;
sf->limit_newmv_early_exit = 0;
sf->base_mv_aggressive = 1;
@@ -676,16 +677,15 @@ static void set_rt_speed_feature_framesize_independent(
sf->mv.fullpel_search_step_param = 10;
// For SVC: use better mv search on base temporal layer, and only
// on base spatial layer if highest resolution is above 640x360.
- if (cpi->svc.number_temporal_layers > 2 &&
- cpi->svc.temporal_layer_id == 0 &&
- (cpi->svc.spatial_layer_id == 0 ||
+ if (svc->number_temporal_layers > 2 && svc->temporal_layer_id == 0 &&
+ (svc->spatial_layer_id == 0 ||
cpi->oxcf.width * cpi->oxcf.height <= 640 * 360)) {
sf->mv.search_method = NSTEP;
sf->mv.fullpel_search_step_param = 6;
}
- if (cpi->svc.temporal_layer_id > 0 || cpi->svc.spatial_layer_id > 1) {
+ if (svc->temporal_layer_id > 0 || svc->spatial_layer_id > 1) {
sf->use_simple_block_yrd = 1;
- if (cpi->svc.non_reference_frame)
+ if (svc->non_reference_frame)
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE;
}
if (cpi->use_svc && cpi->row_mt && cpi->oxcf.max_threads > 1)
@@ -696,28 +696,28 @@ static void set_rt_speed_feature_framesize_independent(
if (!cpi->last_frame_dropped && cpi->resize_state == ORIG &&
!cpi->external_resize &&
(!cpi->use_svc ||
- (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
- !cpi->svc.last_layer_dropped[cpi->svc.number_spatial_layers - 1]))) {
+ (svc->spatial_layer_id == svc->number_spatial_layers - 1 &&
+ !svc->last_layer_dropped[svc->number_spatial_layers - 1]))) {
sf->copy_partition_flag = 1;
cpi->max_copied_frame = 2;
// The top temporal enhancement layer (for number of temporal layers > 1)
// are non-reference frames, so use large/max value for max_copied_frame.
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1)
+ if (svc->number_temporal_layers > 1 &&
+ svc->temporal_layer_id == svc->number_temporal_layers - 1)
cpi->max_copied_frame = 255;
}
// For SVC: enable use of lower resolution partition for higher resolution,
// only for 3 spatial layers and when config/top resolution is above VGA.
// Enable only for non-base temporal layer frames.
- if (cpi->use_svc && cpi->svc.use_partition_reuse &&
- cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 &&
+ if (cpi->use_svc && svc->use_partition_reuse &&
+ svc->number_spatial_layers == 3 && svc->temporal_layer_id > 0 &&
cpi->oxcf.width * cpi->oxcf.height > 640 * 480)
sf->svc_use_lowres_part = 1;
// For SVC when golden is used as second temporal reference: to avoid
// encode time increase only use this feature on base temporal layer.
// (i.e remove golden flag from frame_flags for temporal_layer_id > 0).
- if (cpi->use_svc && cpi->svc.use_gf_temporal_ref_current_layer &&
- cpi->svc.temporal_layer_id > 0)
+ if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer &&
+ svc->temporal_layer_id > 0)
cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
}
@@ -789,17 +789,18 @@ static void set_rt_speed_feature_framesize_independent(
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_lastgolden_frame_usage));
}
- if (cpi->svc.previous_frame_is_intra_only) {
+ if (svc->previous_frame_is_intra_only) {
sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_64X64;
}
// Special case for screen content: increase motion search on base spatial
// layer when high motion is detected or previous SL0 frame was dropped.
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 &&
- cpi->svc.spatial_layer_id == 0 &&
- (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) {
+ (svc->high_num_blocks_with_motion || svc->last_layer_dropped[0])) {
sf->mv.search_method = NSTEP;
- sf->mv.fullpel_search_step_param = 2;
+ // TODO(marpan/jianj): Tune this setting for screensharing. For now use
+ // larger step_param for non-base layer, to avoid increase in encode time.
+ sf->mv.fullpel_search_step_param = (svc->spatial_layer_id == 0) ? 2 : 4;
}
}
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 510087580..df5af6dc9 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -884,7 +884,10 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
svc->non_reference_frame = 0;
}
- if (svc->spatial_layer_id == 0) svc->high_source_sad_superframe = 0;
+ if (svc->spatial_layer_id == 0) {
+ svc->high_source_sad_superframe = 0;
+ svc->high_num_blocks_with_motion = 0;
+ }
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
svc->last_layer_dropped[svc->spatial_layer_id] &&
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index f1f2457b2..c25644617 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -143,10 +143,11 @@ typedef struct SVC {
INTER_LAYER_PRED disable_inter_layer_pred;
- // Flag to indicate scene change at current superframe, scene detection is
- // currently checked for each superframe prior to encoding, on the full
- // resolution source.
+ // Flag to indicate scene change and high num of motion blocks at current
+ // superframe, scene detection is currently checked for each superframe prior
+ // to encoding, on the full resolution source.
int high_source_sad_superframe;
+ int high_num_blocks_with_motion;
// Flags used to get SVC pattern info.
int update_buffer_slot[VPX_SS_MAX_LAYERS];