summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/ppc/vp9_idct_vsx.c 115
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 6
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 101
-rw-r--r-- vp9/encoder/vp9_encoder.c 20
-rw-r--r-- vp9/encoder/vp9_encoder.h 2
-rw-r--r-- vp9/encoder/vp9_firstpass.c 200
-rw-r--r-- vp9/encoder/vp9_firstpass.h 18
-rw-r--r-- vp9/encoder/vp9_speed_features.c 4
-rw-r--r-- vp9/encoder/vp9_speed_features.h 1
-rw-r--r-- vp9/vp9_common.mk 1
10 files changed, 428 insertions, 40 deletions
diff --git a/vp9/common/ppc/vp9_idct_vsx.c b/vp9/common/ppc/vp9_idct_vsx.c
new file mode 100644
index 000000000..1b2a93edb
--- /dev/null
+++ b/vp9/common/ppc/vp9_idct_vsx.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/ppc/inv_txfm_vsx.h"
+#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"
+
+#include "vp9/common/vp9_enums.h"
+
+void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {  // VSX 4x4 inverse transform; tx_type picks DCT/ADST passes
+ int16x8_t in[2], out[2];
+
+ in[0] = load_tran_low(0, input);  // first vector, offset 0
+ in[1] = load_tran_low(8 * sizeof(*input), input);  // 8 coefficients further
+
+ switch (tx_type) {  // two 1-D passes per case, ping-ponging in <-> out
+ case DCT_DCT:
+ vpx_idct4_vsx(in, out);
+ vpx_idct4_vsx(out, in);
+ break;
+ case ADST_DCT:
+ vpx_idct4_vsx(in, out);
+ vp9_iadst4_vsx(out, in);
+ break;
+ case DCT_ADST:
+ vp9_iadst4_vsx(in, out);
+ vpx_idct4_vsx(out, in);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);  // the only other type expected here
+ vp9_iadst4_vsx(in, out);
+ vp9_iadst4_vsx(out, in);
+ break;
+ }
+
+ // NOTE(review): presumably rounds and adds the result into dest -- confirm.
+ vpx_round_store4x4_vsx(in, out, dest, stride);
+}
+
+void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {  // VSX 8x8 inverse transform; tx_type picks DCT/ADST passes
+ int16x8_t in[8], out[8];
+
+ // Load eight input vectors; each offset steps by 8 coefficients' worth of bytes.
+ in[0] = load_tran_low(0, input);
+ in[1] = load_tran_low(8 * sizeof(*input), input);
+ in[2] = load_tran_low(2 * 8 * sizeof(*input), input);
+ in[3] = load_tran_low(3 * 8 * sizeof(*input), input);
+ in[4] = load_tran_low(4 * 8 * sizeof(*input), input);
+ in[5] = load_tran_low(5 * 8 * sizeof(*input), input);
+ in[6] = load_tran_low(6 * 8 * sizeof(*input), input);
+ in[7] = load_tran_low(7 * 8 * sizeof(*input), input);
+
+ switch (tx_type) {  // two 1-D passes per case, ping-ponging in <-> out
+ case DCT_DCT:
+ vpx_idct8_vsx(in, out);
+ vpx_idct8_vsx(out, in);
+ break;
+ case ADST_DCT:
+ vpx_idct8_vsx(in, out);
+ vp9_iadst8_vsx(out, in);
+ break;
+ case DCT_ADST:
+ vp9_iadst8_vsx(in, out);
+ vpx_idct8_vsx(out, in);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);  // the only other type expected here
+ vp9_iadst8_vsx(in, out);
+ vp9_iadst8_vsx(out, in);
+ break;
+ }
+
+ // NOTE(review): presumably rounds and adds the result into dest -- confirm.
+ vpx_round_store8x8_vsx(in, dest, stride);
+}
+
+void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {  // VSX 16x16 inverse transform, by tx_type
+ int16x8_t in0[16], in1[16];
+
+ LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0);  // from 0
+ LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input),
+ 8 * sizeof(*input), in1);  // starts 128 (8 * 8 * 2) coefficients in
+
+ switch (tx_type) {  // each case runs two passes over the pair (in0, in1)
+ case DCT_DCT:
+ vpx_idct16_vsx(in0, in1);
+ vpx_idct16_vsx(in0, in1);
+ break;
+ case ADST_DCT:
+ vpx_idct16_vsx(in0, in1);
+ vpx_iadst16_vsx(in0, in1);
+ break;
+ case DCT_ADST:
+ vpx_iadst16_vsx(in0, in1);
+ vpx_idct16_vsx(in0, in1);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);  // the only other type expected here
+ vpx_iadst16_vsx(in0, in1);
+ vpx_iadst16_vsx(in0, in1);
+ break;
+ }
+
+ // NOTE(review): presumably rounds and adds the result into dest -- confirm.
+ vpx_round_store16x16_vsx(in0, in1, dest, stride);
+}
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 8f5b0bf30..6d7f95260 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -67,9 +67,9 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
- specialize qw/vp9_iht4x4_16_add neon sse2/;
- specialize qw/vp9_iht8x8_64_add neon sse2/;
- specialize qw/vp9_iht16x16_256_add neon sse2/;
+ specialize qw/vp9_iht4x4_16_add neon sse2 vsx/;
+ specialize qw/vp9_iht8x8_64_add neon sse2 vsx/;
+ specialize qw/vp9_iht16x16_256_add neon sse2 vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
specialize qw/vp9_iht4x4_16_add dspr2 msa/;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 383f7a8d7..f6fcd9d33 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1967,6 +1967,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
+ rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+
x->rdmult = orig_rdmult;
// TODO(jingning) The rate-distortion optimization flow needs to be
@@ -3317,20 +3319,73 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
#define FEATURES 4
-static const float partition_breakout_weights_64[FEATURES + 1] = {
- -0.016673f, -0.001025f, -0.000032f, 0.000833f, 1.94261885f - 2.1f,
+#define Q_CTX 2
+static const float partition_breakout_weights_64[Q_CTX][FEATURES + 1] = {
+ {
+ -0.016673f,
+ -0.001025f,
+ -0.000032f,
+ 0.000833f,
+ 1.94261885f - 2.1f,
+ },
+ {
+ -0.160867f,
+ -0.002101f,
+ 0.000011f,
+ 0.002448f,
+ 1.65738142f - 2.5f,
+ },
};
-static const float partition_breakout_weights_32[FEATURES + 1] = {
- -0.010554f, -0.003081f, -0.000134f, 0.004491f, 1.68445992f - 3.5f,
+static const float partition_breakout_weights_32[Q_CTX][FEATURES + 1] = {
+ {
+ -0.010554f,
+ -0.003081f,
+ -0.000134f,
+ 0.004491f,
+ 1.68445992f - 3.5f,
+ },
+ {
+ -0.051489f,
+ -0.007609f,
+ 0.000016f,
+ 0.009792f,
+ 1.28089404f - 2.5f,
+ },
};
-static const float partition_breakout_weights_16[FEATURES + 1] = {
- -0.013154f, -0.002404f, -0.000977f, 0.008450f, 2.57404566f - 5.5f,
+static const float partition_breakout_weights_16[Q_CTX][FEATURES + 1] = {
+ {
+ -0.013154f,
+ -0.002404f,
+ -0.000977f,
+ 0.008450f,
+ 2.57404566f - 5.5f,
+ },
+ {
+ -0.019146f,
+ -0.004018f,
+ 0.000064f,
+ 0.008187f,
+ 2.15043926f - 2.5f,
+ },
};
-static const float partition_breakout_weights_8[FEATURES + 1] = {
- -0.011807f, -0.009873f, -0.000931f, 0.034768f, 1.32254851f - 2.0f,
+static const float partition_breakout_weights_8[Q_CTX][FEATURES + 1] = {
+ {
+ -0.011807f,
+ -0.009873f,
+ -0.000931f,
+ 0.034768f,
+ 1.32254851f - 2.0f,
+ },
+ {
+ -0.003861f,
+ -0.002701f,
+ 0.000100f,
+ 0.013876f,
+ 1.96755111f - 1.5f,
+ },
};
// ML-based partition search breakout.
@@ -3338,22 +3393,30 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
const MACROBLOCK *const x,
const RD_COST *const rd_cost) {
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
+ const VP9_COMMON *const cm = &cpi->common;
float features[FEATURES];
const float *linear_weights = NULL; // Linear model weights.
float linear_score = 0.0f;
+ const int qindex = cm->base_qindex;
+ const int q_ctx = qindex >= 200 ? 0 : 1;
switch (bsize) {
- case BLOCK_64X64: linear_weights = partition_breakout_weights_64; break;
- case BLOCK_32X32: linear_weights = partition_breakout_weights_32; break;
- case BLOCK_16X16: linear_weights = partition_breakout_weights_16; break;
- case BLOCK_8X8: linear_weights = partition_breakout_weights_8; break;
+ case BLOCK_64X64:
+ linear_weights = partition_breakout_weights_64[q_ctx];
+ break;
+ case BLOCK_32X32:
+ linear_weights = partition_breakout_weights_32[q_ctx];
+ break;
+ case BLOCK_16X16:
+ linear_weights = partition_breakout_weights_16[q_ctx];
+ break;
+ case BLOCK_8X8: linear_weights = partition_breakout_weights_8[q_ctx]; break;
default: assert(0 && "Unexpected block size."); return 0;
}
if (!linear_weights) return 0;
{ // Generate feature values.
- const VP9_COMMON *const cm = &cpi->common;
- const int ac_q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
+ const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
const int num_pels_log2 = num_pels_log2_lookup[bsize];
int feature_index = 0;
unsigned int var, sse;
@@ -3385,9 +3448,10 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
linear_score += linear_weights[i] * features[i];
}
- return linear_score >= 0;
+ return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx];
}
#undef FEATURES
+#undef Q_CTX
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
@@ -3559,8 +3623,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
- this_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
this_rdc.rdcost += RDCOST(x->rdmult, x->rddiv,
cpi->partition_cost[pl][PARTITION_NONE], 0);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
@@ -3579,7 +3641,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (!x->e_mbd.lossless && ctx->skippable) {
int use_ml_based_breakout =
cpi->sf.use_ml_partition_search_breakout &&
- cm->base_qindex >= 200;
+ cm->base_qindex >= 150;
#if CONFIG_VP9_HIGHBITDEPTH
if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
use_ml_based_breakout = 0;
@@ -3714,7 +3776,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
sum_rdc.rdcost += RDCOST(x->rdmult, x->rddiv,
cpi->partition_cost[pl][PARTITION_SPLIT], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
@@ -3777,7 +3838,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
sum_rdc.rdcost += RDCOST(x->rdmult, x->rddiv,
cpi->partition_cost[pl][PARTITION_HORZ], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
@@ -3827,7 +3887,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
sum_rdc.rdcost += RDCOST(x->rdmult, x->rddiv,
cpi->partition_cost[pl][PARTITION_VERT], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6ec7a5ee8..74e0d85a5 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2952,7 +2952,7 @@ static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
return force_recode;
}
-void vp9_update_reference_frames(VP9_COMP *cpi) {
+void update_ref_frames(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
@@ -3016,6 +3016,14 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
}
+}
+
+void vp9_update_reference_frames(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ BufferPool *const pool = cm->buffer_pool;
+
+ update_ref_frames(cpi);
+
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->denoiser.denoising_level > kDenLowLow) {
@@ -3054,6 +3062,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
denoise_svc_second_layer);
}
#endif
+
if (is_one_pass_cbr_svc(cpi)) {
// Keep track of frame index for each reference frame.
SVC *const svc = &cpi->svc;
@@ -5670,6 +5679,15 @@ void setup_tpl_stats(VP9_COMP *cpi) {
int tpl_group_frames = 0;
int frame_idx;
+ // TODO(jingning): Make the model support high bit-depth route.
+#if CONFIG_VP9_HIGHBITDEPTH
+ (void)gf_picture;
+ (void)gf_group;
+ (void)tpl_group_frames;
+ (void)frame_idx;
+ return;
+#endif
+
init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
init_tpl_stats(cpi);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index a3d39266f..ec02a78ee 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -758,9 +758,7 @@ typedef struct VP9_COMP {
int num_extra_arfs;
int arf_pos_in_gf[MAX_EXT_ARFS + 1];
int arf_pos_for_ovrly[MAX_EXT_ARFS + 1];
-
int extra_arf_allowed;
- int bwd_ref_allowed;
vpx_roi_map_t roi;
} VP9_COMP;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c13576343..6717d961d 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2135,7 +2135,7 @@ static void define_gf_multi_arf_structure(VP9_COMP *cpi) {
// (3) The bi-predictive group interval is strictly smaller than the
// golden group interval.
const int is_bipred_enabled =
- cpi->bwd_ref_allowed && rc->source_alt_ref_pending &&
+ cpi->extra_arf_allowed && rc->source_alt_ref_pending &&
rc->bipred_group_interval &&
rc->bipred_group_interval <=
(rc->baseline_gf_interval - rc->source_alt_ref_pending);
@@ -2439,6 +2439,151 @@ static void define_gf_group_structure(VP9_COMP *cpi) {
cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
}
+static void allocate_gf_multi_arf_bits(VP9_COMP *cpi, int64_t gf_group_bits,
+ int gf_arf_bits) {
+ VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ FIRSTPASS_STATS frame_stats;
+ int i;
+ int frame_index = 0;
+ int target_frame_size;
+ int key_frame;
+ const int max_bits = frame_max_bits(&cpi->rc, oxcf);
+ int64_t total_group_bits = gf_group_bits;
+ int normal_frames;
+ int normal_frame_bits;
+ int last_frame_reduction = 0;
+ double av_score = 1.0;
+ double tot_norm_frame_score = 1.0;
+ double this_frame_score = 1.0;
+
+ // Define the multi-ARF GF group structure before allocating bits.
+ define_gf_multi_arf_structure(cpi);
+
+ //========================================
+
+ key_frame = cpi->common.frame_type == KEY_FRAME;
+
+ // For key frames the frame target rate is already set and it
+ // is also the golden frame.
+ // === [frame_index == 0] ===
+ if (!key_frame) {
+ gf_group->bit_allocation[frame_index] =
+ rc->source_alt_ref_active ? 0 : gf_arf_bits;
+ }
+
+ // Deduct the boost bits for arf (or gf if it is not a key frame)
+ // from the group total.
+ if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+
+ ++frame_index;
+
+ // === [frame_index == 1] ===
+ // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) {
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+
+ ++frame_index;
+
+ // Skip all the extra-ARF's right after ARF at the starting segment of
+ // the current GF group.
+ if (cpi->num_extra_arfs) {
+ while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
+ ++frame_index;
+ }
+ }
+
+ normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
+ if (normal_frames > 1)
+ normal_frame_bits = (int)(total_group_bits / normal_frames);
+ else
+ normal_frame_bits = (int)total_group_bits;
+
+ if (oxcf->vbr_corpus_complexity) {
+ av_score = get_distribution_av_err(cpi, twopass);
+ tot_norm_frame_score = calculate_group_score(cpi, av_score, normal_frames);
+ }
+
+ // Allocate bits to the other frames in the group.
+ for (i = 0; i < normal_frames; ++i) {
+ if (EOF == input_stats(twopass, &frame_stats)) break;
+
+ if (oxcf->vbr_corpus_complexity) {
+ this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf,
+ &frame_stats, av_score);
+ normal_frame_bits = (int)((double)total_group_bits *
+ (this_frame_score / tot_norm_frame_score));
+ }
+
+ target_frame_size = normal_frame_bits;
+ if ((i == (normal_frames - 1)) && (i >= 1)) {
+ last_frame_reduction = normal_frame_bits / 16;  // trim 1/16 off last frame
+ target_frame_size -= last_frame_reduction;
+ }
+
+ // TODO(zoeliu): Further check whether following is needed for
+ // hierarchical GF group structure.
+ if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
+ target_frame_size -= (target_frame_size >> 4);  // a further 1/16 off
+ }
+
+ target_frame_size =
+ clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));
+
+ if (gf_group->update_type[frame_index] == BRF_UPDATE) {
+ // Boost up the allocated bits on BWDREF_FRAME
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size + (target_frame_size >> 2);  // +1/4
+ } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
+ // Press down the allocated bits on LAST_BIPRED_UPDATE frames
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size - (target_frame_size >> 1);  // -1/2
+ } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
+ // TODO(zoeliu): Investigate whether the allocated bits on BIPRED_UPDATE
+ // frames need to be further adjusted.
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+ } else {
+ assert(gf_group->update_type[frame_index] == LF_UPDATE ||
+ gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+ }
+
+ ++frame_index;
+
+ // Skip all the extra-ARF's.
+ if (cpi->num_extra_arfs) {
+ while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
+ ++frame_index;
+ }
+ }
+
+ // NOTE: We need to configure the frame at the end of the sequence + 1 that
+ // will be the start frame for the next group. Otherwise prior to the
+ // call to vp9_rc_get_second_pass_params() the data will be undefined.
+ if (rc->source_alt_ref_pending) {
+ if (cpi->num_extra_arfs) {
+ // NOTE: For bit allocation, move the allocated bits associated with
+ // INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
+ // i > 0 for extra-ARF's and i == 0 for ARF:
+ // arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE
+ // arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE
+ for (i = cpi->num_extra_arfs; i > 0; --i) {
+ assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] ==
+ INTNL_OVERLAY_UPDATE);
+
+ // Encoder's choice:
+ // Set show_existing_frame == 1 for all extra-ARF's, and hence
+ // allocate zero bits to all internal OVERLAY frames.
+ gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] =
+ gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]];
+ gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0;
+ }
+ }
+ }
+}
+
static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
int gf_arf_bits) {
VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -2462,17 +2607,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
double this_frame_score = 1.0;
// Define the GF structure and specify
- cpi->bwd_ref_allowed = 0;
- cpi->extra_arf_allowed = 0;
-
- cpi->num_extra_arfs = 0;
- cpi->num_extra_arfs = cpi->extra_arf_allowed ? cpi->num_extra_arfs : 0;
-
- if (cpi->bwd_ref_allowed) {
- define_gf_multi_arf_structure(cpi);
- } else {
- define_gf_group_structure(cpi);
- }
+ define_gf_group_structure(cpi);
key_frame = cpi->common.frame_type == KEY_FRAME;
@@ -2620,6 +2755,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+ int disable_bwd_extarf;
+
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (is_key_frame == 0) {
@@ -2800,6 +2937,39 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ // TODO(zoeliu): Turn on the option to disable extra ALTREFs for still GF
+ // groups.
+ // Disable extra altrefs for "still" gf group:
+ // zero_motion_accumulator: minimum percentage of (0,0) motion;
+ // avg_sr_coded_error: average of the SSE per pixel of each frame;
+ // avg_raw_err_stdev: average of the standard deviation of (0,0)
+ // motion error per block of each frame.
+#if 0
+ assert(num_mbs > 0);
+ disable_bwd_extarf =
+ (zero_motion_accumulator > MIN_ZERO_MOTION &&
+ avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
+ avg_raw_err_stdev < MAX_RAW_ERR_VAR);
+#else
+ disable_bwd_extarf = 0;
+#endif // 0
+
+ if (disable_bwd_extarf) cpi->extra_arf_allowed = 0;
+
+ if (!cpi->extra_arf_allowed) {
+ cpi->num_extra_arfs = 0;
+ } else {
+ // Compute how many extra alt_refs we can have
+ cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
+ rc->source_alt_ref_pending);
+ }
+ // Currently at most two extra ARFs are allowed
+ assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
+
+ rc->bipred_group_interval = BFG_INTERVAL;
+ // The minimum bi-predictive frame group interval is 2.
+ if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0;
+
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -2851,7 +3021,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_error_left -= gf_group_err;
// Allocate bits to each of the frames in the GF group.
- allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);
+ if (cpi->extra_arf_allowed) {
+ allocate_gf_multi_arf_bits(cpi, gf_group_bits, gf_arf_bits);
+ } else {
+ allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);
+ }
// Reset the file position.
reset_fpf_position(twopass, start_pos);
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 958dc128d..404175d92 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -11,6 +11,8 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
+#include <assert.h>
+
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_ratectrl.h"
@@ -41,7 +43,12 @@ typedef struct {
#define INVALID_ROW -1
+// Length of the bi-predictive frame group (BFG)
+// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
+// number of bi-predictive frames.
+#define BFG_INTERVAL 2
#define MAX_EXT_ARFS 2
+#define MIN_EXT_ARF_INTERVAL 4
typedef struct {
double frame_mb_intra_factor;
@@ -210,6 +217,17 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
+static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
+ assert(MAX_EXT_ARFS > 0);
+ if (arf_pending) {  // extra ARFs are only returned when arf_pending is set
+ if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1))
+ return MAX_EXT_ARFS;  // interval >= 12 with the current defines
+ else if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS)
+ return MAX_EXT_ARFS - 1;  // interval in [8, 12) with the current defines
+ }
+ return 0;  // arf_pending is zero, or interval below the smallest threshold
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 254c4e2b1..7a02623dc 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -83,6 +83,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 21);
sf->use_ml_partition_search_breakout = 1;
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = 0.0f;
}
}
@@ -97,6 +99,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->partition_search_breakout_thr.dist = (1 << 22);
sf->partition_search_breakout_thr.rate = 100;
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -1.0f;
}
sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index eede9cbe2..7a9b3a622 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -472,6 +472,7 @@ typedef struct SPEED_FEATURES {
// Use ML-based partition search early breakout.
int use_ml_partition_search_breakout;
+ float ml_partition_search_breakout_thresh[2];
// Machine-learning based partition search early termination
int ml_partition_search_early_termination;
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index d40d3c445..7ca4004b0 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -68,6 +68,7 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht16x16_add_neon.c