Diffstat (limited to 'vp9/encoder')
-rw-r--r--  vp9/encoder/vp9_asm_enc_offsets.c | 1
-rw-r--r--  vp9/encoder/vp9_bitstream.c | 554
-rw-r--r--  vp9/encoder/vp9_block.h | 39
-rw-r--r--  vp9/encoder/vp9_boolhuff.c | 1
-rw-r--r--  vp9/encoder/vp9_dct.c | 3177
-rw-r--r--  vp9/encoder/vp9_encodeframe.c | 411
-rw-r--r--  vp9/encoder/vp9_encodeframe.h | 4
-rw-r--r--  vp9/encoder/vp9_encodeintra.c | 155
-rw-r--r--  vp9/encoder/vp9_encodemb.c | 329
-rw-r--r--  vp9/encoder/vp9_encodemb.h | 4
-rw-r--r--  vp9/encoder/vp9_firstpass.c | 87
-rw-r--r--  vp9/encoder/vp9_firstpass.h | 12
-rw-r--r--  vp9/encoder/vp9_mbgraph.c | 46
-rw-r--r--  vp9/encoder/vp9_mbgraph.h | 2
-rw-r--r--  vp9/encoder/vp9_mcomp.c | 171
-rw-r--r--  vp9/encoder/vp9_mcomp.h | 36
-rw-r--r--  vp9/encoder/vp9_onyx_if.c | 1298
-rw-r--r--  vp9/encoder/vp9_onyx_int.h | 92
-rw-r--r--  vp9/encoder/vp9_picklpf.c | 3
-rw-r--r--  vp9/encoder/vp9_psnr.c | 9
-rw-r--r--  vp9/encoder/vp9_psnr.h | 2
-rw-r--r--  vp9/encoder/vp9_quantize.c | 712
-rw-r--r--  vp9/encoder/vp9_quantize.h | 11
-rw-r--r--  vp9/encoder/vp9_ratectrl.c | 176
-rw-r--r--  vp9/encoder/vp9_ratectrl.h | 27
-rw-r--r--  vp9/encoder/vp9_rdopt.c | 1445
-rw-r--r--  vp9/encoder/vp9_rdopt.h | 6
-rw-r--r--  vp9/encoder/vp9_sad_c.c | 632
-rw-r--r--  vp9/encoder/vp9_satd_c.c | 48
-rw-r--r--  vp9/encoder/vp9_segmentation.c | 166
-rw-r--r--  vp9/encoder/vp9_segmentation.h | 24
-rw-r--r--  vp9/encoder/vp9_temporal_filter.c | 89
-rw-r--r--  vp9/encoder/vp9_temporal_filter.h | 2
-rw-r--r--  vp9/encoder/vp9_tokenize.c | 484
-rw-r--r--  vp9/encoder/vp9_tokenize.h | 45
-rw-r--r--  vp9/encoder/vp9_treewriter.h | 52
-rw-r--r--  vp9/encoder/vp9_variance.h | 9
-rw-r--r--  vp9/encoder/vp9_variance_c.c | 28
-rw-r--r--  vp9/encoder/x86/vp9_dct_sse2_intrinsics.c | 272
-rw-r--r--  vp9/encoder/x86/vp9_encodeopt.asm | 22
-rw-r--r--  vp9/encoder/x86/vp9_sad4d_sse2.asm | 225
-rw-r--r--  vp9/encoder/x86/vp9_sad_sse2.asm | 572
-rw-r--r--  vp9/encoder/x86/vp9_sad_sse3.asm | 582
-rw-r--r--  vp9/encoder/x86/vp9_sad_sse4.asm | 118
-rw-r--r--  vp9/encoder/x86/vp9_variance_sse2.c | 152
-rw-r--r--  vp9/encoder/x86/vp9_x86_csystemdependent.c | 12
46 files changed, 5105 insertions(+), 7239 deletions(-)
diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c
index 71fad2e07..e174a894a 100644
--- a/vp9/encoder/vp9_asm_enc_offsets.c
+++ b/vp9/encoder/vp9_asm_enc_offsets.c
@@ -32,7 +32,6 @@ DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift));
DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
-DEFINE(vp9_blockd_eob, offsetof(BLOCKD, eob));
END
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 61aac5cd1..7101947a6 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -14,6 +14,7 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/common/vp9_systemdependent.h"
#include <assert.h>
@@ -41,12 +42,9 @@ unsigned __int64 Sectionbits[500];
int intra_mode_stats[VP9_KF_BINTRAMODES]
[VP9_KF_BINTRAMODES]
[VP9_KF_BINTRAMODES];
-vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16];
-vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16];
+vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES];
vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
extern unsigned int active_section;
@@ -189,15 +187,7 @@ static void update_refpred_stats(VP9_COMP *cpi) {
int old_cost, new_cost;
// Set the prediction probability structures to defaults
- if (cm->frame_type == KEY_FRAME) {
- // Set the prediction probabilities to defaults
- cm->ref_pred_probs[0] = 120;
- cm->ref_pred_probs[1] = 80;
- cm->ref_pred_probs[2] = 40;
-
- vpx_memset(cpi->ref_pred_probs_update, 0,
- sizeof(cpi->ref_pred_probs_update));
- } else {
+ if (cm->frame_type != KEY_FRAME) {
// From the prediction counts set the probabilities for each context
for (i = 0; i < PREDICTION_PROBS; i++) {
new_pred_probs[i] = get_binary_prob(cpi->ref_pred_count[i][0],
@@ -219,7 +209,6 @@ static void update_refpred_stats(VP9_COMP *cpi) {
cm->ref_pred_probs[i] = new_pred_probs[i];
} else
cpi->ref_pred_probs_update[i] = 0;
-
}
}
}
@@ -230,8 +219,8 @@ static void update_refpred_stats(VP9_COMP *cpi) {
//
// The branch counts table is re-populated during the actual pack stage and in
// the decoder to facilitate backwards update of the context.
-static void update_mode_probs(VP9_COMMON *cm,
- int mode_context[INTER_MODE_CONTEXTS][4]) {
+static void update_inter_mode_probs(VP9_COMMON *cm,
+ int mode_context[INTER_MODE_CONTEXTS][4]) {
int i, j;
unsigned int (*mv_ref_ct)[4][2];
@@ -508,7 +497,8 @@ static void write_sub_mv_ref
vp9_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
-static void write_nmv(vp9_writer *bc, const MV *mv, const int_mv *ref,
+static void write_nmv(VP9_COMP *cpi, vp9_writer *bc,
+ const MV *mv, const int_mv *ref,
const nmv_context *nmvc, int usehp) {
MV e;
e.row = mv->row - ref->as_mv.row;
@@ -585,6 +575,28 @@ static void write_mb_segid(vp9_writer *bc,
}
}
+static void write_mb_segid_except(VP9_COMMON *cm,
+ vp9_writer *bc,
+ const MB_MODE_INFO *mi,
+ const MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
+ // Encode the MB segment id.
+ int seg_id = mi->segment_id;
+ int pred_seg_id = vp9_get_pred_mb_segid(cm, xd,
+ mb_row * cm->mb_cols + mb_col);
+ const vp9_prob *p = xd->mb_segment_tree_probs;
+ const vp9_prob p1 = xd->mb_segment_mispred_tree_probs[pred_seg_id];
+
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
+ vp9_write(bc, seg_id >= 2, p1);
+ if (pred_seg_id >= 2 && seg_id < 2) {
+ vp9_write(bc, seg_id == 1, p[1]);
+ } else if (pred_seg_id < 2 && seg_id >= 2) {
+ vp9_write(bc, seg_id == 3, p[2]);
+ }
+ }
+}
+
// This function encodes the reference frame
static void encode_ref_frame(vp9_writer *const bc,
VP9_COMMON *const cm,
@@ -728,7 +740,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
// If the mb segment id wasn't predicted code explicitly
if (!prediction_flag)
- write_mb_segid(bc, mi, &cpi->mb.e_mbd);
+ write_mb_segid_except(pc, bc, mi, &cpi->mb.e_mbd, mb_row, mb_col);
} else {
// Normal unpredicted coding
write_mb_segid(bc, mi, &cpi->mb.e_mbd);
@@ -737,8 +749,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
if (!pc->mb_no_coeff_skip) {
skip_coeff = 0;
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
const int nmbs = mb_size;
@@ -758,24 +769,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
}
// Encode the reference frame.
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)
- || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) {
- encode_ref_frame(bc, pc, xd, segment_id, rf);
- } else {
- assert(rf == INTRA_FRAME);
- }
+ encode_ref_frame(bc, pc, xd, segment_id, rf);
if (rf == INTRA_FRAME) {
#ifdef ENTROPY_STATS
active_section = 6;
#endif
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
- if (m->mbmi.sb_type)
- write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
- else
- write_ymode(bc, mode, pc->fc.ymode_prob);
- }
+ if (m->mbmi.sb_type)
+ write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
+ else
+ write_ymode(bc, mode, pc->fc.ymode_prob);
+
if (mode == B_PRED) {
int j = 0;
do {
@@ -801,14 +806,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]);
- // #ifdef ENTROPY_STATS
#ifdef ENTROPY_STATS
- accum_mv_refs(mode, ct);
active_section = 3;
#endif
- // Is the segment coding of mode enabled
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ // If segment skip is not enabled code the mode.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
if (mi->sb_type) {
write_sb_mv_ref(bc, mode, mv_ref_p);
} else {
@@ -878,12 +881,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
#ifdef ENTROPY_STATS
active_section = 5;
#endif
- write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv,
+ write_nmv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
if (mi->second_ref_frame > 0) {
- write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv,
+ write_nmv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
}
@@ -915,7 +918,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
#else
while (j != L[++k]);
#endif
- leftmv.as_int = left_block_mv(m, k);
+ leftmv.as_int = left_block_mv(xd, m, k);
abovemv.as_int = above_block_mv(m, k, mis);
mv_contz = vp9_mv_cont(&leftmv, &abovemv);
@@ -926,12 +929,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
#ifdef ENTROPY_STATS
active_section = 11;
#endif
- write_nmv(bc, &blockmv.as_mv, &mi->best_mv,
+ write_nmv(cpi, bc, &blockmv.as_mv, &mi->best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
if (mi->second_ref_frame > 0) {
- write_nmv(bc,
+ write_nmv(cpi, bc,
&cpi->mb.partition_info->bmi[j].second_mv.as_mv,
&mi->best_second_mv,
(const nmv_context*) nmvc,
@@ -951,8 +954,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
mi->partitioning == PARTITIONING_4X4))) &&
pc->txfm_mode == TX_MODE_SELECT &&
!((pc->mb_no_coeff_skip && skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
@@ -981,8 +983,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
if (!c->mb_no_coeff_skip) {
skip_coeff = 0;
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
const int nmbs = 1 << m->mbmi.sb_type;
@@ -1013,7 +1014,8 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
int i = 0;
do {
const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
- const B_PREDICTION_MODE L = left_block_mode(m, i);
+ const B_PREDICTION_MODE L = (xd->left_available || (i & 3)) ?
+ left_block_mode(m, i) : B_DC_PRED;
const int bm = m->bmi[i].as_mode.first;
#ifdef ENTROPY_STATS
@@ -1041,8 +1043,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
!((c->mb_no_coeff_skip && skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
TX_SIZE sz = m->mbmi.txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
@@ -1061,6 +1062,10 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
xd->mode_info_context = m;
+ xd->left_available = mb_col > c->cur_tile_mb_col_start;
+ xd->right_available =
+ (mb_col + (1 << m->mbmi.sb_type)) < c->cur_tile_mb_col_end;
+ xd->up_available = mb_row > 0;
if (c->frame_type == KEY_FRAME) {
write_mb_modes_kf(cpi, m, bc,
c->mb_rows - mb_row, c->mb_cols - mb_col);
@@ -1079,20 +1084,22 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
pack_mb_tokens(bc, tok, tok_end);
}
-static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
+static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
+ TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
VP9_COMMON *const c = &cpi->common;
const int mis = c->mode_info_stride;
MODE_INFO *m, *m_ptr = c->mi;
int i, mb_row, mb_col;
- TOKENEXTRA *tok = cpi->tok;
- TOKENEXTRA *tok_end = tok + cpi->tok_count;
- for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) {
+ m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis;
+ for (mb_row = c->cur_tile_mb_row_start;
+ mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) {
m = m_ptr;
- for (mb_col = 0; mb_col < c->mb_cols; mb_col += 4, m += 4) {
+ for (mb_col = c->cur_tile_mb_col_start;
+ mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) {
vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded);
if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
- write_modes_b(cpi, m, bc, &tok, tok_end, mb_row, mb_col);
+ write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
} else {
int j;
@@ -1107,7 +1114,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
vp9_write(bc, sb_m->mbmi.sb_type, c->sb32_coded);
if (sb_m->mbmi.sb_type) {
assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32);
- write_modes_b(cpi, sb_m, bc, &tok, tok_end,
+ write_modes_b(cpi, sb_m, bc, tok, tok_end,
mb_row + y_idx_sb, mb_col + x_idx_sb);
} else {
// Process the 4 MBs in the order:
@@ -1123,7 +1130,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
}
assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16);
- write_modes_b(cpi, mb_m, bc, &tok, tok_end,
+ write_modes_b(cpi, mb_m, bc, tok, tok_end,
mb_row + y_idx, mb_col + x_idx);
}
}
@@ -1135,20 +1142,23 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
/* This function is used for debugging probability trees. */
-static void print_prob_tree(vp9_coeff_probs *coef_probs) {
+static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) {
/* print coef probability tree */
- int i, j, k, l;
+ int i, j, k, l, m;
FILE *f = fopen("enc_tree_probs.txt", "a");
fprintf(f, "{\n");
- for (i = 0; i < BLOCK_TYPES_4X4; i++) {
+ for (i = 0; i < block_types; i++) {
fprintf(f, " {\n");
- for (j = 0; j < COEF_BANDS; j++) {
- fprintf(f, " {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- fprintf(f, " {");
- for (l = 0; l < ENTROPY_NODES; l++) {
- fprintf(f, "%3u, ",
- (unsigned int)(coef_probs [i][j][k][l]));
+ for (j = 0; j < REF_TYPES; ++j) {
+ fprintf(f, " {\n");
+ for (k = 0; k < COEF_BANDS; k++) {
+ fprintf(f, " {\n");
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ fprintf(f, " {");
+ for (m = 0; m < ENTROPY_NODES; m++) {
+ fprintf(f, "%3u, ",
+ (unsigned int)(coef_probs[i][j][k][l][m]));
+ }
}
fprintf(f, " }\n");
}
@@ -1168,26 +1178,28 @@ static void build_tree_distribution(vp9_coeff_probs *coef_probs,
#endif
vp9_coeff_stats *coef_branch_ct,
int block_types) {
- int i = 0, j, k;
+ int i, j, k, l;
#ifdef ENTROPY_STATS
int t = 0;
#endif
for (i = 0; i < block_types; ++i) {
- for (j = 0; j < COEF_BANDS; ++j) {
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
- continue;
- vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
- vp9_coef_encodings, vp9_coef_tree,
- coef_probs[i][j][k],
- coef_branch_ct[i][j][k],
- coef_counts[i][j][k]);
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ if (l >= 3 && k == 0)
+ continue;
+ vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
+ vp9_coef_encodings, vp9_coef_tree,
+ coef_probs[i][j][k][l],
+ coef_branch_ct[i][j][k][l],
+ coef_counts[i][j][k][l]);
#ifdef ENTROPY_STATS
if (!cpi->dummy_packing)
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- context_counters[i][j][k][t] += coef_counts[i][j][k][t];
+ context_counters[i][j][k][l][t] += coef_counts[i][j][k][l][t];
#endif
+ }
}
}
}
@@ -1199,37 +1211,19 @@ static void build_coeff_contexts(VP9_COMP *cpi) {
#ifdef ENTROPY_STATS
cpi, context_counters_4x4,
#endif
- cpi->frame_branch_ct_4x4, BLOCK_TYPES_4X4);
- build_tree_distribution(cpi->frame_hybrid_coef_probs_4x4,
- cpi->hybrid_coef_counts_4x4,
-#ifdef ENTROPY_STATS
- cpi, hybrid_context_counters_4x4,
-#endif
- cpi->frame_hybrid_branch_ct_4x4, BLOCK_TYPES_4X4);
+ cpi->frame_branch_ct_4x4, BLOCK_TYPES);
build_tree_distribution(cpi->frame_coef_probs_8x8,
cpi->coef_counts_8x8,
#ifdef ENTROPY_STATS
cpi, context_counters_8x8,
#endif
- cpi->frame_branch_ct_8x8, BLOCK_TYPES_8X8);
- build_tree_distribution(cpi->frame_hybrid_coef_probs_8x8,
- cpi->hybrid_coef_counts_8x8,
-#ifdef ENTROPY_STATS
- cpi, hybrid_context_counters_8x8,
-#endif
- cpi->frame_hybrid_branch_ct_8x8, BLOCK_TYPES_8X8);
+ cpi->frame_branch_ct_8x8, BLOCK_TYPES);
build_tree_distribution(cpi->frame_coef_probs_16x16,
cpi->coef_counts_16x16,
#ifdef ENTROPY_STATS
cpi, context_counters_16x16,
#endif
- cpi->frame_branch_ct_16x16, BLOCK_TYPES_16X16);
- build_tree_distribution(cpi->frame_hybrid_coef_probs_16x16,
- cpi->hybrid_coef_counts_16x16,
-#ifdef ENTROPY_STATS
- cpi, hybrid_context_counters_16x16,
-#endif
- cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16);
+ cpi->frame_branch_ct_16x16, BLOCK_TYPES);
build_tree_distribution(cpi->frame_coef_probs_32x32,
cpi->coef_counts_32x32,
#ifdef ENTROPY_STATS
@@ -1247,7 +1241,7 @@ static void update_coef_probs_common(vp9_writer* const bc,
vp9_coeff_probs *old_frame_coef_probs,
vp9_coeff_stats *frame_branch_ct,
int block_types) {
- int i, j, k, t;
+ int i, j, k, l, t;
int update[2] = {0, 0};
int savings;
// vp9_prob bestupd = find_coef_update_prob(cpi);
@@ -1255,38 +1249,39 @@ static void update_coef_probs_common(vp9_writer* const bc,
/* dry run to see if there is any udpate at all needed */
savings = 0;
for (i = 0; i < block_types; ++i) {
- for (j = !i; j < COEF_BANDS; ++j) {
- int prev_coef_savings[ENTROPY_NODES] = {0};
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- for (t = 0; t < ENTROPY_NODES; ++t) {
- vp9_prob newp = new_frame_coef_probs[i][j][k][t];
- const vp9_prob oldp = old_frame_coef_probs[i][j][k][t];
- const vp9_prob upd = COEF_UPDATE_PROB;
- int s = prev_coef_savings[t];
- int u = 0;
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
- continue;
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ int prev_coef_savings[ENTROPY_NODES] = {0};
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
+ const vp9_prob upd = COEF_UPDATE_PROB;
+ int s = prev_coef_savings[t];
+ int u = 0;
+
+ if (l >= 3 && k == 0)
+ continue;
#if defined(SEARCH_NEWP)
- s = prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][t],
- oldp, &newp, upd);
- if (s > 0 && newp != oldp)
- u = 1;
- if (u)
- savings += s - (int)(vp9_cost_zero(upd));
- else
- savings -= (int)(vp9_cost_zero(upd));
+ s = prob_diff_update_savings_search(frame_branch_ct[i][j][k][l][t],
+ oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
#else
- s = prob_update_savings(
- frame_branch_ct[i][j][k][t],
- oldp, newp, upd);
- if (s > 0)
- u = 1;
- if (u)
- savings += s;
+ s = prob_update_savings(frame_branch_ct[i][j][k][l][t],
+ oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+ if (u)
+ savings += s;
#endif
- update[u]++;
+ update[u]++;
+ }
}
}
}
@@ -1299,41 +1294,42 @@ static void update_coef_probs_common(vp9_writer* const bc,
} else {
vp9_write_bit(bc, 1);
for (i = 0; i < block_types; ++i) {
- for (j = !i; j < COEF_BANDS; ++j) {
- int prev_coef_savings[ENTROPY_NODES] = {0};
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- // calc probs and branch cts for this frame only
- for (t = 0; t < ENTROPY_NODES; ++t) {
- vp9_prob newp = new_frame_coef_probs[i][j][k][t];
- vp9_prob *oldp = old_frame_coef_probs[i][j][k] + t;
- const vp9_prob upd = COEF_UPDATE_PROB;
- int s = prev_coef_savings[t];
- int u = 0;
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
- continue;
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ int prev_coef_savings[ENTROPY_NODES] = {0};
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ // calc probs and branch cts for this frame only
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+ const vp9_prob upd = COEF_UPDATE_PROB;
+ int s = prev_coef_savings[t];
+ int u = 0;
+ if (l >= 3 && k == 0)
+ continue;
#if defined(SEARCH_NEWP)
- s = prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][t],
- *oldp, &newp, upd);
- if (s > 0 && newp != *oldp)
- u = 1;
+ s = prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
#else
- s = prob_update_savings(
- frame_branch_ct[i][j][k][t],
- *oldp, newp, upd);
- if (s > 0)
- u = 1;
+ s = prob_update_savings(frame_branch_ct[i][j][k][l][t],
+ *oldp, newp, upd);
+ if (s > 0)
+ u = 1;
#endif
- vp9_write(bc, u, upd);
+ vp9_write(bc, u, upd);
#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing)
- ++tree_update_hist[i][j][k][t][u];
+ if (!cpi->dummy_packing)
+ ++tree_update_hist[i][j][k][l][t][u];
#endif
- if (u) {
- /* send/use new probability */
- write_prob_diff_update(bc, newp, *oldp);
- *oldp = newp;
+ if (u) {
+ /* send/use new probability */
+ write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
}
}
}
@@ -1356,17 +1352,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
cpi->frame_coef_probs_4x4,
cpi->common.fc.coef_probs_4x4,
cpi->frame_branch_ct_4x4,
- BLOCK_TYPES_4X4);
-
- update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
- cpi,
- hybrid_tree_update_hist_4x4,
-#endif
- cpi->frame_hybrid_coef_probs_4x4,
- cpi->common.fc.hybrid_coef_probs_4x4,
- cpi->frame_hybrid_branch_ct_4x4,
- BLOCK_TYPES_4X4);
+ BLOCK_TYPES);
/* do not do this if not even allowed */
if (cpi->common.txfm_mode != ONLY_4X4) {
@@ -1378,17 +1364,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
cpi->frame_coef_probs_8x8,
cpi->common.fc.coef_probs_8x8,
cpi->frame_branch_ct_8x8,
- BLOCK_TYPES_8X8);
-
- update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
- cpi,
- hybrid_tree_update_hist_8x8,
-#endif
- cpi->frame_hybrid_coef_probs_8x8,
- cpi->common.fc.hybrid_coef_probs_8x8,
- cpi->frame_hybrid_branch_ct_8x8,
- BLOCK_TYPES_8X8);
+ BLOCK_TYPES);
}
if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -1400,16 +1376,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
cpi->frame_coef_probs_16x16,
cpi->common.fc.coef_probs_16x16,
cpi->frame_branch_ct_16x16,
- BLOCK_TYPES_16X16);
- update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
- cpi,
- hybrid_tree_update_hist_16x16,
-#endif
- cpi->frame_hybrid_coef_probs_16x16,
- cpi->common.fc.hybrid_coef_probs_16x16,
- cpi->frame_hybrid_branch_ct_16x16,
- BLOCK_TYPES_16X16);
+ BLOCK_TYPES);
}
if (cpi->common.txfm_mode > ALLOW_16X16) {
@@ -1523,33 +1490,37 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
* and color type.
*/
if (oh.type == KEY_FRAME) {
- int v;
-
// Start / synch code
cx_data[0] = 0x9D;
cx_data[1] = 0x01;
cx_data[2] = 0x2a;
+ extra_bytes_packed = 3;
+ cx_data += extra_bytes_packed;
+ }
+ {
+ int v;
+ /* TODO(jkoleszar): support arbitrary resolutions */
v = (pc->horiz_scale << 14) | pc->Width;
- cx_data[3] = v;
- cx_data[4] = v >> 8;
+ cx_data[0] = v;
+ cx_data[1] = v >> 8;
v = (pc->vert_scale << 14) | pc->Height;
- cx_data[5] = v;
- cx_data[6] = v >> 8;
+ cx_data[2] = v;
+ cx_data[3] = v >> 8;
- extra_bytes_packed = 7;
- cx_data += extra_bytes_packed;
+ extra_bytes_packed += 4;
+ cx_data += 4;
+ }
- vp9_start_encode(&header_bc, cx_data);
+ vp9_start_encode(&header_bc, cx_data);
- // signal clr type
- vp9_write_bit(&header_bc, pc->clr_type);
- vp9_write_bit(&header_bc, pc->clamp_type);
+ // TODO(jkoleszar): remove these two unused bits?
+ vp9_write_bit(&header_bc, pc->clr_type);
+ vp9_write_bit(&header_bc, pc->clamp_type);
- } else {
- vp9_start_encode(&header_bc, cx_data);
- }
+ // error resilient mode
+ vp9_write_bit(&header_bc, pc->error_resilient_mode);
// Signal whether or not Segmentation is enabled
vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0);
@@ -1655,7 +1626,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
vp9_write_literal(&header_bc, pc->sb32_coded, 8);
- {
+ vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless);
+ if (cpi->mb.e_mbd.lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ } else {
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
cpi->txfm_count_16x16p[TX_4X4] +
@@ -1765,29 +1739,35 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
// Transmit Dc, Second order and Uv quantizer delta information
put_delta_q(&header_bc, pc->y1dc_delta_q);
- put_delta_q(&header_bc, pc->y2dc_delta_q);
- put_delta_q(&header_bc, pc->y2ac_delta_q);
put_delta_q(&header_bc, pc->uvdc_delta_q);
put_delta_q(&header_bc, pc->uvac_delta_q);
// When there is a key frame all reference buffers are updated using the new key frame
if (pc->frame_type != KEY_FRAME) {
- // Should the GF or ARF be updated using the transmitted frame or buffer
- vp9_write_bit(&header_bc, pc->refresh_golden_frame);
- vp9_write_bit(&header_bc, pc->refresh_alt_ref_frame);
-
- // For inter frames the current default behavior is that when
- // cm->refresh_golden_frame is set we copy the old GF over to
- // the ARF buffer. This is purely an encoder decision at present.
- if (pc->refresh_golden_frame)
- pc->copy_buffer_to_arf = 2;
-
- // If not being updated from current frame should either GF or ARF be updated from another buffer
- if (!pc->refresh_golden_frame)
- vp9_write_literal(&header_bc, pc->copy_buffer_to_gf, 2);
+ int refresh_mask;
- if (!pc->refresh_alt_ref_frame)
- vp9_write_literal(&header_bc, pc->copy_buffer_to_arf, 2);
+ // Should the GF or ARF be updated using the transmitted frame or buffer
+ if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
+ /* Preserve the previously existing golden frame and update the frame in
+ * the alt ref slot instead. This is highly specific to the use of
+ * alt-ref as a forward reference, and this needs to be generalized as
+ * other uses are implemented (like RTC/temporal scaling)
+ *
+ * gld_fb_idx and alt_fb_idx need to be swapped for future frames, but
+ * that happens in vp9_onyx_if.c:update_reference_frames() so that it can
+ * be done outside of the recode loop.
+ */
+ refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) |
+ (cpi->refresh_golden_frame << cpi->alt_fb_idx);
+ } else {
+ refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) |
+ (cpi->refresh_golden_frame << cpi->gld_fb_idx) |
+ (cpi->refresh_alt_ref_frame << cpi->alt_fb_idx);
+ }
+ vp9_write_literal(&header_bc, refresh_mask, NUM_REF_FRAMES);
+ vp9_write_literal(&header_bc, cpi->lst_fb_idx, NUM_REF_FRAMES_LG2);
+ vp9_write_literal(&header_bc, cpi->gld_fb_idx, NUM_REF_FRAMES_LG2);
+ vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2);
// Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
@@ -1831,10 +1811,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
#endif
}
- vp9_write_bit(&header_bc, pc->refresh_entropy_probs);
+ if (!pc->error_resilient_mode) {
+ vp9_write_bit(&header_bc, pc->refresh_entropy_probs);
+ vp9_write_bit(&header_bc, pc->frame_parallel_decoding_mode);
+ }
- if (pc->frame_type != KEY_FRAME)
- vp9_write_bit(&header_bc, pc->refresh_last_frame);
+ vp9_write_literal(&header_bc, pc->frame_context_idx,
+ NUM_FRAME_CONTEXTS_LG2);
#ifdef ENTROPY_STATS
if (pc->frame_type == INTER_FRAME)
@@ -1848,7 +1831,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
if (pc->frame_type != KEY_FRAME) {
int i, j;
int new_context[INTER_MODE_CONTEXTS][4];
- update_mode_probs(pc, new_context);
+ if (!cpi->dummy_packing) {
+ update_inter_mode_probs(pc, new_context);
+ } else {
+ // In dummy pack assume context unchanged.
+ vpx_memcpy(new_context, pc->fc.vp9_mode_contexts,
+ sizeof(pc->fc.vp9_mode_contexts));
+ }
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
for (j = 0; j < 4; j++) {
@@ -1902,16 +1891,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_copy(cpi->common.fc.pre_coef_probs_4x4,
cpi->common.fc.coef_probs_4x4);
- vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_4x4,
- cpi->common.fc.hybrid_coef_probs_4x4);
vp9_copy(cpi->common.fc.pre_coef_probs_8x8,
cpi->common.fc.coef_probs_8x8);
- vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8,
- cpi->common.fc.hybrid_coef_probs_8x8);
vp9_copy(cpi->common.fc.pre_coef_probs_16x16,
cpi->common.fc.coef_probs_16x16);
- vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16,
- cpi->common.fc.hybrid_coef_probs_16x16);
vp9_copy(cpi->common.fc.pre_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32);
vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
@@ -1960,7 +1943,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
if (pc->mcomp_filter_type == SWITCHABLE)
update_switchable_interp_probs(cpi, &header_bc);
- #if CONFIG_COMP_INTERINTRA_PRED
+#if CONFIG_COMP_INTERINTRA_PRED
if (pc->use_interintra) {
vp9_cond_prob_update(&header_bc,
&pc->fc.interintra_prob,
@@ -1995,6 +1978,25 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc);
}
+ /* tiling */
+ {
+ int min_log2_tiles, delta_log2_tiles, n_tile_bits, n;
+
+ vp9_get_tile_n_bits(pc, &min_log2_tiles, &delta_log2_tiles);
+ n_tile_bits = pc->log2_tile_columns - min_log2_tiles;
+ for (n = 0; n < delta_log2_tiles; n++) {
+ if (n_tile_bits--) {
+ vp9_write_bit(&header_bc, 1);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ break;
+ }
+ }
+ vp9_write_bit(&header_bc, pc->log2_tile_rows != 0);
+ if (pc->log2_tile_rows != 0)
+ vp9_write_bit(&header_bc, pc->log2_tile_rows != 1);
+ }
+
vp9_stop_encode(&header_bc);
oh.first_partition_length_in_bytes = header_bc.pos;
@@ -2012,42 +2014,80 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
}
*size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos;
- vp9_start_encode(&residual_bc, cx_data + header_bc.pos);
if (pc->frame_type == KEY_FRAME) {
decide_kf_ymode_entropy(cpi);
- write_modes(cpi, &residual_bc);
} else {
/* This is not required if the counts in cpi are consistent with the
* final packing pass */
// if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount);
- write_modes(cpi, &residual_bc);
-
- vp9_update_mode_context(&cpi->common);
}
- vp9_stop_encode(&residual_bc);
+ {
+ int tile_row, tile_col, total_size = 0;
+ unsigned char *data_ptr = cx_data + header_bc.pos;
+ TOKENEXTRA *tok[1 << 6], *tok_end;
+
+ tok[0] = cpi->tok;
+ for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
+ tok[tile_col] = tok[tile_col - 1] + cpi->tok_count[tile_col - 1];
+
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ tok_end = cpi->tok + cpi->tok_count[0];
+ for (tile_col = 0; tile_col < pc->tile_columns;
+ tile_col++, tok_end += cpi->tok_count[tile_col]) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1)
+ vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
+ else
+ vp9_start_encode(&residual_bc, data_ptr + total_size);
+ write_modes(cpi, &residual_bc, &tok[tile_col], tok_end);
+ vp9_stop_encode(&residual_bc);
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
+ /* size of this tile */
+ data_ptr[total_size + 0] = residual_bc.pos;
+ data_ptr[total_size + 1] = residual_bc.pos >> 8;
+ data_ptr[total_size + 2] = residual_bc.pos >> 16;
+ data_ptr[total_size + 3] = residual_bc.pos >> 24;
+ total_size += 4;
+ }
+
+ total_size += residual_bc.pos;
+ }
+ }
+
+ assert((unsigned int)(tok[0] - cpi->tok) == cpi->tok_count[0]);
+ for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
+ assert((unsigned int)(tok[tile_col] - tok[tile_col - 1]) ==
+ cpi->tok_count[tile_col]);
- *size += residual_bc.pos;
+ *size += total_size;
+ }
}
#ifdef ENTROPY_STATS
static void print_tree_update_for_type(FILE *f,
vp9_coeff_stats *tree_update_hist,
int block_types, const char *header) {
- int i, j, k, l;
+ int i, j, k, l, m;
fprintf(f, "const vp9_coeff_prob %s = {\n", header);
for (i = 0; i < block_types; i++) {
fprintf(f, " { \n");
- for (j = 0; j < COEF_BANDS; j++) {
- fprintf(f, " {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- fprintf(f, " {");
- for (l = 0; l < ENTROPY_NODES; l++) {
- fprintf(f, "%3d, ",
- get_binary_prob(tree_update_hist[i][j][k][l][0],
- tree_update_hist[i][j][k][l][1]));
+ for (j = 0; j < REF_TYPES; j++) {
+ fprintf(f, " { \n");
+ for (k = 0; k < COEF_BANDS; k++) {
+ fprintf(f, " {\n");
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ fprintf(f, " {");
+ for (m = 0; m < ENTROPY_NODES; m++) {
+ fprintf(f, "%3d, ",
+ get_binary_prob(tree_update_hist[i][j][k][l][m][0],
+ tree_update_hist[i][j][k][l][m][1]));
+ }
+ fprintf(f, "},\n");
}
fprintf(f, "},\n");
}
@@ -2062,18 +2102,11 @@ void print_tree_update_probs() {
FILE *f = fopen("coefupdprob.h", "w");
fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n");
- print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES_4X4,
+ print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES,
"vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]");
- print_tree_update_for_type(f, hybrid_tree_update_hist_4x4, BLOCK_TYPES_4X4,
- "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]");
- print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES_8X8,
- "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]");
- print_tree_update_for_type(f, hybrid_tree_update_hist_8x8, BLOCK_TYPES_8X8,
+ print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES,
"vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]");
- print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES_16X16,
- "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]");
- print_tree_update_for_type(f, hybrid_tree_update_hist_16x16,
- BLOCK_TYPES_16X16,
+ print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES,
"vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]");
print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32,
"vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]");
@@ -2083,6 +2116,7 @@ void print_tree_update_probs() {
fwrite(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f);
fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
+ fwrite(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f);
fclose(f);
}
#endif
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 1960b9162..79a021cfb 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -50,10 +50,7 @@ typedef struct block {
int src;
int src_stride;
- int eob_max_offset;
- int eob_max_offset_8x8;
- int eob_max_offset_16x16;
- int eob_max_offset_32x32;
+ int skip_block;
} BLOCK;
typedef struct {
@@ -91,12 +88,12 @@ typedef struct superblock {
DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]);
} SUPERBLOCK;
-typedef struct macroblock {
- DECLARE_ALIGNED(16, int16_t, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
- DECLARE_ALIGNED(16, int16_t, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
+typedef struct macroblock MACROBLOCK;
+struct macroblock {
+ DECLARE_ALIGNED(16, int16_t, src_diff[384]); // 16x16 Y 8x8 U 8x8 V
+ DECLARE_ALIGNED(16, int16_t, coeff[384]); // 16x16 Y 8x8 U 8x8 V
// 16 Y blocks, 4 U blocks, 4 V blocks,
- // 1 DC 2nd order block each with 16 entries
- BLOCK block[25];
+ BLOCK block[24];
SUPERBLOCK sb_coeff_data;
@@ -160,8 +157,7 @@ typedef struct macroblock {
unsigned char *active_ptr;
- vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4];
- vp9_coeff_count hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4];
+ vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
int optimize;
@@ -172,17 +168,14 @@ typedef struct macroblock {
PICK_MODE_CONTEXT sb32_context[4];
PICK_MODE_CONTEXT sb64_context;
- void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
- void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);
- void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
- void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);
- void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);
- void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
-} MACROBLOCK;
+ void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
+ void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx);
+ void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2);
+ void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx);
+ void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx);
+};
#endif // VP9_ENCODER_VP9_BLOCK_H_
diff --git a/vp9/encoder/vp9_boolhuff.c b/vp9/encoder/vp9_boolhuff.c
index d1b1e0e89..a590902c2 100644
--- a/vp9/encoder/vp9_boolhuff.c
+++ b/vp9/encoder/vp9_boolhuff.c
@@ -40,7 +40,6 @@ const unsigned int vp9_prob_cost[256] = {
};
void vp9_start_encode(BOOL_CODER *br, unsigned char *source) {
-
br->lowvalue = 0;
br->range = 255;
br->value = 0;
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index bfde02ccb..e4ac2ce36 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -15,842 +15,362 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_blockd.h"
-
-// TODO: these transforms can be converted into integer forms to reduce
-// the complexity
-static const float dct_4[16] = {
- 0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
- 0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
- 0.500000000000000, -0.500000000000000, -0.500000000000000, 0.500000000000000,
- 0.270598050073099, -0.653281482438188, 0.653281482438188, -0.270598050073099
-};
-
-static const float adst_4[16] = {
- 0.228013428883779, 0.428525073124360, 0.577350269189626, 0.656538502008139,
- 0.577350269189626, 0.577350269189626, 0.000000000000000, -0.577350269189626,
- 0.656538502008139, -0.228013428883779, -0.577350269189626, 0.428525073124359,
- 0.428525073124360, -0.656538502008139, 0.577350269189626, -0.228013428883779
-};
-
-static const float dct_8[64] = {
- 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
- 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
- 0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064,
- -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615,
- 0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643,
- -0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643,
- 0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801,
- 0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273,
- 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
- 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
- 0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273,
- -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801,
- 0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545,
- -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545,
- 0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615,
- 0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064
-};
-
-static const float adst_8[64] = {
- 0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145,
- 0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509,
- 0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349,
- 0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785,
- 0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376,
- -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762,
- 0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762,
- 0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348,
- 0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735,
- 0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145,
- 0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786,
- -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375,
- 0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534,
- -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736,
- 0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
- 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
-};
-
-/* Converted the transforms to integers. */
-static const int16_t dct_i4[16] = {
- 16384, 16384, 16384, 16384,
- 21407, 8867, -8867, -21407,
- 16384, -16384, -16384, 16384,
- 8867, -21407, 21407, -8867
-};
-
-static const int16_t adst_i4[16] = {
- 7472, 14042, 18919, 21513,
- 18919, 18919, 0, -18919,
- 21513, -7472, -18919, 14042,
- 14042, -21513, 18919, -7472
-};
-
-static const int16_t dct_i8[64] = {
- 11585, 11585, 11585, 11585,
- 11585, 11585, 11585, 11585,
- 16069, 13623, 9102, 3196,
- -3196, -9102, -13623, -16069,
- 15137, 6270, -6270, -15137,
- -15137, -6270, 6270, 15137,
- 13623, -3196, -16069, -9102,
- 9102, 16069, 3196, -13623,
- 11585, -11585, -11585, 11585,
- 11585, -11585, -11585, 11585,
- 9102, -16069, 3196, 13623,
- -13623, -3196, 16069, -9102,
- 6270, -15137, 15137, -6270,
- -6270, 15137, -15137, 6270,
- 3196, -9102, 13623, -16069,
- 16069, -13623, 9102, -3196
-};
-
-static const int16_t adst_i8[64] = {
- 2921, 5742, 8368, 10708,
- 12684, 14228, 15288, 15827,
- 8368, 14228, 15827, 12684,
- 5742, -2921, -10708, -15288,
- 12684, 15288, 5742, -8368,
- -15827, -10708, 2921, 14228,
- 15288, 8368, -10708, -14228,
- 2921, 15827, 5742, -12684,
- 15827, -2921, -15288, 5742,
- 14228, -8368, -12684, 10708,
- 14228, -12684, -2921, 15288,
- -10708, -5742, 15827, -8368,
- 10708, -15827, 12684, -2921,
- -8368, 15288, -14228, 5742,
- 5742, -10708, 14228, -15827,
- 15288, -12684, 8368, -2921
-};
-
-static const float dct_16[256] = {
- 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
- 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
- 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
- -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
- 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
- -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
- 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
- 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
- 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
- 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
- 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
- -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
- 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
- -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
- 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
- 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
- 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
- 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
- 0.224292, -0.311806, -0.102631, 0.351851, -0.034654, -0.338330, 0.166664, 0.273300,
- -0.273300, -0.166664, 0.338330, 0.034654, -0.351851, 0.102631, 0.311806, -0.224292,
- 0.196424, -0.346760, 0.068975, 0.293969, -0.293969, -0.068975, 0.346760, -0.196424,
- -0.196424, 0.346760, -0.068975, -0.293969, 0.293969, 0.068975, -0.346760, 0.196424,
- 0.166664, -0.351851, 0.224292, 0.102631, -0.338330, 0.273300, 0.034654, -0.311806,
- 0.311806, -0.034654, -0.273300, 0.338330, -0.102631, -0.224292, 0.351851, -0.166664,
- 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
- 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
- 0.102631, -0.273300, 0.351851, -0.311806, 0.166664, 0.034654, -0.224292, 0.338330,
- -0.338330, 0.224292, -0.034654, -0.166664, 0.311806, -0.351851, 0.273300, -0.102631,
- 0.068975, -0.196424, 0.293969, -0.346760, 0.346760, -0.293969, 0.196424, -0.068975,
- -0.068975, 0.196424, -0.293969, 0.346760, -0.346760, 0.293969, -0.196424, 0.068975,
- 0.034654, -0.102631, 0.166664, -0.224292, 0.273300, -0.311806, 0.338330, -0.351851,
- 0.351851, -0.338330, 0.311806, -0.273300, 0.224292, -0.166664, 0.102631, -0.034654
-};
-
-static const float adst_16[256] = {
- 0.033094, 0.065889, 0.098087, 0.129396, 0.159534, 0.188227, 0.215215, 0.240255,
- 0.263118, 0.283599, 0.301511, 0.316693, 0.329007, 0.338341, 0.344612, 0.347761,
- 0.098087, 0.188227, 0.263118, 0.316693, 0.344612, 0.344612, 0.316693, 0.263118,
- 0.188227, 0.098087, 0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612,
- 0.159534, 0.283599, 0.344612, 0.329007, 0.240255, 0.098087, -0.065889, -0.215215,
- -0.316693, -0.347761, -0.301511, -0.188227, -0.033094, 0.129396, 0.263118, 0.338341,
- 0.215215, 0.338341, 0.316693, 0.159534, -0.065889, -0.263118, -0.347761, -0.283599,
- -0.098087, 0.129396, 0.301511, 0.344612, 0.240255, 0.033094, -0.188227, -0.329007,
- 0.263118, 0.344612, 0.188227, -0.098087, -0.316693, -0.316693, -0.098087, 0.188227,
- 0.344612, 0.263118, 0.000000, -0.263118, -0.344612, -0.188227, 0.098087, 0.316693,
- 0.301511, 0.301511, 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511,
- 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511, 0.000000, -0.301511,
- 0.329007, 0.215215, -0.188227, -0.338341, -0.033094, 0.316693, 0.240255, -0.159534,
- -0.344612, -0.065889, 0.301511, 0.263118, -0.129396, -0.347761, -0.098087, 0.283599,
- 0.344612, 0.098087, -0.316693, -0.188227, 0.263118, 0.263118, -0.188227, -0.316693,
- 0.098087, 0.344612, 0.000000, -0.344612, -0.098087, 0.316693, 0.188227, -0.263118,
- 0.347761, -0.033094, -0.344612, 0.065889, 0.338341, -0.098087, -0.329007, 0.129396,
- 0.316693, -0.159534, -0.301511, 0.188227, 0.283599, -0.215215, -0.263118, 0.240255,
- 0.338341, -0.159534, -0.263118, 0.283599, 0.129396, -0.344612, 0.033094, 0.329007,
- -0.188227, -0.240255, 0.301511, 0.098087, -0.347761, 0.065889, 0.316693, -0.215215,
- 0.316693, -0.263118, -0.098087, 0.344612, -0.188227, -0.188227, 0.344612, -0.098087,
- -0.263118, 0.316693, 0.000000, -0.316693, 0.263118, 0.098087, -0.344612, 0.188227,
- 0.283599, -0.329007, 0.098087, 0.215215, -0.347761, 0.188227, 0.129396, -0.338341,
- 0.263118, 0.033094, -0.301511, 0.316693, -0.065889, -0.240255, 0.344612, -0.159534,
- 0.240255, -0.347761, 0.263118, -0.033094, -0.215215, 0.344612, -0.283599, 0.065889,
- 0.188227, -0.338341, 0.301511, -0.098087, -0.159534, 0.329007, -0.316693, 0.129396,
- 0.188227, -0.316693, 0.344612, -0.263118, 0.098087, 0.098087, -0.263118, 0.344612,
- -0.316693, 0.188227, 0.000000, -0.188227, 0.316693, -0.344612, 0.263118, -0.098087,
- 0.129396, -0.240255, 0.316693, -0.347761, 0.329007, -0.263118, 0.159534, -0.033094,
- -0.098087, 0.215215, -0.301511, 0.344612, -0.338341, 0.283599, -0.188227, 0.065889,
- 0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761,
- 0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094
-};
-
-/* Converted the transforms to integers. */
-static const int16_t dct_i16[256] = {
- 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
- 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
- 11529, 11086, 10217, 8955, 7350, 5461, 3363, 1136,
- -1136, -3363, -5461, -7350, -8955, -10217, -11086, -11529,
- 11363, 9633, 6436, 2260, -2260, -6436, -9633, -11363,
- -11363, -9633, -6436, -2260, 2260, 6436, 9633, 11363,
- 11086, 7350, 1136, -5461, -10217, -11529, -8955, -3363,
- 3363, 8955, 11529, 10217, 5461, -1136, -7350, -11086,
- 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703,
- 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703,
- 10217, 1136, -8955, -11086, -3363, 7350, 11529, 5461,
- -5461, -11529, -7350, 3363, 11086, 8955, -1136, -10217,
- 9633, -2260, -11363, -6436, 6436, 11363, 2260, -9633,
- -9633, 2260, 11363, 6436, -6436, -11363, -2260, 9633,
- 8955, -5461, -11086, 1136, 11529, 3363, -10217, -7350,
- 7350, 10217, -3363, -11529, -1136, 11086, 5461, -8955,
- 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192,
- 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192,
- 7350, -10217, -3363, 11529, -1136, -11086, 5461, 8955,
- -8955, -5461, 11086, 1136, -11529, 3363, 10217, -7350,
- 6436, -11363, 2260, 9633, -9633, -2260, 11363, -6436,
- -6436, 11363, -2260, -9633, 9633, 2260, -11363, 6436,
- 5461, -11529, 7350, 3363, -11086, 8955, 1136, -10217,
- 10217, -1136, -8955, 11086, -3363, -7350, 11529, -5461,
- 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433,
- 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433,
- 3363, -8955, 11529, -10217, 5461, 1136, -7350, 11086,
- -11086, 7350, -1136, -5461, 10217, -11529, 8955, -3363,
- 2260, -6436, 9633, -11363, 11363, -9633, 6436, -2260,
- -2260, 6436, -9633, 11363, -11363, 9633, -6436, 2260,
- 1136, -3363, 5461, -7350, 8955, -10217, 11086, -11529,
- 11529, -11086, 10217, -8955, 7350, -5461, 3363, -1136
-};
-
-static const int16_t adst_i16[256] = {
- 1084, 2159, 3214, 4240, 5228, 6168, 7052, 7873,
- 8622, 9293, 9880, 10377, 10781, 11087, 11292, 11395,
- 3214, 6168, 8622, 10377, 11292, 11292, 10377, 8622,
- 6168, 3214, 0, -3214, -6168, -8622, -10377, -11292,
- 5228, 9293, 11292, 10781, 7873, 3214, -2159, -7052,
- -10377, -11395, -9880, -6168, -1084, 4240, 8622, 11087,
- 7052, 11087, 10377, 5228, -2159, -8622, -11395, -9293,
- -3214, 4240, 9880, 11292, 7873, 1084, -6168, -10781,
- 8622, 11292, 6168, -3214, -10377, -10377, -3214, 6168,
- 11292, 8622, 0, -8622, -11292, -6168, 3214, 10377,
- 9880, 9880, 0, -9880, -9880, 0, 9880, 9880,
- 0, -9880, -9880, 0, 9880, 9880, 0, -9880,
- 10781, 7052, -6168, -11087, -1084, 10377, 7873, -5228,
- -11292, -2159, 9880, 8622, -4240, -11395, -3214, 9293,
- 11292, 3214, -10377, -6168, 8622, 8622, -6168, -10377,
- 3214, 11292, 0, -11292, -3214, 10377, 6168, -8622,
- 11395, -1084, -11292, 2159, 11087, -3214, -10781, 4240,
- 10377, -5228, -9880, 6168, 9293, -7052, -8622, 7873,
- 11087, -5228, -8622, 9293, 4240, -11292, 1084, 10781,
- -6168, -7873, 9880, 3214, -11395, 2159, 10377, -7052,
- 10377, -8622, -3214, 11292, -6168, -6168, 11292, -3214,
- -8622, 10377, 0, -10377, 8622, 3214, -11292, 6168,
- 9293, -10781, 3214, 7052, -11395, 6168, 4240, -11087,
- 8622, 1084, -9880, 10377, -2159, -7873, 11292, -5228,
- 7873, -11395, 8622, -1084, -7052, 11292, -9293, 2159,
- 6168, -11087, 9880, -3214, -5228, 10781, -10377, 4240,
- 6168, -10377, 11292, -8622, 3214, 3214, -8622, 11292,
- -10377, 6168, 0, -6168, 10377, -11292, 8622, -3214,
- 4240, -7873, 10377, -11395, 10781, -8622, 5228, -1084,
- -3214, 7052, -9880, 11292, -11087, 9293, -6168, 2159,
- 2159, -4240, 6168, -7873, 9293, -10377, 11087, -11395,
- 11292, -10781, 9880, -8622, 7052, -5228, 3214, -1084
-};
-
-static const int xC1S7 = 16069;
-static const int xC2S6 = 15137;
-static const int xC3S5 = 13623;
-static const int xC4S4 = 11585;
-static const int xC5S3 = 9102;
-static const int xC6S2 = 6270;
-static const int xC7S1 = 3196;
-
-#define SHIFT_BITS 14
-#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
-
-#define FINAL_SHIFT 3
-#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
-#define IN_SHIFT (FINAL_SHIFT+1)
-
-
-void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
- int loop;
- int short_pitch = pitch >> 1;
- int is07, is12, is34, is56;
- int is0734, is1256;
- int id07, id12, id34, id56;
- int irot_input_x, irot_input_y;
- int icommon_product1; // Re-used product (c4s4 * (s12 - s56))
- int icommon_product2; // Re-used product (c4s4 * (d12 + d56))
- int temp1, temp2; // intermediate variable for computation
-
- int InterData[64];
- int *ip = InterData;
- short *op = OutputData;
-
- for (loop = 0; loop < 8; loop++) {
- // Pre calculate some common sums and differences.
- is07 = (InputData[0] + InputData[7]) << IN_SHIFT;
- is12 = (InputData[1] + InputData[2]) << IN_SHIFT;
- is34 = (InputData[3] + InputData[4]) << IN_SHIFT;
- is56 = (InputData[5] + InputData[6]) << IN_SHIFT;
- id07 = (InputData[0] - InputData[7]) << IN_SHIFT;
- id12 = (InputData[1] - InputData[2]) << IN_SHIFT;
- id34 = (InputData[3] - InputData[4]) << IN_SHIFT;
- id56 = (InputData[5] - InputData[6]) << IN_SHIFT;
-
- is0734 = is07 + is34;
- is1256 = is12 + is56;
-
- // Pre-Calculate some common product terms.
- icommon_product1 = xC4S4 * (is12 - is56);
- DOROUND(icommon_product1)
- icommon_product1 >>= SHIFT_BITS;
-
- icommon_product2 = xC4S4 * (id12 + id56);
- DOROUND(icommon_product2)
- icommon_product2 >>= SHIFT_BITS;
-
-
- ip[0] = (xC4S4 * (is0734 + is1256));
- DOROUND(ip[0]);
- ip[0] >>= SHIFT_BITS;
-
- ip[4] = (xC4S4 * (is0734 - is1256));
- DOROUND(ip[4]);
- ip[4] >>= SHIFT_BITS;
-
- // Define inputs to rotation for outputs 2 and 6
- irot_input_x = id12 - id56;
- irot_input_y = is07 - is34;
-
- // Apply rotation for outputs 2 and 6.
- temp1 = xC6S2 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[2] = temp1 + temp2;
-
- temp1 = xC6S2 * irot_input_y;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_x;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[6] = temp1 - temp2;
-
- // Define inputs to rotation for outputs 1 and 7
- irot_input_x = icommon_product1 + id07;
- irot_input_y = -(id34 + icommon_product2);
-
- // Apply rotation for outputs 1 and 7.
- temp1 = xC1S7 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC7S1 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[1] = temp1 - temp2;
-
- temp1 = xC7S1 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC1S7 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[7] = temp1 + temp2;
-
- // Define inputs to rotation for outputs 3 and 5
- irot_input_x = id07 - icommon_product1;
- irot_input_y = id34 - icommon_product2;
-
- // Apply rotation for outputs 3 and 5.
- temp1 = xC3S5 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC5S3 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[3] = temp1 - temp2;
-
-
- temp1 = xC5S3 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC3S5 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[5] = temp1 + temp2;
-
- // Increment data pointer for next row
- InputData += short_pitch;
- ip += 8;
- }
-
- // Performed DCT on rows, now transform the columns
- ip = InterData;
- for (loop = 0; loop < 8; loop++) {
- // Pre calculate some common sums and differences.
- is07 = ip[0 * 8] + ip[7 * 8];
- is12 = ip[1 * 8] + ip[2 * 8];
- is34 = ip[3 * 8] + ip[4 * 8];
- is56 = ip[5 * 8] + ip[6 * 8];
-
- id07 = ip[0 * 8] - ip[7 * 8];
- id12 = ip[1 * 8] - ip[2 * 8];
- id34 = ip[3 * 8] - ip[4 * 8];
- id56 = ip[5 * 8] - ip[6 * 8];
-
- is0734 = is07 + is34;
- is1256 = is12 + is56;
-
- // Pre-Calculate some common product terms
- icommon_product1 = xC4S4 * (is12 - is56);
- icommon_product2 = xC4S4 * (id12 + id56);
- DOROUND(icommon_product1)
- DOROUND(icommon_product2)
- icommon_product1 >>= SHIFT_BITS;
- icommon_product2 >>= SHIFT_BITS;
-
-
- temp1 = xC4S4 * (is0734 + is1256);
- temp2 = xC4S4 * (is0734 - is1256);
- DOROUND(temp1);
- DOROUND(temp2);
- temp1 >>= SHIFT_BITS;
-
- temp2 >>= SHIFT_BITS;
- op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT;
- op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Define inputs to rotation for outputs 2 and 6
- irot_input_x = id12 - id56;
- irot_input_y = is07 - is34;
-
- // Apply rotation for outputs 2 and 6.
- temp1 = xC6S2 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- temp1 = xC6S2 * irot_input_y;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_x;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Define inputs to rotation for outputs 1 and 7
- irot_input_x = icommon_product1 + id07;
- irot_input_y = -(id34 + icommon_product2);
-
- // Apply rotation for outputs 1 and 7.
- temp1 = xC1S7 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC7S1 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- temp1 = xC7S1 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC1S7 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Define inputs to rotation for outputs 3 and 5
- irot_input_x = id07 - icommon_product1;
- irot_input_y = id34 - icommon_product2;
-
- // Apply rotation for outputs 3 and 5.
- temp1 = xC3S5 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC5S3 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-
- temp1 = xC5S3 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC3S5 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Increment data pointer for next column.
- ip++;
- op++;
- }
+#include "vp9/common/vp9_idct.h"
+
+static void fdct4_1d(int16_t *input, int16_t *output) {
+ int16_t step[4];
+ int temp1, temp2;
+
+ step[0] = input[0] + input[3];
+ step[1] = input[1] + input[2];
+ step[2] = input[1] - input[2];
+ step[3] = input[0] - input[3];
+
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ output[0] = dct_const_round_shift(temp1);
+ output[2] = dct_const_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ output[1] = dct_const_round_shift(temp1);
+ output[3] = dct_const_round_shift(temp2);
}
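
The 1-D 4-point DCT above works entirely in fixed point: the cospi_k_64 constants and dct_const_round_shift() come from the shared vp9_idct.h header included at the top of the file. Below is a reference-only sketch (not part of the change itself) of the same butterfly in double precision; it assumes the usual convention that cospi_k_64 == round(16384 * cos(k * pi / 64)) and that dct_const_round_shift(x) divides by 16384 with rounding.

#include <math.h>

/* Reference-only: the same 4-point butterfly in double precision, under the
 * assumption cospi_k_64 == round(16384 * cos(k * pi / 64)). */
static void fdct4_1d_ref(const double in[4], double out[4]) {
  const double pi = acos(-1.0);
  const double s0 = in[0] + in[3], s1 = in[1] + in[2];
  const double s2 = in[1] - in[2], s3 = in[0] - in[3];
  out[0] = (s0 + s1) * cos(16 * pi / 64);
  out[2] = (s0 - s1) * cos(16 * pi / 64);
  out[1] =  s2 * cos(24 * pi / 64) + s3 * cos(8 * pi / 64);
  out[3] = -s2 * cos(8 * pi / 64)  + s3 * cos(24 * pi / 64);
}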
-void vp9_short_fhaar2x2_c(short *input, short *output, int pitch) {
- /* [1 1; 1 -1] orthogonal transform */
- /* use position: 0,1, 4, 8 */
- int i;
- short *ip1 = input;
- short *op1 = output;
- for (i = 0; i < 16; i++) {
- op1[i] = 0;
+void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * short_pitch + i] << 4;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ fdct4_1d(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
}
- op1[0] = (ip1[0] + ip1[1] + ip1[4] + ip1[8] + 1) >> 1;
- op1[1] = (ip1[0] - ip1[1] + ip1[4] - ip1[8]) >> 1;
- op1[4] = (ip1[0] + ip1[1] - ip1[4] - ip1[8]) >> 1;
- op1[8] = (ip1[0] - ip1[1] - ip1[4] + ip1[8]) >> 1;
-}
-
-/* For test */
-#define TEST_INT 1
-#if TEST_INT
-#define vp9_fht_int_c vp9_fht_c
-#else
-#define vp9_fht_float_c vp9_fht_c
-#endif
-
-void vp9_fht_float_c(const int16_t *input, int pitch, int16_t *output,
- TX_TYPE tx_type, int tx_dim) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int i, j, k;
- float bufa[256], bufb[256]; // buffers are for floating-point test purpose
- // the implementation could be simplified in
- // conjunction with integer transform
- const int16_t *ip = input;
- int16_t *op = output;
-
- float *pfa = &bufa[0];
- float *pfb = &bufb[0];
-
- // pointers to vertical and horizontal transforms
- const float *ptv, *pth;
-
- assert(tx_type != DCT_DCT);
- // load and convert residual array into floating-point
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- pfa[i] = (float)ip[i];
- }
- pfa += tx_dim;
- ip += pitch / 2;
- }
-
- // vertical transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch (tx_type) {
- case ADST_ADST :
- case ADST_DCT :
- ptv = (tx_dim == 4) ? &adst_4[0] :
- ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
- break;
-
- default :
- ptv = (tx_dim == 4) ? &dct_4[0] :
- ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
- break;
- }
-
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- pfb[i] = 0;
- for (k = 0; k < tx_dim; k++) {
- pfb[i] += ptv[k] * pfa[(k * tx_dim)];
- }
- pfa += 1;
- }
- pfb += tx_dim;
- ptv += tx_dim;
- pfa = &bufa[0];
- }
-
- // horizontal transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch (tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = (tx_dim == 4) ? &adst_4[0] :
- ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
- break;
-
- default :
- pth = (tx_dim == 4) ? &dct_4[0] :
- ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
- break;
- }
-
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- pfa[i] = 0;
- for (k = 0; k < tx_dim; k++) {
- pfa[i] += pfb[k] * pth[k];
- }
- pth += tx_dim;
- }
-
- pfa += tx_dim;
- pfb += tx_dim;
- // pth -= tx_dim * tx_dim;
-
- switch (tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = (tx_dim == 4) ? &adst_4[0] :
- ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
- break;
-
- default :
- pth = (tx_dim == 4) ? &dct_4[0] :
- ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
- break;
- }
- }
-
- // convert to short integer format and load BLOCKD buffer
- op = output;
- pfa = &bufa[0];
-
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- op[i] = (pfa[i] > 0 ) ? (int16_t)( 8 * pfa[i] + 0.49) :
- -(int16_t)(- 8 * pfa[i] + 0.49);
- }
- op += tx_dim;
- pfa += tx_dim;
- }
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ fdct4_1d(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
}
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
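
Note the calling convention visible above: pitch is the byte stride of the int16_t input (hence pitch >> 1), the column pass pre-scales samples by << 4, and the row pass rounds the result back down by two bits. A hypothetical usage sketch follows; buffer names are illustrative only.

/* Hypothetical usage: transform one 4x4 residual block stored in a buffer
 * whose rows are 16 int16_t samples apart.  pitch is passed in bytes. */
int16_t residual[16 * 4];   /* filled with source - prediction */
int16_t coeff[4 * 4];
vp9_short_fdct4x4_c(residual, coeff, 16 * sizeof(int16_t));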
-/* Converted the transforms to integer form. */
-#define VERTICAL_SHIFT 11
-#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
-#define HORIZONTAL_SHIFT 16
-#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
-void vp9_fht_int_c(const int16_t *input, int pitch, int16_t *output,
- TX_TYPE tx_type, int tx_dim) {
- int i, j, k;
- int16_t imbuf[256];
-
- const int16_t *ip = input;
- int16_t *op = output;
- int16_t *im = &imbuf[0];
-
- /* pointers to vertical and horizontal transforms. */
- const int16_t *ptv = NULL, *pth = NULL;
-
- switch (tx_type) {
- case ADST_ADST :
- ptv = pth = (tx_dim == 4) ? &adst_i4[0]
- : ((tx_dim == 8) ? &adst_i8[0]
- : &adst_i16[0]);
- break;
- case ADST_DCT :
- ptv = (tx_dim == 4) ? &adst_i4[0]
- : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]);
- pth = (tx_dim == 4) ? &dct_i4[0]
- : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
- break;
- case DCT_ADST :
- ptv = (tx_dim == 4) ? &dct_i4[0]
- : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
- pth = (tx_dim == 4) ? &adst_i4[0]
- : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]);
- break;
- case DCT_DCT :
- ptv = pth = (tx_dim == 4) ? &dct_i4[0]
- : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
- break;
- default:
- assert(0);
- break;
- }
-
- /* vertical transformation */
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- int temp = 0;
+static void fadst4_1d(int16_t *input, int16_t *output) {
+ int x0, x1, x2, x3;
+ int s0, s1, s2, s3, s4, s5, s6, s7;
- for (k = 0; k < tx_dim; k++) {
- temp += ptv[k] * ip[(k * (pitch >> 1))];
- }
+ x0 = input[0];
+ x1 = input[1];
+ x2 = input[2];
+ x3 = input[3];
- im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
- ip++;
- }
- im += tx_dim; // 16
- ptv += tx_dim;
- ip = input;
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
}
- /* horizontal transformation */
- im = &imbuf[0];
-
- for (j = 0; j < tx_dim; j++) {
- const int16_t *pthc = pth;
-
- for (i = 0; i < tx_dim; i++) {
- int temp = 0;
-
- for (k = 0; k < tx_dim; k++) {
- temp += im[k] * pthc[k];
- }
-
- op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
- pthc += tx_dim;
- }
-
- im += tx_dim; // 16
- op += tx_dim;
- }
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_4_9 * x0;
+ s2 = sinpi_2_9 * x1;
+ s3 = sinpi_1_9 * x1;
+ s4 = sinpi_3_9 * x2;
+ s5 = sinpi_4_9 * x3;
+ s6 = sinpi_2_9 * x3;
+ s7 = x0 + x1 - x3;
+
+ x0 = s0 + s2 + s5;
+ x1 = sinpi_3_9 * s7;
+ x2 = s1 - s3 + s6;
+ x3 = s4;
+
+ s0 = x0 + x3;
+ s1 = x1;
+ s2 = x2 - x3;
+ s3 = x2 - x0 + x3;
+
+ // 1-D transform scaling factor is sqrt(2).
+ output[0] = dct_const_round_shift(s0);
+ output[1] = dct_const_round_shift(s1);
+ output[2] = dct_const_round_shift(s2);
+ output[3] = dct_const_round_shift(s3);
}
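
The sinpi_k_9 constants used here also come from vp9_idct.h; assuming they follow the usual convention sinpi_k_9 == round(16384 * (2/3) * sqrt(2) * sin(k * pi / 9)), the integer flow above corresponds to the double-precision sketch below (reference only, not part of the change), which can be used to sanity-check the fixed-point rounding.

#include <math.h>

/* Reference-only mirror of fadst4_1d in double precision.  The factor
 * (2/3) * sqrt(2) is assumed to be baked into the sinpi_k_9 constants. */
static void fadst4_1d_ref(const double in[4], double out[4]) {
  const double pi = acos(-1.0);
  const double k = 2.0 / 3.0 * sqrt(2.0);
  const double sp1 = k * sin(1 * pi / 9), sp2 = k * sin(2 * pi / 9);
  const double sp3 = k * sin(3 * pi / 9), sp4 = k * sin(4 * pi / 9);
  const double s0 = sp1 * in[0], s1 = sp4 * in[0];
  const double s2 = sp2 * in[1], s3 = sp1 * in[1];
  const double s4 = sp3 * in[2];
  const double s5 = sp4 * in[3], s6 = sp2 * in[3];
  const double s7 = in[0] + in[1] - in[3];   /* unscaled, as in the code */
  const double x0 = s0 + s2 + s5, x1 = sp3 * s7;
  const double x2 = s1 - s3 + s6, x3 = s4;
  out[0] = x0 + x3;
  out[1] = x1;
  out[2] = x2 - x3;
  out[3] = x2 - x0 + x3;
}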
-void vp9_short_fdct4x4_c(short *input, short *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- short *ip = input;
- short *op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ((ip[0] + ip[3]) << 5);
- b1 = ((ip[1] + ip[2]) << 5);
- c1 = ((ip[1] - ip[2]) << 5);
- d1 = ((ip[0] - ip[3]) << 5);
-
- op[0] = a1 + b1;
- op[2] = a1 - b1;
-
- op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
- op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12;
-
- ip += pitch / 2;
- op += 4;
+static const transform_2d FHT_4[] = {
+ { fdct4_1d, fdct4_1d }, // DCT_DCT = 0
+ { fadst4_1d, fdct4_1d }, // ADST_DCT = 1
+ { fdct4_1d, fadst4_1d }, // DCT_ADST = 2
+ { fadst4_1d, fadst4_1d } // ADST_ADST = 3
+};
+void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+ const transform_2d ht = FHT_4[tx_type];
+
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * pitch + i] << 4;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
}
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[12];
- b1 = ip[4] + ip[8];
- c1 = ip[4] - ip[8];
- d1 = ip[0] - ip[12];
-
- op[0] = (a1 + b1 + 7) >> 4;
- op[8] = (a1 - b1 + 7) >> 4;
- op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0);
- op[12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16;
-
- ip++;
- op++;
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
}
}
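
The FHT_4 table pairs the column and row 1-D transforms by TX_TYPE (DCT_DCT = 0 through ADST_ADST = 3, per the comments above). Also note that, unlike vp9_short_fdct4x4_c, this function indexes the input with pitch directly rather than pitch >> 1, so the stride passed in must already be in int16_t units. A hypothetical usage sketch:

/* Hypothetical usage: hybrid DCT/ADST transform of a densely packed 4x4
 * block (stride of 4 samples).  ADST_DCT is one of the TX_TYPE values named
 * in the FHT_4 table above. */
int16_t block[4 * 4];   /* residual samples */
int16_t coeff[4 * 4];
vp9_short_fht4x4_c(block, coeff, 4, ADST_DCT);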
-void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
-{
- vp9_short_fdct4x4_c(input, output, pitch);
+void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {
+ vp9_short_fdct4x4_c(input, output, pitch);
vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
}
-void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- short *ip = input;
- short *op = output;
- int pitch_short = pitch >> 1;
+static void fdct8_1d(int16_t *input, int16_t *output) {
+ /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
+ /*needs32*/ int t0, t1, t2, t3;
+ /*canbe16*/ int x0, x1, x2, x3;
+
+ // stage 1
+ s0 = input[0] + input[7];
+ s1 = input[1] + input[6];
+ s2 = input[2] + input[5];
+ s3 = input[3] + input[4];
+ s4 = input[3] - input[4];
+ s5 = input[2] - input[5];
+ s6 = input[1] - input[6];
+ s7 = input[0] - input[7];
+
+ // fdct4_1d(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0] = dct_const_round_shift(t0);
+ output[2] = dct_const_round_shift(t2);
+ output[4] = dct_const_round_shift(t1);
+ output[6] = dct_const_round_shift(t3);
- for (i = 0; i < 4; i++) {
- a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
- b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
- c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
- d1 = ip[0 * pitch_short] - ip[3 * pitch_short];
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = dct_const_round_shift(t0);
+ t3 = dct_const_round_shift(t1);
- op[0] = (a1 + b1 + 1) >> 1;
- op[4] = (c1 + d1) >> 1;
- op[8] = (a1 - b1) >> 1;
- op[12] = (d1 - c1) >> 1;
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
- ip++;
- op++;
- }
- ip = output;
- op = output;
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1] = dct_const_round_shift(t0);
+ output[3] = dct_const_round_shift(t2);
+ output[5] = dct_const_round_shift(t1);
+ output[7] = dct_const_round_shift(t3);
+}
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[3];
- b1 = ip[1] + ip[2];
- c1 = ip[1] - ip[2];
- d1 = ip[0] - ip[3];
+void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {
+ const int stride = pitch >> 1;
+ int i, j;
+ int16_t intermediate[64];
- op[0] = (a1 + b1 + 1) >> 1;
- op[1] = (c1 + d1) >> 1;
- op[2] = (a1 - b1) >> 1;
- op[3] = (d1 - c1) >> 1;
+ // Transform columns
+ {
+ int16_t *output = intermediate;
+ /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
+ /*needs32*/ int t0, t1, t2, t3;
+ /*canbe16*/ int x0, x1, x2, x3;
+
+ int i;
+ for (i = 0; i < 8; i++) {
+ // stage 1
+ s0 = (input[0 * stride] + input[7 * stride]) << 2;
+ s1 = (input[1 * stride] + input[6 * stride]) << 2;
+ s2 = (input[2 * stride] + input[5 * stride]) << 2;
+ s3 = (input[3 * stride] + input[4 * stride]) << 2;
+ s4 = (input[3 * stride] - input[4 * stride]) << 2;
+ s5 = (input[2 * stride] - input[5 * stride]) << 2;
+ s6 = (input[1 * stride] - input[6 * stride]) << 2;
+ s7 = (input[0 * stride] - input[7 * stride]) << 2;
+
+ // fdct4_1d(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0 * 8] = dct_const_round_shift(t0);
+ output[2 * 8] = dct_const_round_shift(t2);
+ output[4 * 8] = dct_const_round_shift(t1);
+ output[6 * 8] = dct_const_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = dct_const_round_shift(t0);
+ t3 = dct_const_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1 * 8] = dct_const_round_shift(t0);
+ output[3 * 8] = dct_const_round_shift(t2);
+ output[5 * 8] = dct_const_round_shift(t1);
+ output[7 * 8] = dct_const_round_shift(t3);
+ input++;
+ output++;
+ }
+ }
- ip += 4;
- op += 4;
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ fdct8_1d(&intermediate[i * 8], &final_output[i * 8]);
+ for (j = 0; j < 8; ++j)
+ final_output[j + i * 8] /= 2;
}
}
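
A quick sanity check on the scaling of the 8x8 path (inputs pre-scaled by << 2 in the column pass, result halved after the row pass): for a flat residual block only the DC coefficient should survive, and it lands close to 64 times the sample value, i.e. a fixed gain of roughly 8 over an orthonormal 2-D DCT. The sketch below is reference-only and assumes the function is visible through the encoder's prototype/RTCD headers.

#include <stdint.h>
#include <stdio.h>

/* Reference-only check, not part of the change. */
static void check_fdct8x8_dc(void) {
  int16_t in[8 * 8], out[8 * 8];
  int i;
  for (i = 0; i < 8 * 8; ++i)
    in[i] = 25;                                      /* arbitrary flat block */
  vp9_short_fdct8x8_c(in, out, 8 * sizeof(int16_t)); /* pitch in bytes */
  printf("DC = %d (expect ~%d), first AC = %d (expect 0)\n",
         out[0], 64 * 25, out[1]);
}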
-#if CONFIG_LOSSLESS
-void vp9_short_walsh4x4_lossless_c(short *input, short *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- short *ip = input;
- short *op = output;
- int pitch_short = pitch >> 1;
-
- for (i = 0; i < 4; i++) {
- a1 = (ip[0 * pitch_short] + ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
- b1 = (ip[1 * pitch_short] + ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
- c1 = (ip[1 * pitch_short] - ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
- d1 = (ip[0 * pitch_short] - ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
+static void fadst8_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7;
+
+ int x0 = input[7];
+ int x1 = input[0];
+ int x2 = input[5];
+ int x3 = input[2];
+ int x4 = input[3];
+ int x5 = input[4];
+ int x6 = input[1];
+ int x7 = input[6];
+
+ // stage 1
+ s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+ s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+ s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+ s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+ s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+ s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+ s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+ s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+ x0 = dct_const_round_shift(s0 + s4);
+ x1 = dct_const_round_shift(s1 + s5);
+ x2 = dct_const_round_shift(s2 + s6);
+ x3 = dct_const_round_shift(s3 + s7);
+ x4 = dct_const_round_shift(s0 - s4);
+ x5 = dct_const_round_shift(s1 - s5);
+ x6 = dct_const_round_shift(s2 - s6);
+ x7 = dct_const_round_shift(s3 - s7);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+ s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+ s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
+ s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
+
+ // stage 3
+ s2 = cospi_16_64 * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (x6 - x7);
+
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
+
+ output[0] = x0;
+ output[1] = - x4;
+ output[2] = x6;
+ output[3] = - x2;
+ output[4] = x3;
+ output[5] = - x7;
+ output[6] = x5;
+ output[7] = - x1;
+}
- op[0] = (a1 + b1 + 1) >> 1;
- op[4] = (c1 + d1) >> 1;
- op[8] = (a1 - b1) >> 1;
- op[12] = (d1 - c1) >> 1;
+static const transform_2d FHT_8[] = {
+ { fdct8_1d, fdct8_1d }, // DCT_DCT = 0
+ { fadst8_1d, fdct8_1d }, // ADST_DCT = 1
+ { fdct8_1d, fadst8_1d }, // DCT_ADST = 2
+ { fadst8_1d, fadst8_1d } // ADST_ADST = 3
+};
- ip++;
- op++;
+void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[64];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+ const transform_2d ht = FHT_8[tx_type];
+
+ // Columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = input[j * pitch + i] << 2;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ outptr[j * 8 + i] = temp_out[j];
}
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[3];
- b1 = ip[1] + ip[2];
- c1 = ip[1] - ip[2];
- d1 = ip[0] - ip[3];
-
- op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[1] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[2] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[3] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
-
- ip += 4;
- op += 4;
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = temp_out[j] >> 1;
}
}
@@ -898,1491 +418,642 @@ void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) {
vp9_short_walsh4x4_x8_c(input, output, pitch);
vp9_short_walsh4x4_x8_c(input + 4, output + 16, pitch);
}
-#endif
-
-#define TEST_INT_16x16_DCT 1
-#if !TEST_INT_16x16_DCT
-
-static void dct16x16_1d(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
-
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
-
- temp1 = step[ 8]*C7;
- temp2 = step[15]*C9;
- output[ 8] = temp1 + temp2;
-
- temp1 = step[ 9]*C11;
- temp2 = step[14]*C5;
- output[ 9] = temp1 - temp2;
-
- temp1 = step[10]*C3;
- temp2 = step[13]*C13;
- output[10] = temp1 + temp2;
-
- temp1 = step[11]*C15;
- temp2 = step[12]*C1;
- output[11] = temp1 - temp2;
-
- temp1 = step[11]*C1;
- temp2 = step[12]*C15;
- output[12] = temp2 + temp1;
-
- temp1 = step[10]*C13;
- temp2 = step[13]*C3;
- output[13] = temp2 - temp1;
-
- temp1 = step[ 9]*C5;
- temp2 = step[14]*C11;
- output[14] = temp2 + temp1;
-
- temp1 = step[ 8]*C9;
- temp2 = step[15]*C7;
- output[15] = temp2 - temp1;
-
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
-
- temp1 = output[4]*C14;
- temp2 = output[7]*C2;
- step[ 4] = temp1 + temp2;
-
- temp1 = output[5]*C10;
- temp2 = output[6]*C6;
- step[ 5] = temp1 + temp2;
-
- temp1 = output[5]*C6;
- temp2 = output[6]*C10;
- step[ 6] = temp2 - temp1;
-
- temp1 = output[4]*C2;
- temp2 = output[7]*C14;
- step[ 7] = temp2 - temp1;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1]);
- output[ 8] = (step[ 0] - step[ 1]);
-
- temp1 = step[2]*C12;
- temp2 = step[3]*C4;
- temp1 = temp1 + temp2;
- output[ 4] = 2*(temp1*C8);
-
- temp1 = step[2]*C4;
- temp2 = step[3]*C12;
- temp1 = temp2 - temp1;
- output[12] = 2*(temp1*C8);
-
- output[ 2] = 2*((step[4] + step[ 5])*C8);
- output[14] = 2*((step[7] - step[ 6])*C8);
-
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2);
- output[10] = (temp1 - temp2);
-
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
-
- temp1 = intermediate[8]*C12;
- temp2 = intermediate[9]*C4;
- temp1 = temp1 - temp2;
- output[3] = 2*(temp1*C8);
-
- temp1 = intermediate[8]*C4;
- temp2 = intermediate[9]*C12;
- temp1 = temp2 + temp1;
- output[13] = 2*(temp1*C8);
-
- output[ 9] = 2*((step[10] + step[11])*C8);
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
-
- output[15] = (intermediate[11] + intermediate[12]);
- output[ 1] = -(intermediate[11] - intermediate[12]);
-
- output[ 7] = 2*(intermediate[13]*C8);
-
- temp1 = intermediate[14]*C12;
- temp2 = intermediate[15]*C4;
- temp1 = temp1 - temp2;
- output[11] = -2*(temp1*C8);
-
- temp1 = intermediate[14]*C4;
- temp2 = intermediate[15]*C12;
- temp1 = temp2 + temp1;
- output[ 5] = 2*(temp1*C8);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-void vp9_short_fdct16x16_c(short *input, short *out, int pitch) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[256];
- // First transform columns
- for (i = 0; i < 16; i++) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct16x16_1d(temp_in, temp_out);
- for (j = 0; j < 16; j++)
- output[j*16 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = output[j + i*16];
- dct16x16_1d(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i*16] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 256; i++)
- out[i] = (short)round(output[i]/2);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-#else
-static const int16_t C1 = 16305;
-static const int16_t C2 = 16069;
-static const int16_t C3 = 15679;
-static const int16_t C4 = 15137;
-static const int16_t C5 = 14449;
-static const int16_t C6 = 13623;
-static const int16_t C7 = 12665;
-static const int16_t C8 = 11585;
-static const int16_t C9 = 10394;
-static const int16_t C10 = 9102;
-static const int16_t C11 = 7723;
-static const int16_t C12 = 6270;
-static const int16_t C13 = 4756;
-static const int16_t C14 = 3196;
-static const int16_t C15 = 1606;
-
-#define RIGHT_SHIFT 14
-#define ROUNDING (1 << (RIGHT_SHIFT - 1))
-
-static void dct16x16_1d(int16_t input[16], int16_t output[16],
- int last_shift_bits) {
- int16_t step[16];
- int intermediate[16];
- int temp1, temp2;
- int final_shift = RIGHT_SHIFT;
- int final_rounding = ROUNDING;
- int output_shift = 0;
- int output_rounding = 0;
-
- final_shift += last_shift_bits;
- if (final_shift > 0)
- final_rounding = 1 << (final_shift - 1);
-
- output_shift += last_shift_bits;
- if (output_shift > 0)
- output_rounding = 1 << (output_shift - 1);
-
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
-
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
-
- temp1 = step[ 8] * C7;
- temp2 = step[15] * C9;
- output[ 8] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 9] * C11;
- temp2 = step[14] * C5;
- output[ 9] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[10] * C3;
- temp2 = step[13] * C13;
- output[10] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[11] * C15;
- temp2 = step[12] * C1;
- output[11] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[11] * C1;
- temp2 = step[12] * C15;
- output[12] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[10] * C13;
- temp2 = step[13] * C3;
- output[13] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 9] * C5;
- temp2 = step[14] * C11;
- output[14] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 8] * C9;
- temp2 = step[15] * C7;
- output[15] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
-
- temp1 = output[4] * C14;
- temp2 = output[7] * C2;
- step[ 4] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[5] * C10;
- temp2 = output[6] * C6;
- step[ 5] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[5] * C6;
- temp2 = output[6] * C10;
- step[ 6] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[4] * C2;
- temp2 = output[7] * C14;
- step[ 7] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1] + output_rounding) >> output_shift;
- output[ 8] = (step[ 0] - step[ 1] + output_rounding) >> output_shift;
-
- temp1 = step[2] * C12;
- temp2 = step[3] * C4;
- temp1 = (temp1 + temp2 + final_rounding) >> final_shift;
- output[ 4] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[2] * C4;
- temp2 = step[3] * C12;
- temp1 = (temp2 - temp1 + final_rounding) >> final_shift;
- output[12] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- output[ 2] = (2 * ((step[4] + step[ 5]) * C8) + final_rounding)
- >> final_shift;
- output[14] = (2 * ((step[7] - step[ 6]) * C8) + final_rounding)
- >> final_shift;
-
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2 + output_rounding) >> output_shift;
- output[10] = (temp1 - temp2 + output_rounding) >> output_shift;
-
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
-
- temp1 = intermediate[8] * C12;
- temp2 = intermediate[9] * C4;
- temp1 = (temp1 - temp2 + final_rounding) >> final_shift;
- output[3] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = intermediate[8] * C4;
- temp2 = intermediate[9] * C12;
- temp1 = (temp2 + temp1 + final_rounding) >> final_shift;
- output[13] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- output[ 9] = (2 * ((step[10] + step[11]) * C8) + final_rounding)
- >> final_shift;
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
-
- output[15] = (intermediate[11] + intermediate[12] + output_rounding)
- >> output_shift;
- output[ 1] = -(intermediate[11] - intermediate[12] + output_rounding)
- >> output_shift;
-
- output[ 7] = (2 * (intermediate[13] * C8) + final_rounding) >> final_shift;
-
- temp1 = intermediate[14] * C12;
- temp2 = intermediate[15] * C4;
- temp1 = (temp1 - temp2 + final_rounding) >> final_shift;
- output[11] = (-2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = intermediate[14] * C4;
- temp2 = intermediate[15] * C12;
- temp1 = (temp2 + temp1 + final_rounding) >> final_shift;
- output[ 5] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+// Rewritten to use the same algorithm as the other transform sizes.
+static void fdct16_1d(int16_t input[16], int16_t output[16]) {
+ int16_t step[16];
+ int temp1, temp2;
+
+ // step 1
+ step[ 0] = input[0] + input[15];
+ step[ 1] = input[1] + input[14];
+ step[ 2] = input[2] + input[13];
+ step[ 3] = input[3] + input[12];
+ step[ 4] = input[4] + input[11];
+ step[ 5] = input[5] + input[10];
+ step[ 6] = input[6] + input[ 9];
+ step[ 7] = input[7] + input[ 8];
+ step[ 8] = input[7] - input[ 8];
+ step[ 9] = input[6] - input[ 9];
+ step[10] = input[5] - input[10];
+ step[11] = input[4] - input[11];
+ step[12] = input[3] - input[12];
+ step[13] = input[2] - input[13];
+ step[14] = input[1] - input[14];
+ step[15] = input[0] - input[15];
+
+ fdct8_1d(step, step);
+
+ // step 2
+ output[8] = step[8];
+ output[9] = step[9];
+ temp1 = (-step[10] + step[13]) * cospi_16_64;
+ temp2 = (-step[11] + step[12]) * cospi_16_64;
+ output[10] = dct_const_round_shift(temp1);
+ output[11] = dct_const_round_shift(temp2);
+ temp1 = (step[11] + step[12]) * cospi_16_64;
+ temp2 = (step[10] + step[13]) * cospi_16_64;
+ output[12] = dct_const_round_shift(temp1);
+ output[13] = dct_const_round_shift(temp2);
+ output[14] = step[14];
+ output[15] = step[15];
+
+ // step 3
+ step[ 8] = output[8] + output[11];
+ step[ 9] = output[9] + output[10];
+ step[ 10] = output[9] - output[10];
+ step[ 11] = output[8] - output[11];
+ step[ 12] = -output[12] + output[15];
+ step[ 13] = -output[13] + output[14];
+ step[ 14] = output[13] + output[14];
+ step[ 15] = output[12] + output[15];
+
+ // step 4
+ output[8] = step[8];
+ temp1 = -step[9] * cospi_8_64 + step[14] * cospi_24_64;
+ temp2 = -step[10] * cospi_24_64 - step[13] * cospi_8_64;
+ output[9] = dct_const_round_shift(temp1);
+ output[10] = dct_const_round_shift(temp2);
+ output[11] = step[11];
+ output[12] = step[12];
+ temp1 = -step[10] * cospi_8_64 + step[13] * cospi_24_64;
+ temp2 = step[9] * cospi_24_64 + step[14] * cospi_8_64;
+ output[13] = dct_const_round_shift(temp1);
+ output[14] = dct_const_round_shift(temp2);
+ output[15] = step[15];
+
+ // step 5
+ step[8] = output[8] + output[9];
+ step[9] = output[8] - output[9];
+ step[10] = -output[10] + output[11];
+ step[11] = output[10] + output[11];
+ step[12] = output[12] + output[13];
+ step[13] = output[12] - output[13];
+ step[14] = -output[14] + output[15];
+ step[15] = output[14] + output[15];
+
+ // step 6
+ output[0] = step[0];
+ output[8] = step[4];
+ output[4] = step[2];
+ output[12] = step[6];
+ output[2] = step[1];
+ output[10] = step[5];
+ output[6] = step[3];
+ output[14] = step[7];
+
+ temp1 = step[8] * cospi_30_64 + step[15] * cospi_2_64;
+ temp2 = step[9] * cospi_14_64 + step[14] * cospi_18_64;
+ output[1] = dct_const_round_shift(temp1);
+ output[9] = dct_const_round_shift(temp2);
+
+ temp1 = step[10] * cospi_22_64 + step[13] * cospi_10_64;
+ temp2 = step[11] * cospi_6_64 + step[12] * cospi_26_64;
+ output[5] = dct_const_round_shift(temp1);
+ output[13] = dct_const_round_shift(temp2);
+
+ temp1 = -step[11] * cospi_26_64 + step[12] * cospi_6_64;
+ temp2 = -step[10] * cospi_10_64 + step[13] * cospi_22_64;
+ output[3] = dct_const_round_shift(temp1);
+ output[11] = dct_const_round_shift(temp2);
+
+ temp1 = -step[9] * cospi_18_64 + step[14] * cospi_14_64;
+ temp2 = -step[8] * cospi_2_64 + step[15] * cospi_30_64;
+ output[7] = dct_const_round_shift(temp1);
+ output[15] = dct_const_round_shift(temp2);
}
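
The rewritten 16-point DCT folds the input into sums and differences, reuses fdct8_1d on the sums for the even-frequency half (fdct8_1d(step, step) above, with step[k] landing in output[2k] at step 6), and runs the dedicated butterfly on the differences for the odd half. Since both helpers are static to this file, that relationship can be checked in place with a sketch like the following (small inputs assumed so the folded sums fit in int16_t):

#include <assert.h>

/* Reference-only check: even coefficients of the 16-point DCT equal the
 * 8-point DCT of the folded input. */
static void check_fdct16_even_half(int16_t in[16]) {
  int16_t folded[8], even[8], full[16];
  int k;
  for (k = 0; k < 8; ++k)
    folded[k] = in[k] + in[15 - k];
  fdct8_1d(folded, even);
  fdct16_1d(in, full);
  for (k = 0; k < 8; ++k)
    assert(full[2 * k] == even[k]);
}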
void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {
- int shortpitch = pitch >> 1;
- int i, j;
- int16_t output[256];
- int16_t *outptr = &output[0];
-
- // First transform columns
- for (i = 0; i < 16; i++) {
- int16_t temp_in[16];
- int16_t temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j * shortpitch + i];
- dct16x16_1d(temp_in, temp_out, 0);
- for (j = 0; j < 16; j++)
- output[j * 16 + i] = temp_out[j];
- }
-
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- dct16x16_1d(outptr, out, 1);
- outptr += 16;
- out += 16;
- }
-}
-#undef RIGHT_SHIFT
-#undef ROUNDING
-#endif
-
-#if !CONFIG_DWTDCTHYBRID
-static void dct32_1d(double *input, double *output, int stride) {
- static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
- static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
- static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
- static const double C4 = 0.980785280403; // cos(pi * 4 / 64)
- static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
- static const double C6 = 0.956940335732; // cos(pi * 6 / 64)
- static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
- static const double C8 = 0.923879532511; // cos(pi * 8 / 64)
- static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
- static const double C10 = 0.881921264348; // cos(pi * 10 / 64)
- static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
- static const double C12 = 0.831469612303; // cos(pi * 12 / 64)
- static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
- static const double C14 = 0.773010453363; // cos(pi * 14 / 64)
- static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
- static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
- static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
- static const double C18 = 0.634393284164; // cos(pi * 18 / 64)
- static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
- static const double C20 = 0.555570233020; // cos(pi * 20 / 64)
- static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
- static const double C22 = 0.471396736826; // cos(pi * 22 / 64)
- static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
- static const double C24 = 0.382683432365; // cos(pi * 24 / 64)
- static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
- static const double C26 = 0.290284677254; // cos(pi * 26 / 64)
- static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
- static const double C28 = 0.195090322016; // cos(pi * 28 / 64)
- static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
- static const double C30 = 0.098017140330; // cos(pi * 30 / 64)
- static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
-
- double step[32];
-
- // Stage 1
- step[0] = input[stride*0] + input[stride*(32 - 1)];
- step[1] = input[stride*1] + input[stride*(32 - 2)];
- step[2] = input[stride*2] + input[stride*(32 - 3)];
- step[3] = input[stride*3] + input[stride*(32 - 4)];
- step[4] = input[stride*4] + input[stride*(32 - 5)];
- step[5] = input[stride*5] + input[stride*(32 - 6)];
- step[6] = input[stride*6] + input[stride*(32 - 7)];
- step[7] = input[stride*7] + input[stride*(32 - 8)];
- step[8] = input[stride*8] + input[stride*(32 - 9)];
- step[9] = input[stride*9] + input[stride*(32 - 10)];
- step[10] = input[stride*10] + input[stride*(32 - 11)];
- step[11] = input[stride*11] + input[stride*(32 - 12)];
- step[12] = input[stride*12] + input[stride*(32 - 13)];
- step[13] = input[stride*13] + input[stride*(32 - 14)];
- step[14] = input[stride*14] + input[stride*(32 - 15)];
- step[15] = input[stride*15] + input[stride*(32 - 16)];
- step[16] = -input[stride*16] + input[stride*(32 - 17)];
- step[17] = -input[stride*17] + input[stride*(32 - 18)];
- step[18] = -input[stride*18] + input[stride*(32 - 19)];
- step[19] = -input[stride*19] + input[stride*(32 - 20)];
- step[20] = -input[stride*20] + input[stride*(32 - 21)];
- step[21] = -input[stride*21] + input[stride*(32 - 22)];
- step[22] = -input[stride*22] + input[stride*(32 - 23)];
- step[23] = -input[stride*23] + input[stride*(32 - 24)];
- step[24] = -input[stride*24] + input[stride*(32 - 25)];
- step[25] = -input[stride*25] + input[stride*(32 - 26)];
- step[26] = -input[stride*26] + input[stride*(32 - 27)];
- step[27] = -input[stride*27] + input[stride*(32 - 28)];
- step[28] = -input[stride*28] + input[stride*(32 - 29)];
- step[29] = -input[stride*29] + input[stride*(32 - 30)];
- step[30] = -input[stride*30] + input[stride*(32 - 31)];
- step[31] = -input[stride*31] + input[stride*(32 - 32)];
-
- // Stage 2
- output[stride*0] = step[0] + step[16 - 1];
- output[stride*1] = step[1] + step[16 - 2];
- output[stride*2] = step[2] + step[16 - 3];
- output[stride*3] = step[3] + step[16 - 4];
- output[stride*4] = step[4] + step[16 - 5];
- output[stride*5] = step[5] + step[16 - 6];
- output[stride*6] = step[6] + step[16 - 7];
- output[stride*7] = step[7] + step[16 - 8];
- output[stride*8] = -step[8] + step[16 - 9];
- output[stride*9] = -step[9] + step[16 - 10];
- output[stride*10] = -step[10] + step[16 - 11];
- output[stride*11] = -step[11] + step[16 - 12];
- output[stride*12] = -step[12] + step[16 - 13];
- output[stride*13] = -step[13] + step[16 - 14];
- output[stride*14] = -step[14] + step[16 - 15];
- output[stride*15] = -step[15] + step[16 - 16];
-
- output[stride*16] = step[16];
- output[stride*17] = step[17];
- output[stride*18] = step[18];
- output[stride*19] = step[19];
-
- output[stride*20] = (-step[20] + step[27])*C16;
- output[stride*21] = (-step[21] + step[26])*C16;
- output[stride*22] = (-step[22] + step[25])*C16;
- output[stride*23] = (-step[23] + step[24])*C16;
-
- output[stride*24] = (step[24] + step[23])*C16;
- output[stride*25] = (step[25] + step[22])*C16;
- output[stride*26] = (step[26] + step[21])*C16;
- output[stride*27] = (step[27] + step[20])*C16;
-
- output[stride*28] = step[28];
- output[stride*29] = step[29];
- output[stride*30] = step[30];
- output[stride*31] = step[31];
-
- // Stage 3
- step[0] = output[stride*0] + output[stride*(8 - 1)];
- step[1] = output[stride*1] + output[stride*(8 - 2)];
- step[2] = output[stride*2] + output[stride*(8 - 3)];
- step[3] = output[stride*3] + output[stride*(8 - 4)];
- step[4] = -output[stride*4] + output[stride*(8 - 5)];
- step[5] = -output[stride*5] + output[stride*(8 - 6)];
- step[6] = -output[stride*6] + output[stride*(8 - 7)];
- step[7] = -output[stride*7] + output[stride*(8 - 8)];
- step[8] = output[stride*8];
- step[9] = output[stride*9];
- step[10] = (-output[stride*10] + output[stride*13])*C16;
- step[11] = (-output[stride*11] + output[stride*12])*C16;
- step[12] = (output[stride*12] + output[stride*11])*C16;
- step[13] = (output[stride*13] + output[stride*10])*C16;
- step[14] = output[stride*14];
- step[15] = output[stride*15];
-
- step[16] = output[stride*16] + output[stride*23];
- step[17] = output[stride*17] + output[stride*22];
- step[18] = output[stride*18] + output[stride*21];
- step[19] = output[stride*19] + output[stride*20];
- step[20] = -output[stride*20] + output[stride*19];
- step[21] = -output[stride*21] + output[stride*18];
- step[22] = -output[stride*22] + output[stride*17];
- step[23] = -output[stride*23] + output[stride*16];
- step[24] = -output[stride*24] + output[stride*31];
- step[25] = -output[stride*25] + output[stride*30];
- step[26] = -output[stride*26] + output[stride*29];
- step[27] = -output[stride*27] + output[stride*28];
- step[28] = output[stride*28] + output[stride*27];
- step[29] = output[stride*29] + output[stride*26];
- step[30] = output[stride*30] + output[stride*25];
- step[31] = output[stride*31] + output[stride*24];
-
- // Stage 4
- output[stride*0] = step[0] + step[3];
- output[stride*1] = step[1] + step[2];
- output[stride*2] = -step[2] + step[1];
- output[stride*3] = -step[3] + step[0];
- output[stride*4] = step[4];
- output[stride*5] = (-step[5] + step[6])*C16;
- output[stride*6] = (step[6] + step[5])*C16;
- output[stride*7] = step[7];
- output[stride*8] = step[8] + step[11];
- output[stride*9] = step[9] + step[10];
- output[stride*10] = -step[10] + step[9];
- output[stride*11] = -step[11] + step[8];
- output[stride*12] = -step[12] + step[15];
- output[stride*13] = -step[13] + step[14];
- output[stride*14] = step[14] + step[13];
- output[stride*15] = step[15] + step[12];
-
- output[stride*16] = step[16];
- output[stride*17] = step[17];
- output[stride*18] = step[18]*-C8 + step[29]*C24;
- output[stride*19] = step[19]*-C8 + step[28]*C24;
- output[stride*20] = step[20]*-C24 + step[27]*-C8;
- output[stride*21] = step[21]*-C24 + step[26]*-C8;
- output[stride*22] = step[22];
- output[stride*23] = step[23];
- output[stride*24] = step[24];
- output[stride*25] = step[25];
- output[stride*26] = step[26]*C24 + step[21]*-C8;
- output[stride*27] = step[27]*C24 + step[20]*-C8;
- output[stride*28] = step[28]*C8 + step[19]*C24;
- output[stride*29] = step[29]*C8 + step[18]*C24;
- output[stride*30] = step[30];
- output[stride*31] = step[31];
-
- // Stage 5
- step[0] = (output[stride*0] + output[stride*1]) * C16;
- step[1] = (-output[stride*1] + output[stride*0]) * C16;
- step[2] = output[stride*2]*C24 + output[stride*3] * C8;
- step[3] = output[stride*3]*C24 - output[stride*2] * C8;
- step[4] = output[stride*4] + output[stride*5];
- step[5] = -output[stride*5] + output[stride*4];
- step[6] = -output[stride*6] + output[stride*7];
- step[7] = output[stride*7] + output[stride*6];
- step[8] = output[stride*8];
- step[9] = output[stride*9]*-C8 + output[stride*14]*C24;
- step[10] = output[stride*10]*-C24 + output[stride*13]*-C8;
- step[11] = output[stride*11];
- step[12] = output[stride*12];
- step[13] = output[stride*13]*C24 + output[stride*10]*-C8;
- step[14] = output[stride*14]*C8 + output[stride*9]*C24;
- step[15] = output[stride*15];
-
- step[16] = output[stride*16] + output[stride*19];
- step[17] = output[stride*17] + output[stride*18];
- step[18] = -output[stride*18] + output[stride*17];
- step[19] = -output[stride*19] + output[stride*16];
- step[20] = -output[stride*20] + output[stride*23];
- step[21] = -output[stride*21] + output[stride*22];
- step[22] = output[stride*22] + output[stride*21];
- step[23] = output[stride*23] + output[stride*20];
- step[24] = output[stride*24] + output[stride*27];
- step[25] = output[stride*25] + output[stride*26];
- step[26] = -output[stride*26] + output[stride*25];
- step[27] = -output[stride*27] + output[stride*24];
- step[28] = -output[stride*28] + output[stride*31];
- step[29] = -output[stride*29] + output[stride*30];
- step[30] = output[stride*30] + output[stride*29];
- step[31] = output[stride*31] + output[stride*28];
-
- // Stage 6
- output[stride*0] = step[0];
- output[stride*1] = step[1];
- output[stride*2] = step[2];
- output[stride*3] = step[3];
- output[stride*4] = step[4]*C28 + step[7]*C4;
- output[stride*5] = step[5]*C12 + step[6]*C20;
- output[stride*6] = step[6]*C12 + step[5]*-C20;
- output[stride*7] = step[7]*C28 + step[4]*-C4;
- output[stride*8] = step[8] + step[9];
- output[stride*9] = -step[9] + step[8];
- output[stride*10] = -step[10] + step[11];
- output[stride*11] = step[11] + step[10];
- output[stride*12] = step[12] + step[13];
- output[stride*13] = -step[13] + step[12];
- output[stride*14] = -step[14] + step[15];
- output[stride*15] = step[15] + step[14];
-
- output[stride*16] = step[16];
- output[stride*17] = step[17]*-C4 + step[30]*C28;
- output[stride*18] = step[18]*-C28 + step[29]*-C4;
- output[stride*19] = step[19];
- output[stride*20] = step[20];
- output[stride*21] = step[21]*-C20 + step[26]*C12;
- output[stride*22] = step[22]*-C12 + step[25]*-C20;
- output[stride*23] = step[23];
- output[stride*24] = step[24];
- output[stride*25] = step[25]*C12 + step[22]*-C20;
- output[stride*26] = step[26]*C20 + step[21]*C12;
- output[stride*27] = step[27];
- output[stride*28] = step[28];
- output[stride*29] = step[29]*C28 + step[18]*-C4;
- output[stride*30] = step[30]*C4 + step[17]*C28;
- output[stride*31] = step[31];
-
- // Stage 7
- step[0] = output[stride*0];
- step[1] = output[stride*1];
- step[2] = output[stride*2];
- step[3] = output[stride*3];
- step[4] = output[stride*4];
- step[5] = output[stride*5];
- step[6] = output[stride*6];
- step[7] = output[stride*7];
- step[8] = output[stride*8]*C30 + output[stride*15]*C2;
- step[9] = output[stride*9]*C14 + output[stride*14]*C18;
- step[10] = output[stride*10]*C22 + output[stride*13]*C10;
- step[11] = output[stride*11]*C6 + output[stride*12]*C26;
- step[12] = output[stride*12]*C6 + output[stride*11]*-C26;
- step[13] = output[stride*13]*C22 + output[stride*10]*-C10;
- step[14] = output[stride*14]*C14 + output[stride*9]*-C18;
- step[15] = output[stride*15]*C30 + output[stride*8]*-C2;
-
- step[16] = output[stride*16] + output[stride*17];
- step[17] = -output[stride*17] + output[stride*16];
- step[18] = -output[stride*18] + output[stride*19];
- step[19] = output[stride*19] + output[stride*18];
- step[20] = output[stride*20] + output[stride*21];
- step[21] = -output[stride*21] + output[stride*20];
- step[22] = -output[stride*22] + output[stride*23];
- step[23] = output[stride*23] + output[stride*22];
- step[24] = output[stride*24] + output[stride*25];
- step[25] = -output[stride*25] + output[stride*24];
- step[26] = -output[stride*26] + output[stride*27];
- step[27] = output[stride*27] + output[stride*26];
- step[28] = output[stride*28] + output[stride*29];
- step[29] = -output[stride*29] + output[stride*28];
- step[30] = -output[stride*30] + output[stride*31];
- step[31] = output[stride*31] + output[stride*30];
-
- // Final stage --- outputs indices are bit-reversed.
- output[stride*0] = step[0];
- output[stride*16] = step[1];
- output[stride*8] = step[2];
- output[stride*24] = step[3];
- output[stride*4] = step[4];
- output[stride*20] = step[5];
- output[stride*12] = step[6];
- output[stride*28] = step[7];
- output[stride*2] = step[8];
- output[stride*18] = step[9];
- output[stride*10] = step[10];
- output[stride*26] = step[11];
- output[stride*6] = step[12];
- output[stride*22] = step[13];
- output[stride*14] = step[14];
- output[stride*30] = step[15];
-
- output[stride*1] = step[16]*C31 + step[31]*C1;
- output[stride*17] = step[17]*C15 + step[30]*C17;
- output[stride*9] = step[18]*C23 + step[29]*C9;
- output[stride*25] = step[19]*C7 + step[28]*C25;
- output[stride*5] = step[20]*C27 + step[27]*C5;
- output[stride*21] = step[21]*C11 + step[26]*C21;
- output[stride*13] = step[22]*C19 + step[25]*C13;
- output[stride*29] = step[23]*C3 + step[24]*C29;
- output[stride*3] = step[24]*C3 + step[23]*-C29;
- output[stride*19] = step[25]*C19 + step[22]*-C13;
- output[stride*11] = step[26]*C11 + step[21]*-C21;
- output[stride*27] = step[27]*C27 + step[20]*-C5;
- output[stride*7] = step[28]*C7 + step[19]*-C25;
- output[stride*23] = step[29]*C23 + step[18]*-C9;
- output[stride*15] = step[30]*C15 + step[17]*-C17;
- output[stride*31] = step[31]*C31 + step[16]*-C1;
-}
-
-void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[1024];
- // First transform columns
- for (i = 0; i < 32; i++) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct32_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; j++)
- output[j*32 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 32; ++i) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; ++j)
- temp_in[j] = output[j + i*32];
- dct32_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; ++j)
- output[j + i*32] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 1024; i++) {
- out[i] = (short)round(output[i]/4);
- }
- }
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#else // CONFIG_DWTDCTHYBRID
-
-#if DWT_TYPE == 53
-
-// Note: block length must be even for this implementation
-static void analysis_53_row(int length, short *x,
- short *lowpass, short *highpass) {
- int n;
- short r, *a, *b;
+ int shortpitch = pitch >> 1;
+ int i, j;
+ int16_t output[256];
+ int16_t temp_in[16], temp_out[16];
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++) << 1;
- *b++ = *x - ((r + x[1] + 1) >> 1);
- x++;
- }
- *a = (r = *x++) << 1;
- *b = *x - r;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
+ // First transform columns
+ for (i = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++)
+ temp_in[j] = input[j * shortpitch + i] << 2;
+ fdct16_1d(temp_in, temp_out);
+ for (j = 0; j < 16; j++)
+ output[j * 16 + i] = (temp_out[j] + 1) >> 2;
}
-}
-static void analysis_53_col(int length, short *x,
- short *lowpass, short *highpass) {
- int n;
- short r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++);
- *b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2;
- x++;
- }
- *a = (r = *x++);
- *b = (*x - r + 1) >> 1;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
+ // Then transform rows
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = output[j + i * 16];
+ fdct16_1d(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ out[j + i * 16] = temp_out[j];
}
}
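
The 16x16 wrapper follows the same convention as the 4x4 and 8x8 ones: pitch is a byte stride (shortpitch = pitch >> 1), inputs are pre-scaled by << 2, and the column pass rounds its output back down by two bits before the row pass. A hypothetical usage sketch:

/* Hypothetical usage: forward 16x16 transform of a residual block whose rows
 * are 32 int16_t samples apart; pitch is again in bytes. */
int16_t residual[32 * 16];
int16_t coeff[16 * 16];
vp9_short_fdct16x16_c(residual, coeff, 32 * sizeof(int16_t));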
-static void dyadic_analyze_53(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
- analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = c[i * pitch_c + j];
- analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i];
- }
- }
+void fadst16_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
+
+ int x0 = input[15];
+ int x1 = input[0];
+ int x2 = input[13];
+ int x3 = input[2];
+ int x4 = input[11];
+ int x5 = input[4];
+ int x6 = input[9];
+ int x7 = input[6];
+ int x8 = input[7];
+ int x9 = input[8];
+ int x10 = input[5];
+ int x11 = input[10];
+ int x12 = input[3];
+ int x13 = input[12];
+ int x14 = input[1];
+ int x15 = input[14];
+
+ // stage 1
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+ x0 = dct_const_round_shift(s0 + s8);
+ x1 = dct_const_round_shift(s1 + s9);
+ x2 = dct_const_round_shift(s2 + s10);
+ x3 = dct_const_round_shift(s3 + s11);
+ x4 = dct_const_round_shift(s4 + s12);
+ x5 = dct_const_round_shift(s5 + s13);
+ x6 = dct_const_round_shift(s6 + s14);
+ x7 = dct_const_round_shift(s7 + s15);
+ x8 = dct_const_round_shift(s0 - s8);
+ x9 = dct_const_round_shift(s1 - s9);
+ x10 = dct_const_round_shift(s2 - s10);
+ x11 = dct_const_round_shift(s3 - s11);
+ x12 = dct_const_round_shift(s4 - s12);
+ x13 = dct_const_round_shift(s5 - s13);
+ x14 = dct_const_round_shift(s6 - s14);
+ x15 = dct_const_round_shift(s7 - s15);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+ x0 = s0 + s4;
+ x1 = s1 + s5;
+ x2 = s2 + s6;
+ x3 = s3 + s7;
+ x4 = s0 - s4;
+ x5 = s1 - s5;
+ x6 = s2 - s6;
+ x7 = s3 - s7;
+ x8 = dct_const_round_shift(s8 + s12);
+ x9 = dct_const_round_shift(s9 + s13);
+ x10 = dct_const_round_shift(s10 + s14);
+ x11 = dct_const_round_shift(s11 + s15);
+ x12 = dct_const_round_shift(s8 - s12);
+ x13 = dct_const_round_shift(s9 - s13);
+ x14 = dct_const_round_shift(s10 - s14);
+ x15 = dct_const_round_shift(s11 - s15);
+
+ // stage 3
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
+ x8 = s8 + s10;
+ x9 = s9 + s11;
+ x10 = s8 - s10;
+ x11 = s9 - s11;
+ x12 = dct_const_round_shift(s12 + s14);
+ x13 = dct_const_round_shift(s13 + s15);
+ x14 = dct_const_round_shift(s12 - s14);
+ x15 = dct_const_round_shift(s13 - s15);
+
+ // stage 4
+ s2 = (- cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (- x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (- x10 + x11);
+ s14 = (- cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
+
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
+ x10 = dct_const_round_shift(s10);
+ x11 = dct_const_round_shift(s11);
+ x14 = dct_const_round_shift(s14);
+ x15 = dct_const_round_shift(s15);
+
+ output[0] = x0;
+ output[1] = - x8;
+ output[2] = x12;
+ output[3] = - x4;
+ output[4] = x6;
+ output[5] = x14;
+ output[6] = x10;
+ output[7] = x2;
+ output[8] = x3;
+ output[9] = x11;
+ output[10] = x15;
+ output[11] = x7;
+ output[12] = x5;
+ output[13] = - x13;
+ output[14] = x9;
+ output[15] = - x1;
}
-#elif DWT_TYPE == 26
-
-static void analysis_26_row(int length, short *x,
- short *lowpass, short *highpass) {
- int i, n;
- short r, s, *a, *b;
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- r = *x++;
- s = *x++;
- *a++ = r + s;
- *b++ = r - s;
- }
- n = length >> 1;
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ -= (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b -= (r - *a + 4) >> 3;
- }
-}
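+// Table of 1-D column/row transform pairs, indexed by tx_type.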
+static const transform_2d FHT_16[] = {
+ { fdct16_1d, fdct16_1d }, // DCT_DCT = 0
+ { fadst16_1d, fdct16_1d }, // ADST_DCT = 1
+ { fdct16_1d, fadst16_1d }, // DCT_ADST = 2
+ { fadst16_1d, fadst16_1d } // ADST_ADST = 3
+};
-static void analysis_26_col(int length, short *x,
- short *lowpass, short *highpass) {
- int i, n;
- short r, s, *a, *b;
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- r = *x++;
- s = *x++;
- *a++ = (r + s + 1) >> 1;
- *b++ = (r - s + 1) >> 1;
- }
- n = length >> 1;
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ -= (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b -= (r - *a + 4) >> 3;
- }
-}
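+// Forward 16x16 hybrid transform: the tx_type-selected 1-D kernels are
+// applied to the columns first, then to the rows of the result.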
+void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[256];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+ const transform_2d ht = FHT_16[tx_type];
-static void dyadic_analyze_26(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
- analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = c[i * pitch_c + j];
- analysis_26_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i];
- }
+ // Columns
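+  // Input samples are scaled up by 4 (<< 2); the 1-D column output is
+  // rounded back down, with ties away from zero.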
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = input[j * pitch + i] << 2;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
-}
-#elif DWT_TYPE == 97
-
-static void analysis_97(int length, double *x,
- double *lowpass, double *highpass) {
- static const double a_predict1 = -1.586134342;
- static const double a_update1 = -0.05298011854;
- static const double a_predict2 = 0.8829110762;
- static const double a_update2 = 0.4435068522;
- static const double s_low = 1.149604398;
- static const double s_high = 1/1.149604398;
- int i;
- double y[DWT_MAX_LENGTH];
- // Predict 1
- for (i = 1; i < length - 2; i += 2) {
- x[i] += a_predict1 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] += 2 * a_predict1 * x[length - 2];
- // Update 1
- for (i = 2; i < length; i += 2) {
- x[i] += a_update1 * (x[i - 1] + x[i + 1]);
- }
- x[0] += 2 * a_update1 * x[1];
- // Predict 2
- for (i = 1; i < length - 2; i += 2) {
- x[i] += a_predict2 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] += 2 * a_predict2 * x[length - 2];
- // Update 2
- for (i = 2; i < length; i += 2) {
- x[i] += a_update2 * (x[i - 1] + x[i + 1]);
- }
- x[0] += 2 * a_update2 * x[1];
- memcpy(y, x, sizeof(*y) * length);
- // Scale and pack
- for (i = 0; i < length / 2; i++) {
- lowpass[i] = y[2 * i] * s_low;
- highpass[i] = y[2 * i + 1] * s_high;
+ // Rows
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j + i * 16];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j + i * 16] = temp_out[j];
}
}
-static void dyadic_analyze_97(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- double buffer[2 * DWT_MAX_LENGTH];
- double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
- analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH],
- &y[i * DWT_MAX_LENGTH] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = y[i * DWT_MAX_LENGTH + j];
- analysis_97(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = round(buffer[i]);
- }
- }
-}
-#endif // DWT_TYPE
-
-// TODO(debargha): Implement the scaling differently so as not to have to
-// use the floating point dct
-static void dct16x16_1d_f(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
-
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
-
- temp1 = step[ 8]*C7;
- temp2 = step[15]*C9;
- output[ 8] = temp1 + temp2;
-
- temp1 = step[ 9]*C11;
- temp2 = step[14]*C5;
- output[ 9] = temp1 - temp2;
-
- temp1 = step[10]*C3;
- temp2 = step[13]*C13;
- output[10] = temp1 + temp2;
-
- temp1 = step[11]*C15;
- temp2 = step[12]*C1;
- output[11] = temp1 - temp2;
-
- temp1 = step[11]*C1;
- temp2 = step[12]*C15;
- output[12] = temp2 + temp1;
-
- temp1 = step[10]*C13;
- temp2 = step[13]*C3;
- output[13] = temp2 - temp1;
-
- temp1 = step[ 9]*C5;
- temp2 = step[14]*C11;
- output[14] = temp2 + temp1;
-
- temp1 = step[ 8]*C9;
- temp2 = step[15]*C7;
- output[15] = temp2 - temp1;
-
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
-
- temp1 = output[4]*C14;
- temp2 = output[7]*C2;
- step[ 4] = temp1 + temp2;
-
- temp1 = output[5]*C10;
- temp2 = output[6]*C6;
- step[ 5] = temp1 + temp2;
-
- temp1 = output[5]*C6;
- temp2 = output[6]*C10;
- step[ 6] = temp2 - temp1;
-
- temp1 = output[4]*C2;
- temp2 = output[7]*C14;
- step[ 7] = temp2 - temp1;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1]);
- output[ 8] = (step[ 0] - step[ 1]);
-
- temp1 = step[2]*C12;
- temp2 = step[3]*C4;
- temp1 = temp1 + temp2;
- output[ 4] = 2*(temp1*C8);
-
- temp1 = step[2]*C4;
- temp2 = step[3]*C12;
- temp1 = temp2 - temp1;
- output[12] = 2*(temp1*C8);
-
- output[ 2] = 2*((step[4] + step[ 5])*C8);
- output[14] = 2*((step[7] - step[ 6])*C8);
-
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2);
- output[10] = (temp1 - temp2);
-
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
-
- temp1 = intermediate[8]*C12;
- temp2 = intermediate[9]*C4;
- temp1 = temp1 - temp2;
- output[3] = 2*(temp1*C8);
-
- temp1 = intermediate[8]*C4;
- temp2 = intermediate[9]*C12;
- temp1 = temp2 + temp1;
- output[13] = 2*(temp1*C8);
-
- output[ 9] = 2*((step[10] + step[11])*C8);
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
-
- output[15] = (intermediate[11] + intermediate[12]);
- output[ 1] = -(intermediate[11] - intermediate[12]);
-
- output[ 7] = 2*(intermediate[13]*C8);
-
- temp1 = intermediate[14]*C12;
- temp2 = intermediate[15]*C4;
- temp1 = temp1 - temp2;
- output[11] = -2*(temp1*C8);
-
- temp1 = intermediate[14]*C4;
- temp2 = intermediate[15]*C12;
- temp1 = temp2 + temp1;
- output[ 5] = 2*(temp1*C8);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
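+// 1-D 32-point forward DCT: a cascade of integer butterfly stages in which
+// products with the cospi constants are rounded back down by dct_32_round().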
+static void dct32_1d(int *input, int *output) {
+ int step[32];
+ // Stage 1
+ step[0] = input[0] + input[(32 - 1)];
+ step[1] = input[1] + input[(32 - 2)];
+ step[2] = input[2] + input[(32 - 3)];
+ step[3] = input[3] + input[(32 - 4)];
+ step[4] = input[4] + input[(32 - 5)];
+ step[5] = input[5] + input[(32 - 6)];
+ step[6] = input[6] + input[(32 - 7)];
+ step[7] = input[7] + input[(32 - 8)];
+ step[8] = input[8] + input[(32 - 9)];
+ step[9] = input[9] + input[(32 - 10)];
+ step[10] = input[10] + input[(32 - 11)];
+ step[11] = input[11] + input[(32 - 12)];
+ step[12] = input[12] + input[(32 - 13)];
+ step[13] = input[13] + input[(32 - 14)];
+ step[14] = input[14] + input[(32 - 15)];
+ step[15] = input[15] + input[(32 - 16)];
+ step[16] = -input[16] + input[(32 - 17)];
+ step[17] = -input[17] + input[(32 - 18)];
+ step[18] = -input[18] + input[(32 - 19)];
+ step[19] = -input[19] + input[(32 - 20)];
+ step[20] = -input[20] + input[(32 - 21)];
+ step[21] = -input[21] + input[(32 - 22)];
+ step[22] = -input[22] + input[(32 - 23)];
+ step[23] = -input[23] + input[(32 - 24)];
+ step[24] = -input[24] + input[(32 - 25)];
+ step[25] = -input[25] + input[(32 - 26)];
+ step[26] = -input[26] + input[(32 - 27)];
+ step[27] = -input[27] + input[(32 - 28)];
+ step[28] = -input[28] + input[(32 - 29)];
+ step[29] = -input[29] + input[(32 - 30)];
+ step[30] = -input[30] + input[(32 - 31)];
+ step[31] = -input[31] + input[(32 - 32)];
-static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch,
- int scale) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[256];
- // First transform columns
- for (i = 0; i < 16; i++) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct16x16_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; j++)
- output[j*16 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = output[j + i*16];
- dct16x16_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i*16] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 256; i++)
- out[i] = (short)round(output[i] / (2 << scale));
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
+ // Stage 2
+ output[0] = step[0] + step[16 - 1];
+ output[1] = step[1] + step[16 - 2];
+ output[2] = step[2] + step[16 - 3];
+ output[3] = step[3] + step[16 - 4];
+ output[4] = step[4] + step[16 - 5];
+ output[5] = step[5] + step[16 - 6];
+ output[6] = step[6] + step[16 - 7];
+ output[7] = step[7] + step[16 - 8];
+ output[8] = -step[8] + step[16 - 9];
+ output[9] = -step[9] + step[16 - 10];
+ output[10] = -step[10] + step[16 - 11];
+ output[11] = -step[11] + step[16 - 12];
+ output[12] = -step[12] + step[16 - 13];
+ output[13] = -step[13] + step[16 - 14];
+ output[14] = -step[14] + step[16 - 15];
+ output[15] = -step[15] + step[16 - 16];
+
+ output[16] = step[16];
+ output[17] = step[17];
+ output[18] = step[18];
+ output[19] = step[19];
+
+ output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64);
+ output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64);
+ output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64);
+ output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64);
+
+ output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64);
+ output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64);
+ output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64);
+ output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64);
+
+ output[28] = step[28];
+ output[29] = step[29];
+ output[30] = step[30];
+ output[31] = step[31];
-void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) {
- int j1, i, j, k;
- float b[8];
- float b1[8];
- float d[8][8];
- float f0 = (float) .7071068;
- float f1 = (float) .4903926;
- float f2 = (float) .4619398;
- float f3 = (float) .4157348;
- float f4 = (float) .3535534;
- float f5 = (float) .2777851;
- float f6 = (float) .1913417;
- float f7 = (float) .0975452;
- pitch = pitch / 2;
- for (i = 0, k = 0; i < 8; i++, k += pitch) {
- for (j = 0; j < 8; j++) {
- b[j] = (float)(block[k + j] << (3 - scale));
- }
- /* Horizontal transform */
- for (j = 0; j < 4; j++) {
- j1 = 7 - j;
- b1[j] = b[j] + b[j1];
- b1[j1] = b[j] - b[j1];
- }
- b[0] = b1[0] + b1[3];
- b[1] = b1[1] + b1[2];
- b[2] = b1[1] - b1[2];
- b[3] = b1[0] - b1[3];
- b[4] = b1[4];
- b[5] = (b1[6] - b1[5]) * f0;
- b[6] = (b1[6] + b1[5]) * f0;
- b[7] = b1[7];
- d[i][0] = (b[0] + b[1]) * f4;
- d[i][4] = (b[0] - b[1]) * f4;
- d[i][2] = b[2] * f6 + b[3] * f2;
- d[i][6] = b[3] * f6 - b[2] * f2;
- b1[4] = b[4] + b[5];
- b1[7] = b[7] + b[6];
- b1[5] = b[4] - b[5];
- b1[6] = b[7] - b[6];
- d[i][1] = b1[4] * f7 + b1[7] * f1;
- d[i][5] = b1[5] * f3 + b1[6] * f5;
- d[i][7] = b1[7] * f7 - b1[4] * f1;
- d[i][3] = b1[6] * f3 - b1[5] * f5;
- }
- /* Vertical transform */
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 4; j++) {
- j1 = 7 - j;
- b1[j] = d[j][i] + d[j1][i];
- b1[j1] = d[j][i] - d[j1][i];
- }
- b[0] = b1[0] + b1[3];
- b[1] = b1[1] + b1[2];
- b[2] = b1[1] - b1[2];
- b[3] = b1[0] - b1[3];
- b[4] = b1[4];
- b[5] = (b1[6] - b1[5]) * f0;
- b[6] = (b1[6] + b1[5]) * f0;
- b[7] = b1[7];
- d[0][i] = (b[0] + b[1]) * f4;
- d[4][i] = (b[0] - b[1]) * f4;
- d[2][i] = b[2] * f6 + b[3] * f2;
- d[6][i] = b[3] * f6 - b[2] * f2;
- b1[4] = b[4] + b[5];
- b1[7] = b[7] + b[6];
- b1[5] = b[4] - b[5];
- b1[6] = b[7] - b[6];
- d[1][i] = b1[4] * f7 + b1[7] * f1;
- d[5][i] = b1[5] * f3 + b1[6] * f5;
- d[7][i] = b1[7] * f7 - b1[4] * f1;
- d[3][i] = b1[6] * f3 - b1[5] * f5;
- }
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- *(coefs + j + i * 8) = (short) floor(d[i][j] + 0.5);
- }
- }
- return;
-}
+ // Stage 3
+ step[0] = output[0] + output[(8 - 1)];
+ step[1] = output[1] + output[(8 - 2)];
+ step[2] = output[2] + output[(8 - 3)];
+ step[3] = output[3] + output[(8 - 4)];
+ step[4] = -output[4] + output[(8 - 5)];
+ step[5] = -output[5] + output[(8 - 6)];
+ step[6] = -output[6] + output[(8 - 7)];
+ step[7] = -output[7] + output[(8 - 8)];
+ step[8] = output[8];
+ step[9] = output[9];
+ step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64);
+ step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64);
+ step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64);
+ step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64);
+ step[14] = output[14];
+ step[15] = output[15];
+
+ step[16] = output[16] + output[23];
+ step[17] = output[17] + output[22];
+ step[18] = output[18] + output[21];
+ step[19] = output[19] + output[20];
+ step[20] = -output[20] + output[19];
+ step[21] = -output[21] + output[18];
+ step[22] = -output[22] + output[17];
+ step[23] = -output[23] + output[16];
+ step[24] = -output[24] + output[31];
+ step[25] = -output[25] + output[30];
+ step[26] = -output[26] + output[29];
+ step[27] = -output[27] + output[28];
+ step[28] = output[28] + output[27];
+ step[29] = output[29] + output[26];
+ step[30] = output[30] + output[25];
+ step[31] = output[31] + output[24];
-#define divide_bits(d, n) ((n) < 0 ? (d) << (n) : (d) >> (n))
+ // Stage 4
+ output[0] = step[0] + step[3];
+ output[1] = step[1] + step[2];
+ output[2] = -step[2] + step[1];
+ output[3] = -step[3] + step[0];
+ output[4] = step[4];
+ output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);
+ output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);
+ output[7] = step[7];
+ output[8] = step[8] + step[11];
+ output[9] = step[9] + step[10];
+ output[10] = -step[10] + step[9];
+ output[11] = -step[11] + step[8];
+ output[12] = -step[12] + step[15];
+ output[13] = -step[13] + step[14];
+ output[14] = step[14] + step[13];
+ output[15] = step[15] + step[12];
+
+ output[16] = step[16];
+ output[17] = step[17];
+ output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64);
+ output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64);
+ output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64);
+ output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64);
+ output[22] = step[22];
+ output[23] = step[23];
+ output[24] = step[24];
+ output[25] = step[25];
+ output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64);
+ output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64);
+ output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64);
+ output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64);
+ output[30] = step[30];
+ output[31] = step[31];
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
+ // Stage 5
+ step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64);
+ step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64);
+ step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64);
+ step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64);
+ step[4] = output[4] + output[5];
+ step[5] = -output[5] + output[4];
+ step[6] = -output[6] + output[7];
+ step[7] = output[7] + output[6];
+ step[8] = output[8];
+ step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64);
+ step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64);
+ step[11] = output[11];
+ step[12] = output[12];
+ step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64);
+ step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64);
+ step[15] = output[15];
+
+ step[16] = output[16] + output[19];
+ step[17] = output[17] + output[18];
+ step[18] = -output[18] + output[17];
+ step[19] = -output[19] + output[16];
+ step[20] = -output[20] + output[23];
+ step[21] = -output[21] + output[22];
+ step[22] = output[22] + output[21];
+ step[23] = output[23] + output[20];
+ step[24] = output[24] + output[27];
+ step[25] = output[25] + output[26];
+ step[26] = -output[26] + output[25];
+ step[27] = -output[27] + output[24];
+ step[28] = -output[28] + output[31];
+ step[29] = -output[29] + output[30];
+ step[30] = output[30] + output[29];
+ step[31] = output[31] + output[28];
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
-}
+ // Stage 6
+ output[0] = step[0];
+ output[1] = step[1];
+ output[2] = step[2];
+ output[3] = step[3];
+ output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64);
+ output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64);
+ output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64);
+ output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64);
+ output[8] = step[8] + step[9];
+ output[9] = -step[9] + step[8];
+ output[10] = -step[10] + step[11];
+ output[11] = step[11] + step[10];
+ output[12] = step[12] + step[13];
+ output[13] = -step[13] + step[12];
+ output[14] = -step[14] + step[15];
+ output[15] = step[15] + step[14];
+
+ output[16] = step[16];
+ output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64);
+ output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64);
+ output[19] = step[19];
+ output[20] = step[20];
+ output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64);
+ output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64);
+ output[23] = step[23];
+ output[24] = step[24];
+ output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64);
+ output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64);
+ output[27] = step[27];
+ output[28] = step[28];
+ output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64);
+ output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64);
+ output[31] = step[31];
-#elif DWTDCT_TYPE == DWTDCT16X16
+ // Stage 7
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ step[4] = output[4];
+ step[5] = output[5];
+ step[6] = output[6];
+ step[7] = output[7];
+ step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64);
+ step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64);
+ step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64);
+ step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64);
+ step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64);
+ step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64);
+ step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64);
+ step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64);
+
+ step[16] = output[16] + output[17];
+ step[17] = -output[17] + output[16];
+ step[18] = -output[18] + output[19];
+ step[19] = output[19] + output[18];
+ step[20] = output[20] + output[21];
+ step[21] = -output[21] + output[20];
+ step[22] = -output[22] + output[23];
+ step[23] = output[23] + output[22];
+ step[24] = output[24] + output[25];
+ step[25] = -output[25] + output[24];
+ step[26] = -output[26] + output[27];
+ step[27] = output[27] + output[26];
+ step[28] = output[28] + output[29];
+ step[29] = -output[29] + output[28];
+ step[30] = -output[30] + output[31];
+ step[31] = output[31] + output[30];
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16);
+  // Final stage: output indices are bit-reversed.
+ output[0] = step[0];
+ output[16] = step[1];
+ output[8] = step[2];
+ output[24] = step[3];
+ output[4] = step[4];
+ output[20] = step[5];
+ output[12] = step[6];
+ output[28] = step[7];
+ output[2] = step[8];
+ output[18] = step[9];
+ output[10] = step[10];
+ output[26] = step[11];
+ output[6] = step[12];
+ output[22] = step[13];
+ output[14] = step[14];
+ output[30] = step[15];
+
+ output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64);
+ output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64);
+ output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64);
+ output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64);
+ output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64);
+ output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64);
+ output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64);
+ output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64);
+ output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
+ output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
+ output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
+ output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
+ output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
+ output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
+ output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
+ output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[8 * 8];
+void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
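+  // The pitch argument counts bytes (two per int16_t sample), so the
+  // element stride is pitch >> 1.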
+ int shortpitch = pitch >> 1;
int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(2, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(2, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(2, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct8x8_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 8, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 32 * 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 32 * 8, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 33 * 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 33 * 8, buffer + i * 8, sizeof(short) * 8);
-
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
+ int output[32 * 32];
+
+ // Columns
+ for (i = 0; i < 32; i++) {
+ int temp_in[32], temp_out[32];
+ for (j = 0; j < 32; j++)
+ temp_in[j] = input[j * shortpitch + i] << 2;
+ dct32_1d(temp_in, temp_out);
+ for (j = 0; j < 32; j++)
+ output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
-}
-#endif
-
-#if CONFIG_TX64X64
-void vp9_short_fdct64x64_c(short *input, short *out, int pitch) {
- // assume out is a 64x64 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16);
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 48; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 16; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
-#elif DWTDCT_TYPE == DWTDCT16X16
- vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16);
-
- // There is no dct used on the highest bands for now.
- // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS
- // TODO(debargha): experiment with turning these coeffs to 0
+ // Rows
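+  // Second 1-D pass over the rows; the final >> 2 removes the input scaling.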
for (i = 0; i < 32; ++i) {
- for (j = 32; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 32; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
+ int temp_in[32], temp_out[32];
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = output[j + i * 32];
+ dct32_1d(temp_in, temp_out);
+ for (j = 0; j < 32; ++j)
+ out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
-#endif // DWTDCT_TYPE
}
-#endif // CONFIG_TX64X64
-#endif // CONFIG_DWTDCTHYBRID
+
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 3f5133062..5271a597c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -21,7 +21,6 @@
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_setupintrarecon.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_invtrans.h"
@@ -29,8 +28,9 @@
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_tokenize.h"
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include <stdio.h>
#include <math.h>
#include <limits.h>
@@ -45,18 +45,15 @@
int enc_debug = 0;
#endif
-extern void select_interp_filter_type(VP9_COMP *cpi);
+void vp9_select_interp_filter_type(VP9_COMP *cpi);
static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col);
static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col);
static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col);
static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
@@ -103,7 +100,7 @@ static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) {
*/
act = vp9_variance16x16(x->src.y_buffer, x->src.y_stride, VP9_VAR_OFFS, 0,
&sse);
- act = act << 4;
+ act <<= 4;
/* If the region is flat, lower the activity some more. */
if (act < 8 << 12)
@@ -488,8 +485,7 @@ static void update_state(VP9_COMP *cpi,
{
int segment_id = mbmi->segment_id;
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB)) {
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
for (i = 0; i < NB_TXFM_MODES; i++) {
cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i];
}
@@ -625,27 +621,19 @@ static unsigned find_seg_id(uint8_t *buf, int block_size,
}
static void set_offsets(VP9_COMP *cpi,
- int mb_row, int mb_col, int block_size,
- int *ref_yoffset, int *ref_uvoffset) {
+ int mb_row, int mb_col, int block_size) {
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
const int dst_fb_idx = cm->new_fb_idx;
- const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride;
- const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride;
- const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
- const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
- const int src_y_stride = x->src.y_stride;
- const int src_uv_stride = x->src.uv_stride;
- const int src_yoffset = 16 * mb_row * src_y_stride + 16 * mb_col;
- const int src_uvoffset = 8 * mb_row * src_uv_stride + 8 * mb_col;
- const int ref_fb_idx = cm->lst_fb_idx;
- const int ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
- const int ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
const int idx_map = mb_row * cm->mb_cols + mb_col;
const int idx_str = xd->mode_info_stride * mb_row + mb_col;
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 2 &&
+ mb_row == 4 && mb_col == 5);
+#endif
// entropy context structures
xd->above_context = cm->above_context + mb_col;
xd->left_context = cm->left_context + (mb_row & 3);
@@ -664,9 +652,9 @@ static void set_offsets(VP9_COMP *cpi,
xd->prev_mode_info_context = cm->prev_mi + idx_str;
// Set up destination pointers
- xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
- xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
- xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->dst,
+ &cm->yv12_fb[dst_fb_idx],
+ mb_row, mb_col, NULL, NULL);
/* Set up limit values for MV components to prevent them from
* extending beyond the UMV borders assuming 16x16 block size */
@@ -686,17 +674,12 @@ static void set_offsets(VP9_COMP *cpi,
xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
// Are edges available for intra prediction?
- xd->up_available = (mb_row != 0);
- xd->left_available = (mb_col != 0);
-
- /* Reference buffer offsets */
- *ref_yoffset = (mb_row * ref_y_stride * 16) + (mb_col * 16);
- *ref_uvoffset = (mb_row * ref_uv_stride * 8) + (mb_col * 8);
+ xd->up_available = (mb_row != 0);
+ xd->left_available = (mb_col > cm->cur_tile_mb_col_start);
+ xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
/* set up source buffers */
- x->src.y_buffer = cpi->Source->y_buffer + src_yoffset;
- x->src.u_buffer = cpi->Source->u_buffer + src_uvoffset;
- x->src.v_buffer = cpi->Source->v_buffer + src_uvoffset;
+ setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL);
/* R/D setup */
x->rddiv = cpi->RDDIV;
@@ -727,34 +710,36 @@ static void set_offsets(VP9_COMP *cpi,
const int x = mb_col & ~3;
const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
+ const int tile_progress = cm->cur_tile_mb_col_start * cm->mb_rows;
+ const int mb_cols = cm->cur_tile_mb_col_end - cm->cur_tile_mb_col_start;
cpi->seg0_progress =
- ((y * cm->mb_cols + x * 4 + p32 + p16) << 16) / cm->MBs;
+ ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs;
}
} else {
mbmi->segment_id = 0;
}
}
-static void pick_mb_modes(VP9_COMP *cpi,
- int mb_row,
- int mb_col,
- TOKENEXTRA **tp,
- int *totalrate,
- int *totaldist) {
+static int pick_mb_modes(VP9_COMP *cpi,
+ int mb_row0,
+ int mb_col0,
+ TOKENEXTRA **tp,
+ int *totalrate,
+ int *totaldist) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
int i;
- int recon_yoffset, recon_uvoffset;
+ int splitmodes_used = 0;
ENTROPY_CONTEXT_PLANES left_context[2];
ENTROPY_CONTEXT_PLANES above_context[2];
ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
- + mb_col;
+ + mb_col0;
/* Function should not modify L & A contexts; save and restore on exit */
vpx_memcpy(left_context,
- cm->left_context + (mb_row & 2),
+ cm->left_context + (mb_row0 & 2),
sizeof(left_context));
vpx_memcpy(above_context,
initial_above_context_ptr,
@@ -763,17 +748,18 @@ static void pick_mb_modes(VP9_COMP *cpi,
/* Encode MBs in raster order within the SB */
for (i = 0; i < 4; i++) {
const int x_idx = i & 1, y_idx = i >> 1;
+ const int mb_row = mb_row0 + y_idx;
+ const int mb_col = mb_col0 + x_idx;
MB_MODE_INFO *mbmi;
- if ((mb_row + y_idx >= cm->mb_rows) || (mb_col + x_idx >= cm->mb_cols)) {
+ if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) {
// MB lies outside frame, move on
continue;
}
// Index of the MB in the SB 0..3
xd->mb_index = i;
- set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
- &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 16);
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
@@ -781,10 +767,6 @@ static void pick_mb_modes(VP9_COMP *cpi,
mbmi = &xd->mode_info_context->mbmi;
mbmi->sb_type = BLOCK_SIZE_MB16X16;
- cpi->update_context = 0; // TODO Do we need this now??
-
- vp9_intra_prediction_down_copy(xd);
-
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (cm->frame_type == KEY_FRAME) {
@@ -798,8 +780,8 @@ static void pick_mb_modes(VP9_COMP *cpi,
*totaldist += d;
// Dummy encode, do not do the tokenization
- encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0,
- mb_row + y_idx, mb_col + x_idx);
+ encode_macroblock(cpi, tp, 0, mb_row, mb_col);
+
// Note the encoder may have changed the segment_id
// Save the coding context
@@ -812,14 +794,14 @@ static void pick_mb_modes(VP9_COMP *cpi,
if (enc_debug)
printf("inter pick_mb_modes %d %d\n", mb_row, mb_col);
#endif
- vp9_pick_mode_inter_macroblock(cpi, x, recon_yoffset,
- recon_uvoffset, &r, &d);
+ vp9_pick_mode_inter_macroblock(cpi, x, mb_row, mb_col, &r, &d);
*totalrate += r;
*totaldist += d;
+ splitmodes_used += (mbmi->mode == SPLITMV);
+
// Dummy encode, do not do the tokenization
- encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0,
- mb_row + y_idx, mb_col + x_idx);
+ encode_macroblock(cpi, tp, 0, mb_row, mb_col);
seg_id = mbmi->segment_id;
if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) {
@@ -842,12 +824,14 @@ static void pick_mb_modes(VP9_COMP *cpi,
}
/* Restore L & A coding context to those in place on entry */
- vpx_memcpy(cm->left_context + (mb_row & 2),
+ vpx_memcpy(cm->left_context + (mb_row0 & 2),
left_context,
sizeof(left_context));
vpx_memcpy(initial_above_context_ptr,
above_context,
sizeof(above_context));
+
+ return splitmodes_used;
}
static void pick_sb_modes(VP9_COMP *cpi,
@@ -859,13 +843,11 @@ static void pick_sb_modes(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int recon_yoffset, recon_uvoffset;
- set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 32);
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB32X32;
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
- cpi->update_context = 0; // TODO Do we need this now??
/* Find best coding mode & reconstruct the MB so it is available
* as a predictor for MBs that follow in the SB */
@@ -878,11 +860,7 @@ static void pick_sb_modes(VP9_COMP *cpi,
vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context,
sizeof(MODE_INFO));
} else {
- vp9_rd_pick_inter_mode_sb32(cpi, x,
- recon_yoffset,
- recon_uvoffset,
- totalrate,
- totaldist);
+ vp9_rd_pick_inter_mode_sb32(cpi, x, mb_row, mb_col, totalrate, totaldist);
}
}
@@ -895,30 +873,21 @@ static void pick_sb64_modes(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int recon_yoffset, recon_uvoffset;
- set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 64);
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
- cpi->update_context = 0; // TODO(rbultje) Do we need this now??
/* Find best coding mode & reconstruct the MB so it is available
* as a predictor for MBs that follow in the SB */
if (cm->frame_type == KEY_FRAME) {
- vp9_rd_pick_intra_mode_sb64(cpi, x,
- totalrate,
- totaldist);
+ vp9_rd_pick_intra_mode_sb64(cpi, x, totalrate, totaldist);
/* Save the coding context */
- vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context,
- sizeof(MODE_INFO));
+ vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context, sizeof(MODE_INFO));
} else {
- vp9_rd_pick_inter_mode_sb64(cpi, x,
- recon_yoffset,
- recon_uvoffset,
- totalrate,
- totaldist);
+ vp9_rd_pick_inter_mode_sb64(cpi, x, mb_row, mb_col, totalrate, totaldist);
}
}
@@ -986,14 +955,13 @@ static void encode_sb(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int recon_yoffset, recon_uvoffset;
cpi->sb32_count[is_sb]++;
if (is_sb) {
- set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 32);
update_state(cpi, &x->sb32_context[xd->sb_index], 32, output_enabled);
- encode_superblock32(cpi, tp, recon_yoffset, recon_uvoffset,
+ encode_superblock32(cpi, tp,
output_enabled, mb_row, mb_col);
if (output_enabled)
update_stats(cpi);
@@ -1015,17 +983,14 @@ static void encode_sb(VP9_COMP *cpi,
continue;
}
- set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
- &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16);
xd->mb_index = i;
update_state(cpi, &x->mb_context[xd->sb_index][i], 16, output_enabled);
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
- vp9_intra_prediction_down_copy(xd);
-
- encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset,
+ encode_macroblock(cpi, tp,
output_enabled, mb_row + y_idx, mb_col + x_idx);
if (output_enabled)
update_stats(cpi);
@@ -1060,11 +1025,9 @@ static void encode_sb64(VP9_COMP *cpi,
cpi->sb64_count[is_sb[0] == 2]++;
if (is_sb[0] == 2) {
- int recon_yoffset, recon_uvoffset;
-
- set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 64);
update_state(cpi, &x->sb64_context, 64, 1);
- encode_superblock64(cpi, tp, recon_yoffset, recon_uvoffset,
+ encode_superblock64(cpi, tp,
1, mb_row, mb_col);
update_stats(cpi);
@@ -1098,17 +1061,18 @@ static void encode_sb_row(VP9_COMP *cpi,
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
int mb_col;
- int mb_cols = cm->mb_cols;
// Initialize the left context for the new SB row
vpx_memset(cm->left_context, 0, sizeof(cm->left_context));
// Code each SB in the row
- for (mb_col = 0; mb_col < mb_cols; mb_col += 4) {
+ for (mb_col = cm->cur_tile_mb_col_start;
+ mb_col < cm->cur_tile_mb_col_end; mb_col += 4) {
int i;
int sb32_rate = 0, sb32_dist = 0;
int is_sb[4];
int sb64_rate = INT_MAX, sb64_dist;
+ int sb64_skip = 0;
ENTROPY_CONTEXT_PLANES l[4], a[4];
TOKENEXTRA *tp_orig = *tp;
@@ -1118,18 +1082,27 @@ static void encode_sb_row(VP9_COMP *cpi,
const int x_idx = (i & 1) << 1, y_idx = i & 2;
int mb_rate = 0, mb_dist = 0;
int sb_rate = INT_MAX, sb_dist;
+ int splitmodes_used = 0;
+ int sb32_skip = 0;
if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols)
continue;
xd->sb_index = i;
- pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
- tp, &mb_rate, &mb_dist);
+ splitmodes_used = pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
+ tp, &mb_rate, &mb_dist);
+
mb_rate += vp9_cost_bit(cm->sb32_coded, 0);
- if (!((( mb_cols & 1) && mb_col + x_idx == mb_cols - 1) ||
- ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) {
+ if (cpi->sf.splitmode_breakout) {
+ sb32_skip = splitmodes_used;
+ sb64_skip += splitmodes_used;
+ }
+
+ if ( !sb32_skip &&
+ !(((cm->mb_cols & 1) && mb_col + x_idx == cm->mb_cols - 1) ||
+ ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) {
/* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
tp, &sb_rate, &sb_dist);
@@ -1147,6 +1120,11 @@ static void encode_sb_row(VP9_COMP *cpi,
is_sb[i] = 0;
sb32_rate += mb_rate;
sb32_dist += mb_dist;
+
+ // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled).
+ if (cpi->sf.mb16_breakout) {
+ ++sb64_skip;
+ }
}
/* Encode SB using best computed mode(s) */
@@ -1162,7 +1140,8 @@ static void encode_sb_row(VP9_COMP *cpi,
memcpy(cm->left_context, &l, sizeof(l));
sb32_rate += vp9_cost_bit(cm->sb64_coded, 0);
- if (!((( mb_cols & 3) && mb_col + 3 >= mb_cols) ||
+ if (!sb64_skip &&
+ !(((cm->mb_cols & 3) && mb_col + 3 >= cm->mb_cols) ||
((cm->mb_rows & 3) && mb_row + 3 >= cm->mb_rows))) {
pick_sb64_modes(cpi, mb_row, mb_col, tp, &sb64_rate, &sb64_dist);
sb64_rate += vp9_cost_bit(cm->sb64_coded, 1);
@@ -1205,7 +1184,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
// Copy data over into macro block data structures.
x->src = *cpi->Source;
- xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+ xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
// set up frame for intra coded blocks
@@ -1239,18 +1218,33 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
vpx_memset(cm->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
- xd->fullpixel_mask = 0xffffffff;
- if (cm->full_pixel)
- xd->fullpixel_mask = 0xfffffff8;
+ xd->fullpixel_mask = cm->full_pixel ? 0xfffffff8 : 0xffffffff;
}
+static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
+ if (lossless) {
+ cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8;
+ cpi->mb.optimize = 0;
+ cpi->common.filter_level = 0;
+ cpi->zbin_mode_boost_enabled = FALSE;
+ cpi->common.txfm_mode = ONLY_4X4;
+ } else {
+ cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm;
+ }
+}
+
+
static void encode_frame_internal(VP9_COMP *cpi) {
int mb_row;
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
-
- TOKENEXTRA *tp = cpi->tok;
int totalrate;
// printf("encode_frame_internal frame %d (%d)\n",
@@ -1273,9 +1267,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
totalrate = 0;
- // Functions setup for all frame types so we can use MC in AltRef
- vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm);
-
// Reset frame count of inter 0,0 motion vector usage.
cpi->inter_zz_count = 0;
@@ -1292,16 +1283,21 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(cpi->NMVcount);
vp9_zero(cpi->coef_counts_4x4);
- vp9_zero(cpi->hybrid_coef_counts_4x4);
vp9_zero(cpi->coef_counts_8x8);
- vp9_zero(cpi->hybrid_coef_counts_8x8);
vp9_zero(cpi->coef_counts_16x16);
- vp9_zero(cpi->hybrid_coef_counts_16x16);
vp9_zero(cpi->coef_counts_32x32);
#if CONFIG_NEW_MVREF
vp9_zero(cpi->mb_mv_ref_count);
#endif
+
+ // force lossless mode when Q0 is selected
+ cpi->mb.e_mbd.lossless = (cm->base_qindex == 0 &&
+ cm->y1dc_delta_q == 0 &&
+ cm->uvdc_delta_q == 0 &&
+ cm->uvac_delta_q == 0);
+ switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
+
vp9_frame_init_quantizer(cpi);
vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
@@ -1330,12 +1326,20 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vpx_usec_timer_start(&emr_timer);
{
- // For each row of SBs in the frame
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) {
- encode_sb_row(cpi, mb_row, &tp, &totalrate);
- }
+ // Take tiles into account and give start/end MB
+ int tile_col;
+ TOKENEXTRA *tp = cpi->tok;
+
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ TOKENEXTRA *tp_old = tp;
- cpi->tok_count = (unsigned int)(tp - cpi->tok);
+ // For each row of SBs in the frame
+ vp9_get_tile_col_offsets(cm, tile_col);
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) {
+ encode_sb_row(cpi, mb_row, &tp, &totalrate);
+ }
+ cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
+ }
}
vpx_usec_timer_mark(&emr_timer);
@@ -1388,8 +1392,7 @@ static void reset_skip_txfm_size_mb(VP9_COMP *cpi,
const int segment_id = mbmi->segment_id;
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
(cm->mb_no_coeff_skip && mbmi->mb_skip_coeff));
mbmi->txfm_size = txfm_max;
}
@@ -1413,9 +1416,8 @@ static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs,
int x, y;
for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++) {
+ for (x = 0; x < xmbs; x++)
mi[y * mis + x].mbmi.txfm_size = txfm_size;
- }
}
}
@@ -1433,8 +1435,7 @@ static void reset_skip_txfm_size_sb32(VP9_COMP *cpi, MODE_INFO *mi,
const int xmbs = MIN(2, mb_cols_left);
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
(cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
}
@@ -1454,8 +1455,7 @@ static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi,
const int xmbs = MIN(4, mb_cols_left);
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
(cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
}
@@ -1526,9 +1526,9 @@ void vp9_encode_frame(VP9_COMP *cpi) {
*/
if (cpi->common.frame_type == KEY_FRAME)
frame_type = 0;
- else if (cpi->is_src_frame_alt_ref && cpi->common.refresh_golden_frame)
+ else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame)
frame_type = 3;
- else if (cpi->common.refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
+ else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
frame_type = 1;
else
frame_type = 2;
@@ -1549,11 +1549,12 @@ void vp9_encode_frame(VP9_COMP *cpi) {
pred_type = HYBRID_PREDICTION;
/* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
-#if CONFIG_LOSSLESS
+
+ cpi->mb.e_mbd.lossless = 0;
if (cpi->oxcf.lossless) {
txfm_type = ONLY_4X4;
+ cpi->mb.e_mbd.lossless = 1;
} else
-#endif
/* FIXME (rbultje)
* this is a hack (no really), basically to work around the complete
* nonsense coefficient cost prediction for keyframes. The probabilities
@@ -1671,7 +1672,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
// Update interpolation filter strategy for next frame.
if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter))
- select_interp_filter_type(cpi);
+ vp9_select_interp_filter_type(cpi);
} else {
encode_frame_internal(cpi);
}
@@ -1683,30 +1684,23 @@ void vp9_setup_block_ptrs(MACROBLOCK *x) {
int i;
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
+ for (c = 0; c < 4; c++)
x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
- }
}
for (r = 0; r < 2; r++) {
- for (c = 0; c < 2; c++) {
+ for (c = 0; c < 2; c++)
x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
- }
}
for (r = 0; r < 2; r++) {
- for (c = 0; c < 2; c++) {
+ for (c = 0; c < 2; c++)
x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
- }
}
- x->block[24].src_diff = x->src_diff + 384;
-
-
- for (i = 0; i < 25; i++) {
+ for (i = 0; i < 24; i++)
x->block[i].coeff = x->coeff + i * 16;
- }
}
void vp9_build_block_offsets(MACROBLOCK *x) {
@@ -1995,7 +1989,6 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
}
static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled,
int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
@@ -2007,8 +2000,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
assert(!xd->mode_info_context->mbmi.sb_type);
#ifdef ENC_DEBUG
- enc_debug = (cpi->common.current_video_frame == 46 &&
- mb_row == 5 && mb_col == 2);
+ enc_debug = (cpi->common.current_video_frame == 2 &&
+ mb_row == 5 && mb_col == 18);
if (enc_debug)
printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
#endif
@@ -2086,58 +2079,50 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
assert(cm->frame_type != KEY_FRAME);
if (mbmi->ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.lst_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (mbmi->ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.gld_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.alt_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre,
+ &cpi->common.yv12_fb[ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
if (mbmi->second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.lst_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (mbmi->second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.gld_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.alt_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
- recon_yoffset;
- xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
- recon_uvoffset;
- xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
- recon_uvoffset;
+ setup_pred_block(&xd->second_pre,
+ &cpi->common.yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
if (!x->skip) {
- vp9_encode_inter16x16(x);
+ vp9_encode_inter16x16(x, mb_row, mb_col);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
mbmi->mb_skip_coeff = 0;
} else {
- vp9_build_1st_inter16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- vp9_build_2nd_inter16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
- }
+ vp9_build_inter16x16_predictors_mb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
- else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+ if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
@@ -2150,7 +2135,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
}
if (!x->skip) {
-#ifdef ENC_DEBUG
+#if 0 // def ENC_DEBUG
if (enc_debug) {
int i, j;
printf("\n");
@@ -2227,8 +2212,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
int segment_id = mbmi->segment_id;
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
!((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
- (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP)))) {
assert(mbmi->txfm_size <= TX_16X16);
if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
@@ -2253,7 +2237,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
}
static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
@@ -2326,37 +2309,37 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
assert(cm->frame_type != KEY_FRAME);
if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.lst_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.gld_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.alt_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre,
+ &cpi->common.yv12_fb[ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.lst_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.gld_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.alt_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
- recon_yoffset;
- xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
- recon_uvoffset;
- xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
- recon_uvoffset;
+ setup_pred_block(&xd->second_pre,
+ &cpi->common.yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
@@ -2465,8 +2448,7 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
!((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
@@ -2485,7 +2467,6 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
}
static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
@@ -2557,40 +2538,37 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
assert(cm->frame_type != KEY_FRAME);
if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.lst_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.gld_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.alt_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->pre.y_buffer =
- cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer =
- cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer =
- cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre,
+ &cpi->common.yv12_fb[ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.lst_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.gld_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.alt_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->second_pre.y_buffer =
- cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
- xd->second_pre.u_buffer =
- cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
- xd->second_pre.v_buffer =
- cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->second_pre,
+ &cpi->common.yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
@@ -2729,8 +2707,7 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
skip[4] && skip[5] && skip[6] && skip[7] &&
skip[8] && skip[9] && skip[10] && skip[11] &&
skip[12] && skip[13] && skip[14] && skip[15]))) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
int x, y;
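
The three functions touched above (encode_macroblock, encode_superblock32 and encode_superblock64) now repeat the same two-step pattern: look the frame buffer up through cm->ref_frame_map instead of the old lst/gld/alt_fb_idx fields, then let setup_pred_block position the prediction planes from mb_row/mb_col and the per-reference scale factors rather than adding precomputed recon offsets. A minimal sketch of that lookup, using a hypothetical helper name (get_ref_fb_idx is not part of this patch), for illustration only:

    /* Hypothetical helper, not in the patch: map an encoder-side reference
     * slot (last/golden/altref) to a frame buffer index through
     * cm->ref_frame_map, as the hunks above now do inline. */
    static int get_ref_fb_idx(const VP9_COMP *cpi, MV_REFERENCE_FRAME ref) {
      if (ref == LAST_FRAME)
        return cpi->common.ref_frame_map[cpi->lst_fb_idx];
      else if (ref == GOLDEN_FRAME)
        return cpi->common.ref_frame_map[cpi->gld_fb_idx];
      else
        return cpi->common.ref_frame_map[cpi->alt_fb_idx];
    }
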
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index 1b056e163..9f13edcec 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -14,8 +14,8 @@
struct macroblock;
-extern void vp9_build_block_offsets(struct macroblock *x);
+void vp9_build_block_offsets(struct macroblock *x);
-extern void vp9_setup_block_ptrs(struct macroblock *x);
+void vp9_setup_block_ptrs(struct macroblock *x);
#endif // VP9_ENCODER_VP9_ENCODEFRAME_H_
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index ce9a38003..be9c224b3 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -12,14 +12,11 @@
#include "vp9_rtcd.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/common/vp9_reconintra.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/common/vp9_invtrans.h"
#include "vp9/encoder/vp9_encodeintra.h"
int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
- int i;
- int intra_pred_var = 0;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
(void) cpi;
@@ -30,15 +27,15 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
vp9_encode_intra16x16mby(x);
} else {
+ int i;
+
for (i = 0; i < 16; i++) {
x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED;
vp9_encode_intra4x4block(x, i);
}
}
- intra_pred_var = vp9_get_mb_ss(x->src_diff);
-
- return intra_pred_var;
+ return vp9_get_mb_ss(x->src_diff);
}
void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
@@ -50,17 +47,17 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
b->bmi.as_mode.context = vp9_find_bpred_context(b);
#endif
- vp9_intra4x4_predict(b, b->bmi.as_mode.first, b->predictor);
+ vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor);
vp9_subtract_b(be, b, 16);
tx_type = get_tx_type_4x4(&x->e_mbd, b);
if (tx_type != DCT_DCT) {
- vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
- vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib, tx_type);
+ vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b) ;
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, ib);
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
}
@@ -72,7 +69,6 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
for (i = 0; i < 16; i++)
vp9_encode_intra4x4block(mb, i);
- return;
}
void vp9_encode_intra16x16mby(MACROBLOCK *x) {
@@ -84,24 +80,28 @@ void vp9_encode_intra16x16mby(MACROBLOCK *x) {
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
- if (tx_size == TX_16X16) {
- vp9_transform_mby_16x16(x);
- vp9_quantize_mby_16x16(x);
- if (x->optimize)
- vp9_optimize_mby_16x16(x);
- vp9_inverse_transform_mby_16x16(xd);
- } else if (tx_size == TX_8X8) {
- vp9_transform_mby_8x8(x);
- vp9_quantize_mby_8x8(x);
- if (x->optimize)
- vp9_optimize_mby_8x8(x);
- vp9_inverse_transform_mby_8x8(xd);
- } else {
- vp9_transform_mby_4x4(x);
- vp9_quantize_mby_4x4(x);
- if (x->optimize)
- vp9_optimize_mby_4x4(x);
- vp9_inverse_transform_mby_4x4(xd);
+ switch (tx_size) {
+ case TX_16X16:
+ vp9_transform_mby_16x16(x);
+ vp9_quantize_mby_16x16(x);
+ if (x->optimize)
+ vp9_optimize_mby_16x16(x);
+ vp9_inverse_transform_mby_16x16(xd);
+ break;
+ case TX_8X8:
+ vp9_transform_mby_8x8(x);
+ vp9_quantize_mby_8x8(x);
+ if (x->optimize)
+ vp9_optimize_mby_8x8(x);
+ vp9_inverse_transform_mby_8x8(xd);
+ break;
+ default:
+ vp9_transform_mby_4x4(x);
+ vp9_quantize_mby_4x4(x);
+ if (x->optimize)
+ vp9_optimize_mby_4x4(x);
+ vp9_inverse_transform_mby_4x4(xd);
+ break;
}
vp9_recon_mby(xd);
@@ -116,19 +116,22 @@ void vp9_encode_intra16x16mbuv(MACROBLOCK *x) {
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
xd->predictor, x->src.uv_stride);
- if (tx_size == TX_4X4) {
- vp9_transform_mbuv_4x4(x);
- vp9_quantize_mbuv_4x4(x);
- if (x->optimize)
- vp9_optimize_mbuv_4x4(x);
- vp9_inverse_transform_mbuv_4x4(xd);
- } else /* 16x16 or 8x8 */ {
- vp9_transform_mbuv_8x8(x);
- vp9_quantize_mbuv_8x8(x);
- if (x->optimize)
- vp9_optimize_mbuv_8x8(x);
- vp9_inverse_transform_mbuv_8x8(xd);
- }
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_transform_mbuv_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
+ if (x->optimize)
+ vp9_optimize_mbuv_4x4(x);
+ vp9_inverse_transform_mbuv_4x4(xd);
+ break;
+ default: // 16x16 or 8x8
+ vp9_transform_mbuv_8x8(x);
+ vp9_quantize_mbuv_8x8(x);
+ if (x->optimize)
+ vp9_optimize_mbuv_8x8(x);
+ vp9_inverse_transform_mbuv_8x8(xd);
+ break;
+ }
vp9_recon_intra_mbuv(xd);
}
@@ -141,7 +144,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
int i;
TX_TYPE tx_type;
- vp9_intra8x8_predict(b, b->bmi.as_mode.first, b->predictor);
+ vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor);
// generate residual blocks
vp9_subtract_4b_c(be, b, 16);
@@ -150,14 +153,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
if (tx_type != DCT_DCT) {
- vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,
- tx_type, 8);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
- vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
- tx_type, 8, xd->block[idx].eob);
+ vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
+ x->quantize_b_8x8(x, idx);
+ vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
+ 16, tx_type);
} else {
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->quantize_b_8x8(x, idx);
vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
}
} else {
@@ -166,12 +168,18 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
be = &x->block[ib + iblock[i]];
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
- vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
- vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+ vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
+ } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
+ vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
+ vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
+ i++;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, ib + iblock[i]);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
}
}
@@ -186,25 +194,22 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
}
void vp9_encode_intra8x8mby(MACROBLOCK *x) {
- int i, ib;
+ int i;
- for (i = 0; i < 4; i++) {
- ib = vp9_i8x8_block[i];
- vp9_encode_intra8x8(x, ib);
- }
+ for (i = 0; i < 4; i++)
+ vp9_encode_intra8x8(x, vp9_i8x8_block[i]);
}
-static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
- int mode) {
+static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
- vp9_intra_uv4x4_predict(b, mode, b->predictor);
+ vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor);
vp9_subtract_b(be, b, 8);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
- x->quantize_b_4x4(be, b);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 16);
+ x->quantize_b_4x4(x, ib);
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
@@ -212,17 +217,13 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
}
void vp9_encode_intra8x8mbuv(MACROBLOCK *x) {
- int i, ib, mode;
- BLOCKD *b;
+ int i;
for (i = 0; i < 4; i++) {
- ib = vp9_i8x8_block[i];
- b = &x->e_mbd.block[ib];
- mode = b->bmi.as_mode.first;
-
- /*u */
- encode_intra_uv4x4(x, i + 16, mode);
- /*v */
- encode_intra_uv4x4(x, i + 20, mode);
+ BLOCKD *b = &x->e_mbd.block[vp9_i8x8_block[i]];
+ int mode = b->bmi.as_mode.first;
+
+ encode_intra_uv4x4(x, i + 16, mode); // u
+ encode_intra_uv4x4(x, i + 20, mode); // v
}
}
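
In vp9_encode_intra8x8 the 4x4 fallback path now pairs adjacent blocks: when block i and block i+1 both use a plain DCT, a single fwd_txm8x4 call plus quantize_b_4x4_pair covers both, and the loop index skips ahead. A condensed reading of that branch (names taken from the hunk above; an illustration of the control flow, not new code in the patch):

    /* Condensed from the 4x4 branch of vp9_encode_intra8x8 above. */
    for (i = 0; i < 4; i++) {
      BLOCKD *b = &xd->block[ib + iblock[i]];
      BLOCK *be = &x->block[ib + iblock[i]];
      if (get_tx_type_4x4(xd, b) != DCT_DCT) {
        /* hybrid-transform blocks are still handled one at a time */
      } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
        x->fwd_txm8x4(be->src_diff, be->coeff, 32);   /* two 4x4 DCTs in one call */
        x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
        i++;                                          /* the pair consumed two blocks */
      } else {
        x->fwd_txm4x4(be->src_diff, be->coeff, 32);
        x->quantize_b_4x4(x, ib + iblock[i]);
      }
    }
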
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 45278a71b..62f1a2a30 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -29,9 +29,8 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
int r, c;
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
+ for (c = 0; c < 4; c++)
diff_ptr[c] = src_ptr[c] - pred_ptr[c];
- }
diff_ptr += pitch;
pred_ptr += pitch;
@@ -47,9 +46,9 @@ void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
int r, c;
for (r = 0; r < 8; r++) {
- for (c = 0; c < 8; c++) {
+ for (c = 0; c < 8; c++)
diff_ptr[c] = src_ptr[c] - pred_ptr[c];
- }
+
diff_ptr += pitch;
pred_ptr += pitch;
src_ptr += src_stride;
@@ -65,9 +64,8 @@ void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc,
int r, c;
for (r = 0; r < 8; r++) {
- for (c = 0; c < 8; c++) {
+ for (c = 0; c < 8; c++)
udiff[c] = usrc[c] - upred[c];
- }
udiff += 8;
upred += dst_stride;
@@ -98,9 +96,8 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
int r, c;
for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
+ for (c = 0; c < 16; c++)
diff[c] = src[c] - pred[c];
- }
diff += 16;
pred += dst_stride;
@@ -113,9 +110,8 @@ void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
int r, c;
for (r = 0; r < 32; r++) {
- for (c = 0; c < 32; c++) {
+ for (c = 0; c < 32; c++)
diff[c] = src[c] - pred[c];
- }
diff += 32;
pred += dst_stride;
@@ -132,9 +128,8 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
int r, c;
for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
+ for (c = 0; c < 16; c++)
udiff[c] = usrc[c] - upred[c];
- }
udiff += 16;
upred += dst_stride;
@@ -142,9 +137,8 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
}
for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
+ for (c = 0; c < 16; c++)
vdiff[c] = vsrc[c] - vpred[c];
- }
vdiff += 16;
vpred += dst_stride;
@@ -166,52 +160,29 @@ static void subtract_mb(MACROBLOCK *x) {
x->e_mbd.predictor, x->src.uv_stride);
}
-static void build_dcblock_4x4(MACROBLOCK *x) {
- int16_t *src_diff_ptr = &x->src_diff[384];
- int i;
-
- for (i = 0; i < 16; i++) {
- src_diff_ptr[i] = x->coeff[i * 16];
- x->coeff[i * 16] = 0;
- }
-}
-
void vp9_transform_mby_4x4(MACROBLOCK *x) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
- int has_2nd_order = get_2nd_order_usage(xd);
for (i = 0; i < 16; i++) {
BLOCK *b = &x->block[i];
TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
+ vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type);
+ } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
+ x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 32);
+ i++;
} else {
- x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 32);
+ x->fwd_txm4x4(x->block[i].src_diff, x->block[i].coeff, 32);
}
}
-
- if (has_2nd_order) {
- // build dc block from 16 y dc values
- build_dcblock_4x4(x);
-
- // do 2nd order transform on the dc block
- x->short_walsh4x4(&x->block[24].src_diff[0],
- &x->block[24].coeff[0], 8);
- } else {
- vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
- }
}
void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
int i;
- for (i = 16; i < 24; i += 2) {
- x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 16);
- }
+ for (i = 16; i < 24; i += 2)
+ x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 16);
}
static void transform_mb_4x4(MACROBLOCK *x) {
@@ -219,71 +190,36 @@ static void transform_mb_4x4(MACROBLOCK *x) {
vp9_transform_mbuv_4x4(x);
}
-static void build_dcblock_8x8(MACROBLOCK *x) {
- int16_t *src_diff_ptr = x->block[24].src_diff;
- int i;
-
- for (i = 0; i < 16; i++) {
- src_diff_ptr[i] = 0;
- }
- src_diff_ptr[0] = x->coeff[0 * 16];
- src_diff_ptr[1] = x->coeff[4 * 16];
- src_diff_ptr[4] = x->coeff[8 * 16];
- src_diff_ptr[8] = x->coeff[12 * 16];
- x->coeff[0 * 16] = 0;
- x->coeff[4 * 16] = 0;
- x->coeff[8 * 16] = 0;
- x->coeff[12 * 16] = 0;
-}
-
void vp9_transform_mby_8x8(MACROBLOCK *x) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
TX_TYPE tx_type;
- int has_2nd_order = get_2nd_order_usage(xd);
for (i = 0; i < 9; i += 8) {
BLOCK *b = &x->block[i];
tx_type = get_tx_type_8x8(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
+ vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 32);
+ x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 32);
}
}
for (i = 2; i < 11; i += 8) {
BLOCK *b = &x->block[i];
tx_type = get_tx_type_8x8(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
+ vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
- &x->block[i + 2].coeff[0], 32);
+ x->fwd_txm8x8(x->block[i].src_diff, x->block[i + 2].coeff, 32);
}
}
-
- if (has_2nd_order) {
- // build dc block from 2x2 y dc values
- build_dcblock_8x8(x);
-
- // do 2nd order transform on the dc block
- x->short_fhaar2x2(&x->block[24].src_diff[0],
- &x->block[24].coeff[0], 8);
- } else {
- vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
- }
}
void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
int i;
- for (i = 16; i < 24; i += 4) {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 16);
- }
+ for (i = 16; i < 24; i += 4)
+ x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 16);
}
void vp9_transform_mb_8x8(MACROBLOCK *x) {
@@ -297,10 +233,9 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) {
TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
vp9_clear_system_state();
if (tx_type != DCT_DCT) {
- vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
+ vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type);
} else {
- x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
- &x->block[0].coeff[0], 32);
+ x->fwd_txm16x16(x->block[0].src_diff, x->block[0].coeff, 32);
}
}
@@ -317,10 +252,8 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) {
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_clear_system_state();
- x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
- x_sb->coeff + 1024, 32);
- x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
- x_sb->coeff + 1280, 32);
+ x->fwd_txm16x16(x_sb->src_diff + 1024, x_sb->coeff + 1024, 32);
+ x->fwd_txm16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32);
}
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
@@ -338,13 +271,10 @@ struct vp9_token_state {
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
-#define Y2_RD_MULT 4
static const int plane_rd_mult[4] = {
Y1_RD_MULT,
- Y2_RD_MULT,
UV_RD_MULT,
- Y1_RD_MULT
};
#define UPDATE_RD_COST()\
@@ -357,34 +287,39 @@ static const int plane_rd_mult[4] = {
}\
}
+// This function is a place holder for now but may ultimately need
+// to scan previous tokens to work out the correct context.
+static int trellis_get_coeff_context(int token) {
+ int recent_energy = 0;
+ return vp9_get_coef_context(&recent_energy, token);
+}
+
static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
+ const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+ MACROBLOCKD *const xd = &mb->e_mbd;
BLOCK *b = &mb->block[i];
- BLOCKD *d = &mb->e_mbd.block[i];
+ BLOCKD *d = &xd->block[i];
vp9_token_state tokens[257][2];
unsigned best_index[257][2];
const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
int16_t *qcoeff_ptr = d->qcoeff;
int16_t *dqcoeff_ptr = d->dqcoeff;
- int eob = d->eob, final_eob, sz = 0;
- int i0 = (type == PLANE_TYPE_Y_NO_DC);
+ int eob = xd->eobs[i], final_eob, sz = 0;
+ const int i0 = 0;
int rc, x, next;
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
int rate0, rate1, error0, error1, t0, t1;
int best, band, pt;
int err_mult = plane_rd_mult[type];
int default_eob;
- int const *scan, *bands;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
-#endif
+ int const *scan;
switch (tx_size) {
default:
case TX_4X4:
scan = vp9_default_zig_zag1d_4x4;
- bands = vp9_coef_bands_4x4;
default_eob = 16;
// TODO: this isn't called (for intra4x4 modes), but will be left in
// since it could be used later
@@ -411,18 +346,13 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
break;
case TX_8X8:
scan = vp9_default_zig_zag1d_8x8;
- bands = vp9_coef_bands_8x8;
default_eob = 64;
break;
case TX_16X16:
scan = vp9_default_zig_zag1d_16x16;
- bands = vp9_coef_bands_16x16;
default_eob = 256;
break;
}
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
-#endif
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
rdmult = mb->rdmult * err_mult;
@@ -454,17 +384,12 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
t0 = (vp9_dct_value_tokens_ptr + x)->Token;
/* Consider both possible successor states. */
if (next < default_eob) {
- band = bands[i + 1];
- pt = vp9_prev_token_class[t0];
-#if CONFIG_NEWCOEFCONTEXT
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
-#endif
+ band = get_coef_band(tx_size, i + 1);
+ pt = trellis_get_coeff_context(t0);
rate0 +=
- mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
+ mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
rate1 +=
- mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
+ mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
}
UPDATE_RD_COST();
/* And pick the best. */
@@ -506,37 +431,15 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
}
if (next < default_eob) {
- band = bands[i + 1];
+ band = get_coef_band(tx_size, i + 1);
if (t0 != DCT_EOB_TOKEN) {
-#if CONFIG_NEWCOEFCONTEXT
- int tmp = qcoeff_ptr[scan[i]];
- qcoeff_ptr[scan[i]] = x;
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
- else
- pt = vp9_prev_token_class[t0];
- qcoeff_ptr[scan[i]] = tmp;
-#else
- pt = vp9_prev_token_class[t0];
-#endif
- rate0 += mb->token_costs[tx_size][type][band][pt][
+ pt = trellis_get_coeff_context(t0);
+ rate0 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
-#if CONFIG_NEWCOEFCONTEXT
- int tmp = qcoeff_ptr[scan[i]];
- qcoeff_ptr[scan[i]] = x;
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
- else
- pt = vp9_prev_token_class[t1];
- qcoeff_ptr[scan[i]] = tmp;
-#else
- pt = vp9_prev_token_class[t1];
-#endif
- rate1 += mb->token_costs[tx_size][type][band][pt][
+ pt = trellis_get_coeff_context(t1);
+ rate1 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][1].token];
}
}
@@ -563,16 +466,18 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
* add a new trellis node, but we do need to update the costs.
*/
else {
- band = bands[i + 1];
+ band = get_coef_band(tx_size, i + 1);
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
if (t0 != DCT_EOB_TOKEN) {
- tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
+ tokens[next][0].rate +=
+ mb->token_costs[tx_size][type][ref][band][0][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != DCT_EOB_TOKEN) {
- tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
+ tokens[next][1].rate +=
+ mb->token_costs[tx_size][type][ref][band][0][t1];
tokens[next][1].token = ZERO_TOKEN;
}
/* Don't update next, because we didn't add a new node. */
@@ -580,7 +485,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
}
/* Now pick the best path through the whole trellis. */
- band = bands[i + 1];
+ band = get_coef_band(tx_size, i + 1);
VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
@@ -588,8 +493,8 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += mb->token_costs[tx_size][type][band][pt][t0];
- rate1 += mb->token_costs[tx_size][type][band][pt][t1];
+ rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
+ rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = i0 - 1;
@@ -606,81 +511,12 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
}
final_eob++;
- d->eob = final_eob;
- *a = *l = (d->eob > !type);
-}
-
-/**************************************************************************
-our inverse hadamard transform effectively is weighted sum of all 16 inputs
-with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And
-dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the
-output after inverse wht and idct will be all zero. A sum of absolute value
-smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht
-fall between -65 and +65.
-**************************************************************************/
-#define SUM_2ND_COEFF_THRESH 65
-
-static void check_reset_2nd_coeffs(MACROBLOCKD *xd,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
- int sum = 0;
- int i;
- BLOCKD *bd = &xd->block[24];
- if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH
- && bd->dequant[1] >= SUM_2ND_COEFF_THRESH)
- return;
-
- for (i = 0; i < bd->eob; i++) {
- int coef = bd->dqcoeff[vp9_default_zig_zag1d_4x4[i]];
- sum += (coef >= 0) ? coef : -coef;
- if (sum >= SUM_2ND_COEFF_THRESH)
- return;
- }
-
- if (sum < SUM_2ND_COEFF_THRESH) {
- for (i = 0; i < bd->eob; i++) {
- int rc = vp9_default_zig_zag1d_4x4[i];
- bd->qcoeff[rc] = 0;
- bd->dqcoeff[rc] = 0;
- }
- bd->eob = 0;
- *a = *l = (bd->eob != 0);
- }
-}
-
-#define SUM_2ND_COEFF_THRESH_8X8 32
-static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
- int sum = 0;
- BLOCKD *bd = &xd->block[24];
- int coef;
-
- coef = bd->dqcoeff[0];
- sum += (coef >= 0) ? coef : -coef;
- coef = bd->dqcoeff[1];
- sum += (coef >= 0) ? coef : -coef;
- coef = bd->dqcoeff[4];
- sum += (coef >= 0) ? coef : -coef;
- coef = bd->dqcoeff[8];
- sum += (coef >= 0) ? coef : -coef;
-
- if (sum < SUM_2ND_COEFF_THRESH_8X8) {
- bd->qcoeff[0] = 0;
- bd->dqcoeff[0] = 0;
- bd->qcoeff[1] = 0;
- bd->dqcoeff[1] = 0;
- bd->qcoeff[4] = 0;
- bd->dqcoeff[4] = 0;
- bd->qcoeff[8] = 0;
- bd->dqcoeff[8] = 0;
- bd->eob = 0;
- *a = *l = (bd->eob != 0);
- }
+ xd->eobs[d - xd->block] = final_eob;
+ *a = *l = (final_eob > 0);
}
void vp9_optimize_mby_4x4(MACROBLOCK *x) {
int b;
- PLANE_TYPE type;
- int has_2nd_order;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
@@ -694,25 +530,11 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) {
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
- has_2nd_order = get_2nd_order_usage(&x->e_mbd);
-
- type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
-
for (b = 0; b < 16; b++) {
- optimize_b(x, b, type,
+ optimize_b(x, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
-
- if (has_2nd_order) {
- b = 24;
- optimize_b(x, b, PLANE_TYPE_Y2,
- ta + vp9_block2above[TX_4X4][b],
- tl + vp9_block2left[TX_4X4][b], TX_4X4);
- check_reset_2nd_coeffs(&x->e_mbd,
- ta + vp9_block2above[TX_4X4][b],
- tl + vp9_block2left[TX_4X4][b]);
- }
}
void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
@@ -744,11 +566,9 @@ static void optimize_mb_4x4(MACROBLOCK *x) {
void vp9_optimize_mby_8x8(MACROBLOCK *x) {
int b;
- PLANE_TYPE type;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
- int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
if (!x->e_mbd.above_context || !x->e_mbd.left_context)
return;
@@ -758,28 +578,15 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) {
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
- type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
for (b = 0; b < 16; b += 4) {
ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
- optimize_b(x, b, type, &above_ec, &left_ec, TX_8X8);
+ optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8);
a[1] = a[0] = above_ec;
l[1] = l[0] = left_ec;
}
-
- // 8x8 always have 2nd order block
- if (has_2nd_order) {
- check_reset_8x8_2nd_coeffs(&x->e_mbd,
- ta + vp9_block2above[TX_8X8][24],
- tl + vp9_block2left[TX_8X8][24]);
- }
}
void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
@@ -793,13 +600,8 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
for (b = 16; b < 24; b += 4) {
ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8);
}
}
@@ -817,13 +619,8 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) {
if (!t_above || !t_left)
return;
-#if CONFIG_CNVCONTEXT
ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
-#else
- ta = t_above->y1[0];
- tl = t_left->y1[0];
-#endif
optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16);
}
@@ -871,21 +668,21 @@ void vp9_fidct_mb(MACROBLOCK *x) {
}
}
-void vp9_encode_inter16x16(MACROBLOCK *x) {
+void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *const xd = &x->e_mbd;
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
subtract_mb(x);
vp9_fidct_mb(x);
vp9_recon_mb(xd);
}
/* this function is used by first pass only */
-void vp9_encode_inter16x16y(MACROBLOCK *x) {
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
- vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
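
With the second-order (Y2) plane gone, optimize_b no longer needs the static band tables or the PLANE_TYPE_Y_NO_DC case: the end-of-block index lives in xd->eobs[], the band comes from get_coef_band(tx_size, i), the context from trellis_get_coeff_context(), and an extra [ref] dimension (inter vs. intra) indexes the token cost table. A small illustrative accessor (token_rate is a hypothetical name, not part of the patch) showing how one token's rate is looked up under the new layout:

    /* Hypothetical accessor, for illustration only: rate of a single token
     * under the reworked cost-table indexing used by optimize_b above. */
    static int token_rate(const MACROBLOCK *mb, TX_SIZE tx_size, PLANE_TYPE type,
                          int is_inter, int band, int pt, int token) {
      return mb->token_costs[tx_size][type][is_inter][band][pt][token];
    }
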
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index f3c679227..6356df215 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -23,14 +23,14 @@ typedef struct {
#include "vp9/encoder/vp9_onyx_int.h"
struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(MACROBLOCK *x);
+void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mbuv_4x4(MACROBLOCK *x);
void vp9_transform_mby_4x4(MACROBLOCK *x);
void vp9_optimize_mby_4x4(MACROBLOCK *x);
void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
-void vp9_encode_inter16x16y(MACROBLOCK *x);
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mb_8x8(MACROBLOCK *mb);
void vp9_transform_mby_8x8(MACROBLOCK *x);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 8df6c20a7..337276d59 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -435,9 +435,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
MACROBLOCKD *const xd = &x->e_mbd;
int recon_yoffset, recon_uvoffset;
- YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+ YV12_BUFFER_CONFIG *lst_yv12 =
+ &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
- YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+ YV12_BUFFER_CONFIG *gld_yv12 =
+ &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
int recon_y_stride = lst_yv12->y_stride;
int recon_uv_stride = lst_yv12->uv_stride;
int64_t intra_error = 0;
@@ -611,7 +613,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
this_error = motion_error;
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- vp9_encode_inter16x16y(x);
+ vp9_encode_inter16x16y(x, mb_row, mb_col);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
@@ -843,16 +845,13 @@ static double calc_correction_factor(double err_per_mb,
power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low;
power_term = (power_term > pt_high) ? pt_high : power_term;
- // Adjustments to error term
- // TBD
-
// Calculate correction factor
correction_factor = pow(error_term, power_term);
// Clip range
correction_factor =
(correction_factor < 0.05)
- ? 0.05 : (correction_factor > 2.0) ? 2.0 : correction_factor;
+ ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor;
return correction_factor;
}
@@ -886,8 +885,7 @@ static void adjust_maxq_qrange(VP9_COMP *cpi) {
static int estimate_max_q(VP9_COMP *cpi,
FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh,
- int overhead_bits) {
+ int section_target_bandwitdh) {
int Q;
int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
@@ -898,7 +896,6 @@ static int estimate_max_q(VP9_COMP *cpi,
double err_per_mb = section_err / num_mbs;
double err_correction_factor;
double speed_correction = 1.0;
- double overhead_bits_per_mb;
if (section_target_bandwitdh <= 0)
return cpi->twopass.maxq_max_limit; // Highest value allowed
@@ -950,13 +947,6 @@ static int estimate_max_q(VP9_COMP *cpi,
speed_correction = 1.25;
}
- // Estimate of overhead bits per mb
- // Correction to overhead bits for min allowed Q.
- // PGW TODO.. This code is broken for the extended Q range
- // for now overhead set to 0.
- overhead_bits_per_mb = overhead_bits / num_mbs;
- overhead_bits_per_mb *= pow(0.98, (double)cpi->twopass.maxq_min_limit);
-
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) {
@@ -967,23 +957,9 @@ static int estimate_max_q(VP9_COMP *cpi,
sr_correction * speed_correction *
cpi->twopass.est_max_qcorrection_factor;
- if (err_correction_factor < 0.05)
- err_correction_factor = 0.05;
- else if (err_correction_factor > 5.0)
- err_correction_factor = 5.0;
bits_per_mb_at_this_q =
- vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
-
- bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
- (double)bits_per_mb_at_this_q);
-
- // Mode and motion overhead
- // As Q rises in real encode loop rd code will force overhead down
- // We make a crude adjustment for this here as *.98 per Q step.
- // PGW TODO.. This code is broken for the extended Q range
- // for now overhead set to 0.
- // overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
+ vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
@@ -1001,7 +977,7 @@ static int estimate_max_q(VP9_COMP *cpi,
// PGW TODO.. This code is broken for the extended Q range
if ((cpi->ni_frames >
((int)cpi->twopass.total_stats->count >> 8)) &&
- (cpi->ni_frames > 150)) {
+ (cpi->ni_frames > 25)) {
adjust_maxq_qrange(cpi);
}
@@ -1012,8 +988,7 @@ static int estimate_max_q(VP9_COMP *cpi,
// complexity and data rate.
static int estimate_cq(VP9_COMP *cpi,
FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh,
- int overhead_bits) {
+ int section_target_bandwitdh) {
int Q;
int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
@@ -1026,15 +1001,11 @@ static int estimate_cq(VP9_COMP *cpi,
double speed_correction = 1.0;
double clip_iiratio;
double clip_iifactor;
- double overhead_bits_per_mb;
-
target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))
? (512 * section_target_bandwitdh) / num_mbs
: 512 * (section_target_bandwitdh / num_mbs);
- // Estimate of overhead bits per mb
- overhead_bits_per_mb = overhead_bits / num_mbs;
// Corrections for higher compression speed settings
// (reduced compression expected)
@@ -1073,23 +1044,8 @@ static int estimate_cq(VP9_COMP *cpi,
calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) *
sr_correction * speed_correction * clip_iifactor;
- if (err_correction_factor < 0.05)
- err_correction_factor = 0.05;
- else if (err_correction_factor > 5.0)
- err_correction_factor = 5.0;
-
bits_per_mb_at_this_q =
- vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
-
- bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
- (double)bits_per_mb_at_this_q);
-
- // Mode and motion overhead
- // As Q rises in real encode loop rd code will force overhead down
- // We make a crude adjustment for this here as *.98 per Q step.
- // PGW TODO.. This code is broken for the extended Q range
- // for now overhead set to 0.
- overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
+ vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
@@ -1953,8 +1909,6 @@ void vp9_second_pass(VP9_COMP *cpi) {
double this_frame_intra_error;
double this_frame_coded_error;
- int overhead_bits;
-
if (!cpi->twopass.stats_in) {
return;
}
@@ -2018,11 +1972,6 @@ void vp9_second_pass(VP9_COMP *cpi) {
if (cpi->target_bandwidth < 0)
cpi->target_bandwidth = 0;
-
- // Account for mv, mode and other overheads.
- overhead_bits = (int)estimate_modemvcost(
- cpi, cpi->twopass.total_left_stats);
-
// Special case code for first frame.
if (cpi->common.current_video_frame == 0) {
cpi->twopass.est_max_qcorrection_factor = 1.0;
@@ -2034,8 +1983,7 @@ void vp9_second_pass(VP9_COMP *cpi) {
est_cq =
estimate_cq(cpi,
cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left),
- overhead_bits);
+ (int)(cpi->twopass.bits_left / frames_left));
cpi->cq_target_quality = cpi->oxcf.cq_level;
if (est_cq > cpi->cq_target_quality)
@@ -2049,21 +1997,23 @@ void vp9_second_pass(VP9_COMP *cpi) {
tmp_q = estimate_max_q(
cpi,
cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left),
- overhead_bits);
+ (int)(cpi->twopass.bits_left / frames_left));
cpi->active_worst_quality = tmp_q;
cpi->ni_av_qi = tmp_q;
cpi->avg_q = vp9_convert_qindex_to_q(tmp_q);
+#ifndef ONE_SHOT_Q_ESTIMATE
// Limit the maxq value returned subsequently.
// This increases the risk of overspend or underspend if the initial
// estimate for the clip is bad, but helps prevent excessive
// variation in Q, especially near the end of a clip
// where for example a small overspend may cause Q to crash
adjust_maxq_qrange(cpi);
+#endif
}
+#ifndef ONE_SHOT_Q_ESTIMATE
// The last few frames of a clip almost always have too few or too many
// bits and for the sake of over-exact rate control we don't want to make
// radical adjustments to the allowed quantizer range just to use up a
@@ -2078,13 +2028,13 @@ void vp9_second_pass(VP9_COMP *cpi) {
tmp_q = estimate_max_q(
cpi,
cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left),
- overhead_bits);
+ (int)(cpi->twopass.bits_left / frames_left));
// Make a damped adjustment to active max Q
cpi->active_worst_quality =
adjust_active_maxq(cpi->active_worst_quality, tmp_q);
}
+#endif
cpi->twopass.frames_to_key--;
@@ -2092,7 +2042,6 @@ void vp9_second_pass(VP9_COMP *cpi) {
subtract_stats(cpi->twopass.total_left_stats, &this_frame);
}
-
static int test_candidate_kf(VP9_COMP *cpi,
FIRSTPASS_STATS *last_frame,
FIRSTPASS_STATS *this_frame,
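
Both estimators now fold the error correction factor directly into the bits-per-mb estimate instead of adding a separate overhead-bits term, and the local 0.05–5.0 clipping disappears because calc_correction_factor itself now clips to that range (its upper bound was raised from 2.0 to 5.0 above). The body of the per-Q loop in estimate_cq reduces to the following (condensed from the hunks above; the loop bounds are unchanged and not repeated here):

    /* Per-Q check in estimate_cq after this change. */
    err_correction_factor =
        calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) *
        sr_correction * speed_correction * clip_iifactor;

    bits_per_mb_at_this_q =
        vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);

    if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
      break;
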
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 19bc4d67d..2296a6669 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -11,12 +11,12 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
-extern void vp9_init_first_pass(VP9_COMP *cpi);
-extern void vp9_first_pass(VP9_COMP *cpi);
-extern void vp9_end_first_pass(VP9_COMP *cpi);
+void vp9_init_first_pass(VP9_COMP *cpi);
+void vp9_first_pass(VP9_COMP *cpi);
+void vp9_end_first_pass(VP9_COMP *cpi);
-extern void vp9_init_second_pass(VP9_COMP *cpi);
-extern void vp9_second_pass(VP9_COMP *cpi);
-extern void vp9_end_second_pass(VP9_COMP *cpi);
+void vp9_init_second_pass(VP9_COMP *cpi);
+void vp9_second_pass(VP9_COMP *cpi);
+void vp9_end_second_pass(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_FIRSTPASS_H_
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 0ff60c8b0..121de653f 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -20,14 +20,16 @@
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv *ref_mv,
- int_mv *dst_mv) {
+ int_mv *dst_mv,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
BLOCK *b = &x->block[0];
BLOCKD *d = &xd->block[0];
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
unsigned int best_err;
- int step_param;
+
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -36,11 +38,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv ref_full;
// Further step/diamond searches as necessary
- if (cpi->Speed < 8) {
- step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 1 : 0);
- } else {
- step_param = cpi->sf.first_step + 2;
- }
+ int step_param = cpi->sf.first_step +
+ (cpi->Speed < 8 ? (cpi->Speed > 5 ? 1 : 0) : 2);
vp9_clamp_mv_min_max(x, ref_mv);
@@ -72,7 +71,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
}
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
- vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16, INT_MAX);
@@ -93,8 +92,9 @@ static int do_16x16_motion_search
YV12_BUFFER_CONFIG *buf,
int buf_mb_y_offset,
YV12_BUFFER_CONFIG *ref,
- int mb_y_offset
-) {
+ int mb_y_offset,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err, tmp_err;
@@ -124,7 +124,7 @@ static int do_16x16_motion_search
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search
- tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
if (tmp_err < err) {
err = tmp_err;
dst_mv->as_int = tmp_mv.as_int;
@@ -136,7 +136,8 @@ static int do_16x16_motion_search
int_mv zero_ref_mv, tmp_mv;
zero_ref_mv.as_int = 0;
- tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
+ mb_row, mb_col);
if (tmp_err < err) {
dst_mv->as_int = tmp_mv.as_int;
err = tmp_err;
@@ -229,7 +230,9 @@ static void update_mbgraph_mb_stats
int gld_y_offset,
YV12_BUFFER_CONFIG *alt_ref,
int_mv *prev_alt_ref_mv,
- int arf_y_offset
+ int arf_y_offset,
+ int mb_row,
+ int mb_col
) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -249,7 +252,8 @@ static void update_mbgraph_mb_stats
int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
&stats->ref[GOLDEN_FRAME].m.mv,
buf, mb_y_offset,
- golden_ref, gld_y_offset);
+ golden_ref, gld_y_offset,
+ mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
stats->ref[GOLDEN_FRAME].err = INT_MAX;
@@ -292,6 +296,9 @@ static void update_mbgraph_frame_stats
int_mv arf_top_mv, gld_top_mv;
MODE_INFO mi_local;
+ // Make sure the mi context starts in a consistent state.
+ memset(&mi_local, 0, sizeof(mi_local));
+
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
@@ -323,7 +330,8 @@ static void update_mbgraph_frame_stats
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
golden_ref, &gld_left_mv, gld_y_in_offset,
- alt_ref, &arf_left_mv, arf_y_in_offset);
+ alt_ref, &arf_left_mv, arf_y_in_offset,
+ mb_row, mb_col);
arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
if (mb_col == 0) {
@@ -427,13 +435,11 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
vpx_free(arf_not_zz);
}
-void vp9_update_mbgraph_stats
-(
- VP9_COMP *cpi
-) {
+void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
int i, n_frames = vp9_lookahead_depth(cpi->lookahead);
- YV12_BUFFER_CONFIG *golden_ref = &cm->yv12_fb[cm->gld_fb_idx];
+ YV12_BUFFER_CONFIG *golden_ref =
+ &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
// we need to look ahead beyond where the ARF transitions into
// being a GF - so exit if we don't look ahead beyond that
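
The collapsed step_param expression in do_16x16_motion_iteration preserves the original mapping from encoder speed to the first search step; read case by case it gives the same three outcomes as the if/else it replaces:

    /* Worked reading of the new ternary (behaviour identical to before):
     *   cpi->Speed <= 5  ->  step_param = cpi->sf.first_step
     *   cpi->Speed 6..7  ->  step_param = cpi->sf.first_step + 1
     *   cpi->Speed >= 8  ->  step_param = cpi->sf.first_step + 2 */
    int step_param = cpi->sf.first_step +
                     (cpi->Speed < 8 ? (cpi->Speed > 5 ? 1 : 0) : 2);
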
diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h
index db23eca33..c5bca4d01 100644
--- a/vp9/encoder/vp9_mbgraph.h
+++ b/vp9/encoder/vp9_mbgraph.h
@@ -11,6 +11,6 @@
#ifndef VP9_ENCODER_VP9_MBGRAPH_H_
#define VP9_ENCODER_VP9_MBGRAPH_H_
-extern void vp9_update_mbgraph_stats(VP9_COMP *cpi);
+void vp9_update_mbgraph_stats(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_MBGRAPH_H_
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 4694a92c6..300d9f85c 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -8,22 +8,17 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "./vpx_config.h"
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
((ref_mv->as_mv.col & 7) ? 1 : 0);
@@ -44,21 +39,20 @@ void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
}
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
- int Weight, int ishp) {
+ int weight, int ishp) {
MV v;
- v.row = (mv->as_mv.row - ref->as_mv.row);
- v.col = (mv->as_mv.col - ref->as_mv.col);
+ v.row = mv->as_mv.row - ref->as_mv.row;
+ v.col = mv->as_mv.col - ref->as_mv.col;
return ((mvjcost[vp9_get_mv_joint(v)] +
- mvcost[0][v.row] + mvcost[1][v.col]) *
- Weight) >> 7;
+ mvcost[0][v.row] + mvcost[1][v.col]) * weight) >> 7;
}
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
int error_per_bit, int ishp) {
if (mvcost) {
MV v;
- v.row = (mv->as_mv.row - ref->as_mv.row);
- v.col = (mv->as_mv.col - ref->as_mv.col);
+ v.row = mv->as_mv.row - ref->as_mv.row;
+ v.col = mv->as_mv.col - ref->as_mv.col;
return ((mvjcost[vp9_get_mv_joint(v)] +
mvcost[0][v.row] + mvcost[1][v.col]) *
error_per_bit + 128) >> 8;
@@ -68,11 +62,10 @@ static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
int *mvsadcost[2], int error_per_bit) {
-
if (mvsadcost) {
MV v;
- v.row = (mv->as_mv.row - ref->as_mv.row);
- v.col = (mv->as_mv.col - ref->as_mv.col);
+ v.row = mv->as_mv.row - ref->as_mv.row;
+ v.col = mv->as_mv.col - ref->as_mv.col;
return ((mvjsadcost[vp9_get_mv_joint(v)] +
mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
error_per_bit + 128) >> 8;
@@ -81,45 +74,39 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
}
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
- int Len;
+ int len;
int search_site_count = 0;
-
// Generate offsets for 4 search sites per step.
- Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = 0;
search_site_count++;
- while (Len > 0) {
-
+ for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.col = -len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -Len;
+ x->ss[search_site_count].offset = -len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.col = len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = Len;
+ x->ss[search_site_count].offset = len;
search_site_count++;
-
- // Contract.
- Len /= 2;
}
x->ss_count = search_site_count;
@@ -127,68 +114,63 @@ void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
}
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
- int Len;
+ int len;
int search_site_count = 0;
// Generate offsets for 8 search sites per step.
- Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = 0;
search_site_count++;
- while (Len > 0) {
-
+ for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.col = -len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -Len;
+ x->ss[search_site_count].offset = -len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.col = len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = Len;
+ x->ss[search_site_count].offset = len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride - Len;
+ x->ss[search_site_count].mv.col = -len;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride - len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride + Len;
+ x->ss[search_site_count].mv.col = len;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride + len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride - Len;
+ x->ss[search_site_count].mv.col = -len;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride - len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride + Len;
+ x->ss[search_site_count].mv.col = len;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride + len;
search_site_count++;
-
- // Contract.
- Len /= 2;
}
x->ss_count = search_site_count;
@@ -1546,7 +1528,7 @@ int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
int bestsad = INT_MAX;
int r, c;
@@ -1641,7 +1623,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1770,7 +1752,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1787,7 +1769,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance;
int col_max = ref_col + distance;
- DECLARE_ALIGNED_ARRAY(16, uint16_t, sad_array8, 8);
+ DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
unsigned int sad_array[3];
int_mv fcenter_mv;
@@ -2023,12 +2005,10 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
for (i = 0; i < search_range; i++) {
int best_site = -1;
- int all_in = 1;
-
- all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
- all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
- all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
- all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
+ int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
+ ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
+ ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
+ ((ref_mv->as_mv.col + 1) < x->mv_col_max);
if (all_in) {
unsigned int sad_array[4];
@@ -2103,21 +2083,22 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#ifdef ENTROPY_STATS
-void print_mode_context(void) {
+void print_mode_context(VP9_COMMON *pc) {
FILE *f = fopen("vp9_modecont.c", "a");
int i, j;
fprintf(f, "#include \"vp9_entropy.h\"\n");
- fprintf(f, "const int vp9_mode_contexts[6][4] =");
+ fprintf(f, "const int vp9_mode_contexts[INTER_MODE_CONTEXTS][4] =");
fprintf(f, "{\n");
- for (j = 0; j < 6; j++) {
+ for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
fprintf(f, " {/* %d */ ", j);
fprintf(f, " ");
for (i = 0; i < 4; i++) {
int this_prob;
// context probs
- this_prob = get_binary_prob(mv_ref_ct[j][i][0], mv_ref_ct[j][i][1]);
+ this_prob = get_binary_prob(pc->fc.mv_ref_ct[j][i][0],
+ pc->fc.mv_ref_ct[j][i][1]);
fprintf(f, "%5d, ", this_prob);
}
@@ -2128,44 +2109,4 @@ void print_mode_context(void) {
fclose(f);
}
-/* MV ref count ENTROPY_STATS stats code */
-void init_mv_ref_counts() {
- vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
- vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
- if (m == ZEROMV) {
- ++mv_ref_ct [ct[0]] [0] [0];
- ++mv_mode_cts[0][0];
- } else {
- ++mv_ref_ct [ct[0]] [0] [1];
- ++mv_mode_cts[0][1];
-
- if (m == NEARESTMV) {
- ++mv_ref_ct [ct[1]] [1] [0];
- ++mv_mode_cts[1][0];
- } else {
- ++mv_ref_ct [ct[1]] [1] [1];
- ++mv_mode_cts[1][1];
-
- if (m == NEARMV) {
- ++mv_ref_ct [ct[2]] [2] [0];
- ++mv_mode_cts[2][0];
- } else {
- ++mv_ref_ct [ct[2]] [2] [1];
- ++mv_mode_cts[2][1];
-
- if (m == NEWMV) {
- ++mv_ref_ct [ct[3]] [3] [0];
- ++mv_mode_cts[3][0];
- } else {
- ++mv_ref_ct [ct[3]] [3] [1];
- ++mv_mode_cts[3][1];
- }
- }
- }
- }
-}
-
#endif  /* END MV ref count ENTROPY_STATS stats code */
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 358d10bc6..2479d7235 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -16,9 +16,7 @@
#include "vp9/encoder/vp9_variance.h"
#ifdef ENTROPY_STATS
-extern void init_mv_ref_counts();
-extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
-void print_mode_context(void);
+void print_mode_context(VP9_COMMON *pc);
#endif
@@ -26,11 +24,12 @@ void print_mode_context(void);
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
-extern void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
-extern int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
- int *mvcost[2], int Weight, int ishp);
-extern void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
-extern void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
+void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
+int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
+ int *mvcost[2], int weight, int ishp);
+void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
+void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
+
// Runs sequence of diamond searches in smaller steps for RD
struct VP9_COMP;
int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
@@ -39,20 +38,13 @@ int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
vp9_variance_fn_ptr_t *fn_ptr,
int_mv *ref_mv, int_mv *dst_mv);
-extern int vp9_hex_search
-(
- MACROBLOCK *x,
- BLOCK *b,
- BLOCKD *d,
- int_mv *ref_mv,
- int_mv *best_mv,
- int search_param,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vf,
- int *mvjsadcost, int *mvsadcost[2],
- int *mvjcost, int *mvcost[2],
- int_mv *center_mv
-);
+int vp9_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int_mv *best_mv,
+ int search_param, int error_per_bit,
+ const vp9_variance_fn_ptr_t *vf,
+ int *mvjsadcost, int *mvsadcost[2],
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv);
typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv
*bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 27e0e48a3..5278ac2a3 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -10,7 +10,9 @@
#include "vpx_config.h"
+#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_quantize.h"
@@ -22,6 +24,7 @@
#include "vp9/common/vp9_extend.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
@@ -236,12 +239,12 @@ static void update_base_skip_probs(VP9_COMP *cpi) {
if (cm->frame_type != KEY_FRAME) {
vp9_update_skip_probs(cpi);
- if (cm->refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
int k;
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
cpi->last_skip_false_probs[2][k] = cm->mbskip_pred_probs[k];
cpi->last_skip_probs_q[2] = cm->base_qindex;
- } else if (cpi->common.refresh_golden_frame) {
+ } else if (cpi->refresh_golden_frame) {
int k;
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
cpi->last_skip_false_probs[1][k] = cm->mbskip_pred_probs[k];
@@ -388,7 +391,7 @@ static int compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) {
return target_index - start_index;
}
-static void init_seg_features(VP9_COMP *cpi) {
+static void configure_static_seg_features(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
@@ -408,10 +411,8 @@ static void init_seg_features(VP9_COMP *cpi) {
// Clear down the segment features.
vp9_clearall_segfeatures(xd);
- }
-
- // If this is an alt ref frame
- else if (cm->refresh_alt_ref_frame) {
+ } else if (cpi->refresh_alt_ref_frame) {
+ // If this is an alt ref frame
// Clear down the global segmentation map
vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols));
xd->update_mb_segmentation_map = 0;
@@ -448,7 +449,7 @@ static void init_seg_features(VP9_COMP *cpi) {
else if (xd->segmentation_enabled) {
// First normal frame in a valid gf or alt ref group
if (cpi->common.frames_since_golden == 0) {
- // Set up segment features for normal frames in an af group
+ // Set up segment features for normal frames in an arf group
if (cpi->source_alt_ref_active) {
xd->update_mb_segmentation_map = 0;
xd->update_mb_segmentation_data = 1;
@@ -465,16 +466,9 @@ static void init_seg_features(VP9_COMP *cpi) {
// Segment coding disabled for compred testing
if (high_q || (cpi->static_mb_pct == 100)) {
- // set_segref(xd, 1, LAST_FRAME);
vp9_set_segref(xd, 1, ALTREF_FRAME);
vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
-
- vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV);
- vp9_enable_segfeature(xd, 1, SEG_LVL_MODE);
-
- // EOB segment coding not fixed for 8x8 yet
- vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0);
- vp9_enable_segfeature(xd, 1, SEG_LVL_EOB);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP);
}
}
// Disable segmentation and clear down features if alt ref
@@ -493,29 +487,23 @@ static void init_seg_features(VP9_COMP *cpi) {
}
// Special case where we are coding over the top of a previous
- // alt ref frame
+ // alt ref frame.
// Segment coding disabled for compred testing
else if (cpi->is_src_frame_alt_ref) {
- // Enable mode and ref frame features for segment 0 as well
+ // Enable ref frame features for segment 0 as well
vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME);
- vp9_enable_segfeature(xd, 0, SEG_LVL_MODE);
vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
- vp9_enable_segfeature(xd, 1, SEG_LVL_MODE);
- // All mbs should use ALTREF_FRAME, ZEROMV exclusively
+ // All mbs should use ALTREF_FRAME
vp9_clear_segref(xd, 0);
vp9_set_segref(xd, 0, ALTREF_FRAME);
vp9_clear_segref(xd, 1);
vp9_set_segref(xd, 1, ALTREF_FRAME);
- vp9_set_segdata(xd, 0, SEG_LVL_MODE, ZEROMV);
- vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV);
- // Skip all MBs if high Q
+ // Skip all MBs if high Q (0,0 mv and skip coeffs)
if (high_q) {
- vp9_enable_segfeature(xd, 0, SEG_LVL_EOB);
- vp9_set_segdata(xd, 0, SEG_LVL_EOB, 0);
- vp9_enable_segfeature(xd, 1, SEG_LVL_EOB);
- vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0);
+ vp9_enable_segfeature(xd, 0, SEG_LVL_SKIP);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP);
}
      // Enable data update
xd->update_mb_segmentation_data = 1;
@@ -590,16 +578,165 @@ static void set_default_lf_deltas(VP9_COMP *cpi) {
cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv
}
+static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
+ SPEED_FEATURES *sf = &cpi->sf;
+ int speed_multiplier = speed + 1;
+ int i;
+
+ // Set baseline threshold values
+ for (i = 0; i < MAX_MODES; ++i) {
+ sf->thresh_mult[i] = (mode == 0) ? -500 : 0;
+ }
+
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROG ] = 0;
+ sf->thresh_mult[THR_ZEROA ] = 0;
+
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_NEARESTG ] = 0;
+ sf->thresh_mult[THR_NEARESTA ] = 0;
+
+ sf->thresh_mult[THR_NEARMV ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEARG ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEARA ] += speed_multiplier * 1000;
+
+ sf->thresh_mult[THR_DC ] = 0;
+ sf->thresh_mult[THR_TM ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_V_PRED ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_H_PRED ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_D45_PRED ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D135_PRED] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D117_PRED] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D153_PRED] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D27_PRED ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_B_PRED ] += speed_multiplier * 2500;
+ sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500;
+
+ sf->thresh_mult[THR_NEWMV ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEWG ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEWA ] += speed_multiplier * 1000;
+
+ sf->thresh_mult[THR_SPLITMV ] += speed_multiplier * 2500;
+ sf->thresh_mult[THR_SPLITG ] += speed_multiplier * 2500;
+ sf->thresh_mult[THR_SPLITA ] += speed_multiplier * 2500;
+
+ sf->thresh_mult[THR_COMP_ZEROLG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_ZEROLA ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_ZEROGA ] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_COMP_NEARESTLG] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARESTLA] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARESTGA] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_COMP_NEARLG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARLA ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARGA ] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_COMP_NEWLG ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_NEWLA ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_NEWGA ] += speed_multiplier * 2000;
+
+ sf->thresh_mult[THR_COMP_SPLITLA ] += speed_multiplier * 4500;
+ sf->thresh_mult[THR_COMP_SPLITGA ] += speed_multiplier * 4500;
+ sf->thresh_mult[THR_COMP_SPLITLG ] += speed_multiplier * 4500;
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] += speed_multiplier * 1500;
+
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] += speed_multiplier * 2000;
+#endif
+
+ /* disable frame modes if flags not set */
+ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
+ sf->thresh_mult[THR_NEWMV ] = INT_MAX;
+ sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
+ sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
+ sf->thresh_mult[THR_NEARMV ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = INT_MAX;
+#endif
+ }
+ if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+ sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROG ] = INT_MAX;
+ sf->thresh_mult[THR_NEARG ] = INT_MAX;
+ sf->thresh_mult[THR_NEWG ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITG ] = INT_MAX;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = INT_MAX;
+#endif
+ }
+ if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
+ sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROA ] = INT_MAX;
+ sf->thresh_mult[THR_NEARA ] = INT_MAX;
+ sf->thresh_mult[THR_NEWA ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITA ] = INT_MAX;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = INT_MAX;
+#endif
+ }
+
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) !=
+ (VP9_LAST_FLAG | VP9_GOLD_FLAG)) {
+ sf->thresh_mult[THR_COMP_ZEROLG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARESTLG] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARLG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEWLG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_SPLITLG ] = INT_MAX;
+ }
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
+ sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_SPLITLA ] = INT_MAX;
+ }
+ if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
+ sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_SPLITGA ] = INT_MAX;
+ }
+}
+
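For a concrete sense of the new per-speed scaling (example numbers only): with mode != 0 and speed = 1, speed_multiplier is 2, so THR_NEARMV ends up at 0 + 2 * 1000 = 2000, THR_B_PRED at 2 * 2500 = 5000, and the compound split modes at 2 * 4500 = 9000, before any entries are pushed to INT_MAX for reference frames whose flags are not set.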
void vp9_set_speed_features(VP9_COMP *cpi) {
SPEED_FEATURES *sf = &cpi->sf;
- int Mode = cpi->compressor_speed;
- int Speed = cpi->Speed;
+ int mode = cpi->compressor_speed;
+ int speed = cpi->Speed;
int i;
VP9_COMMON *cm = &cpi->common;
  // Only modes 0 and 1 are supported for now in the experimental code base
- if (Mode > 1)
- Mode = 1;
+ if (mode > 1)
+ mode = 1;
// Initialise default mode frequency sampling variables
for (i = 0; i < MAX_MODES; i ++) {
@@ -617,167 +754,29 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->quarter_pixel_search = 1;
sf->half_pixel_search = 1;
sf->iterative_sub_pixel = 1;
-#if CONFIG_LOSSLESS
- sf->optimize_coefficients = 0;
-#else
- sf->optimize_coefficients = 1;
-#endif
sf->no_skip_block4x4_search = 1;
+ if (cpi->oxcf.lossless)
+ sf->optimize_coefficients = 0;
+ else
+ sf->optimize_coefficients = 1;
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->static_segmentation = 1;
+ sf->splitmode_breakout = 0;
+ sf->mb16_breakout = 0;
- // default thresholds to 0
- for (i = 0; i < MAX_MODES; i++)
- sf->thresh_mult[i] = 0;
-
- switch (Mode) {
+ switch (mode) {
case 0: // best quality mode
- sf->thresh_mult[THR_ZEROMV ] = 0;
- sf->thresh_mult[THR_ZEROG ] = 0;
- sf->thresh_mult[THR_ZEROA ] = 0;
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_NEARESTG ] = 0;
- sf->thresh_mult[THR_NEARESTA ] = 0;
- sf->thresh_mult[THR_NEARMV ] = 0;
- sf->thresh_mult[THR_NEARG ] = 0;
- sf->thresh_mult[THR_NEARA ] = 0;
-
- sf->thresh_mult[THR_DC ] = 0;
-
- sf->thresh_mult[THR_V_PRED ] = 1000;
- sf->thresh_mult[THR_H_PRED ] = 1000;
- sf->thresh_mult[THR_D45_PRED ] = 1000;
- sf->thresh_mult[THR_D135_PRED] = 1000;
- sf->thresh_mult[THR_D117_PRED] = 1000;
- sf->thresh_mult[THR_D153_PRED] = 1000;
- sf->thresh_mult[THR_D27_PRED ] = 1000;
- sf->thresh_mult[THR_D63_PRED ] = 1000;
- sf->thresh_mult[THR_B_PRED ] = 2000;
- sf->thresh_mult[THR_I8X8_PRED] = 2000;
- sf->thresh_mult[THR_TM ] = 1000;
-
- sf->thresh_mult[THR_NEWMV ] = 1000;
- sf->thresh_mult[THR_NEWG ] = 1000;
- sf->thresh_mult[THR_NEWA ] = 1000;
-
- sf->thresh_mult[THR_SPLITMV ] = 2500;
- sf->thresh_mult[THR_SPLITG ] = 5000;
- sf->thresh_mult[THR_SPLITA ] = 5000;
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 0;
- sf->thresh_mult[THR_COMP_NEARLG ] = 0;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 0;
- sf->thresh_mult[THR_COMP_NEARLA ] = 0;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 0;
- sf->thresh_mult[THR_COMP_NEARGA ] = 0;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 1000;
- sf->thresh_mult[THR_COMP_NEWLA ] = 1000;
- sf->thresh_mult[THR_COMP_NEWGA ] = 1000;
-
- sf->thresh_mult[THR_COMP_SPLITLA ] = 2500;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 5000;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 5000;
-
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
-
- sf->first_step = 0;
- sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
sf->search_best_filter = SEARCH_BEST_FILTER;
break;
+
case 1:
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_ZEROMV ] = 0;
- sf->thresh_mult[THR_DC ] = 0;
- sf->thresh_mult[THR_NEARMV ] = 0;
- sf->thresh_mult[THR_V_PRED ] = 1000;
- sf->thresh_mult[THR_H_PRED ] = 1000;
- sf->thresh_mult[THR_D45_PRED ] = 1000;
- sf->thresh_mult[THR_D135_PRED] = 1000;
- sf->thresh_mult[THR_D117_PRED] = 1000;
- sf->thresh_mult[THR_D153_PRED] = 1000;
- sf->thresh_mult[THR_D27_PRED ] = 1000;
- sf->thresh_mult[THR_D63_PRED ] = 1000;
- sf->thresh_mult[THR_B_PRED ] = 2500;
- sf->thresh_mult[THR_I8X8_PRED] = 2500;
- sf->thresh_mult[THR_TM ] = 1000;
-
- sf->thresh_mult[THR_NEARESTG ] = 1000;
- sf->thresh_mult[THR_NEARESTA ] = 1000;
-
- sf->thresh_mult[THR_ZEROG ] = 1000;
- sf->thresh_mult[THR_ZEROA ] = 1000;
- sf->thresh_mult[THR_NEARG ] = 1000;
- sf->thresh_mult[THR_NEARA ] = 1000;
-
- sf->thresh_mult[THR_ZEROMV ] = 0;
- sf->thresh_mult[THR_ZEROG ] = 0;
- sf->thresh_mult[THR_ZEROA ] = 0;
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_NEARESTG ] = 0;
- sf->thresh_mult[THR_NEARESTA ] = 0;
- sf->thresh_mult[THR_NEARMV ] = 0;
- sf->thresh_mult[THR_NEARG ] = 0;
- sf->thresh_mult[THR_NEARA ] = 0;
-
- sf->thresh_mult[THR_NEWMV ] = 1000;
- sf->thresh_mult[THR_NEWG ] = 1000;
- sf->thresh_mult[THR_NEWA ] = 1000;
-
- sf->thresh_mult[THR_SPLITMV ] = 1700;
- sf->thresh_mult[THR_SPLITG ] = 4500;
- sf->thresh_mult[THR_SPLITA ] = 4500;
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 0;
- sf->thresh_mult[THR_COMP_NEARLG ] = 0;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 0;
- sf->thresh_mult[THR_COMP_NEARLA ] = 0;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 0;
- sf->thresh_mult[THR_COMP_NEARGA ] = 0;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 1000;
- sf->thresh_mult[THR_COMP_NEWLA ] = 1000;
- sf->thresh_mult[THR_COMP_NEWGA ] = 1000;
-
- sf->thresh_mult[THR_COMP_SPLITLA ] = 1700;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 4500;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 4500;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
+ sf->static_segmentation = 1;
+ sf->splitmode_breakout = 1;
+ sf->mb16_breakout = 0;
- if (Speed > 0) {
+ if (speed > 0) {
/* Disable coefficient optimization above speed 0 */
sf->optimize_coefficients = 0;
sf->no_skip_block4x4_search = 0;
@@ -793,7 +792,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
}
- if (Speed > 1) {
+ if (speed > 1) {
cpi->mode_check_freq[THR_SPLITG] = 4;
cpi->mode_check_freq[THR_SPLITA] = 4;
cpi->mode_check_freq[THR_SPLITMV] = 2;
@@ -801,73 +800,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
-
- sf->thresh_mult[THR_TM ] = 1500;
- sf->thresh_mult[THR_V_PRED ] = 1500;
- sf->thresh_mult[THR_H_PRED ] = 1500;
- sf->thresh_mult[THR_D45_PRED ] = 1500;
- sf->thresh_mult[THR_D135_PRED] = 1500;
- sf->thresh_mult[THR_D117_PRED] = 1500;
- sf->thresh_mult[THR_D153_PRED] = 1500;
- sf->thresh_mult[THR_D27_PRED ] = 1500;
- sf->thresh_mult[THR_D63_PRED ] = 1500;
- sf->thresh_mult[THR_B_PRED ] = 5000;
- sf->thresh_mult[THR_I8X8_PRED] = 5000;
-
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- sf->thresh_mult[THR_NEWMV ] = 2000;
- sf->thresh_mult[THR_SPLITMV ] = 10000;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 20000;
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- sf->thresh_mult[THR_NEARESTG ] = 1500;
- sf->thresh_mult[THR_ZEROG ] = 1500;
- sf->thresh_mult[THR_NEARG ] = 1500;
- sf->thresh_mult[THR_NEWG ] = 2000;
- sf->thresh_mult[THR_SPLITG ] = 20000;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 20000;
- }
-
- if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- sf->thresh_mult[THR_NEARESTA ] = 1500;
- sf->thresh_mult[THR_ZEROA ] = 1500;
- sf->thresh_mult[THR_NEARA ] = 1500;
- sf->thresh_mult[THR_NEWA ] = 2000;
- sf->thresh_mult[THR_SPLITA ] = 20000;
- sf->thresh_mult[THR_COMP_SPLITLA ] = 10000;
- }
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 1500;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 1500;
- sf->thresh_mult[THR_COMP_NEARLG ] = 1500;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 1500;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 1500;
- sf->thresh_mult[THR_COMP_NEARLA ] = 1500;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 1500;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 1500;
- sf->thresh_mult[THR_COMP_NEARGA ] = 1500;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 2000;
- sf->thresh_mult[THR_COMP_NEWLA ] = 2000;
- sf->thresh_mult[THR_COMP_NEWGA ] = 2000;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
}
- if (Speed > 2) {
+ if (speed > 2) {
cpi->mode_check_freq[THR_SPLITG] = 15;
cpi->mode_check_freq[THR_SPLITA] = 15;
cpi->mode_check_freq[THR_SPLITMV] = 7;
@@ -876,150 +811,19 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
- sf->thresh_mult[THR_TM ] = 2000;
- sf->thresh_mult[THR_V_PRED ] = 2000;
- sf->thresh_mult[THR_H_PRED ] = 2000;
- sf->thresh_mult[THR_D45_PRED ] = 2000;
- sf->thresh_mult[THR_D135_PRED] = 2000;
- sf->thresh_mult[THR_D117_PRED] = 2000;
- sf->thresh_mult[THR_D153_PRED] = 2000;
- sf->thresh_mult[THR_D27_PRED ] = 2000;
- sf->thresh_mult[THR_D63_PRED ] = 2000;
- sf->thresh_mult[THR_B_PRED ] = 7500;
- sf->thresh_mult[THR_I8X8_PRED] = 7500;
-
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- sf->thresh_mult[THR_NEWMV ] = 2000;
- sf->thresh_mult[THR_SPLITMV ] = 25000;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 50000;
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- sf->thresh_mult[THR_NEARESTG ] = 2000;
- sf->thresh_mult[THR_ZEROG ] = 2000;
- sf->thresh_mult[THR_NEARG ] = 2000;
- sf->thresh_mult[THR_NEWG ] = 2500;
- sf->thresh_mult[THR_SPLITG ] = 50000;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 50000;
- }
-
- if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- sf->thresh_mult[THR_NEARESTA ] = 2000;
- sf->thresh_mult[THR_ZEROA ] = 2000;
- sf->thresh_mult[THR_NEARA ] = 2000;
- sf->thresh_mult[THR_NEWA ] = 2500;
- sf->thresh_mult[THR_SPLITA ] = 50000;
- sf->thresh_mult[THR_COMP_SPLITLA ] = 25000;
- }
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 2000;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 2000;
- sf->thresh_mult[THR_COMP_NEARLG ] = 2000;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 2000;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
- sf->thresh_mult[THR_COMP_NEARLA ] = 2000;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 2000;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 2000;
- sf->thresh_mult[THR_COMP_NEARGA ] = 2000;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 2500;
- sf->thresh_mult[THR_COMP_NEWLA ] = 2500;
- sf->thresh_mult[THR_COMP_NEWGA ] = 2500;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
-
sf->improved_dct = 0;
// Only do recode loop on key frames, golden frames and
// alt ref frames
sf->recode_loop = 2;
-
}
break;
}; /* switch */
- /* disable frame modes if flags not set */
- if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
- sf->thresh_mult[THR_NEWMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
- sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARMV ] = INT_MAX;
- sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
- sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
- sf->thresh_mult[THR_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_NEWG ] = INT_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = INT_MAX;
-#endif
- sf->thresh_mult[THR_SPLITG ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
- sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
- sf->thresh_mult[THR_ZEROA ] = INT_MAX;
- sf->thresh_mult[THR_NEARA ] = INT_MAX;
- sf->thresh_mult[THR_NEWA ] = INT_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = INT_MAX;
-#endif
- sf->thresh_mult[THR_SPLITA ] = INT_MAX;
- }
-
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) != (VP9_LAST_FLAG | VP9_GOLD_FLAG)) {
- sf->thresh_mult[THR_COMP_ZEROLG ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARESTLG] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARLG ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEWLG ] = INT_MAX;
- sf->thresh_mult[THR_COMP_SPLITLG ] = INT_MAX;
- }
-
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
- sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_SPLITLA ] = INT_MAX;
- }
-
- if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
- sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_SPLITGA ] = INT_MAX;
- }
-#if CONFIG_COMP_INTERINTRA_PRED
- if ((cpi->ref_frame_flags & VP9_LAST_FLAG) != VP9_LAST_FLAG) {
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = INT_MAX;
- }
-#endif
+ // Set rd thresholds based on mode and speed setting
+ set_rd_speed_thresholds(cpi, mode, speed);
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
@@ -1028,36 +832,29 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->improved_dct = 0;
}
- if (cpi->sf.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->mb,
- cm->yv12_fb[cm->lst_fb_idx].y_stride);
- } else if (cpi->sf.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->mb,
- cm->yv12_fb[cm->lst_fb_idx].y_stride);
- }
+ {
+ int y_stride = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].y_stride;
- cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
- cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
- cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+ if (cpi->sf.search_method == NSTEP) {
+ vp9_init3smotion_compensation(&cpi->mb, y_stride);
+ } else if (cpi->sf.search_method == DIAMOND) {
+ vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+ }
+ }
-#if CONFIG_LOSSLESS
- if (cpi->oxcf.lossless) {
- cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
+ cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16;
+ cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8;
+ cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
+ cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8;
}
-#endif
cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8;
cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16;
- cpi->mb.quantize_b_2x2 = vp9_regular_quantize_b_2x2;
vp9_init_quantizer(cpi);
@@ -1078,6 +875,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
frames_at_speed[cpi->Speed]++;
#endif
}
+
static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
int width = (cpi->oxcf.Width + 15) & ~15;
int height = (cpi->oxcf.Height + 15) & ~15;
@@ -1144,7 +942,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
-
vpx_free(cpi->tok);
{
@@ -1199,6 +996,38 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
}
+static void update_frame_size(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+
+ /* our internal buffers are always multiples of 16 */
+ int width = (cm->Width + 15) & ~15;
+ int height = (cm->Height + 15) & ~15;
+
+ cm->mb_rows = height >> 4;
+ cm->mb_cols = width >> 4;
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+ cm->mode_info_stride = cm->mb_cols + 1;
+ memset(cm->mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO));
+ vp9_update_mode_info_border(cm, cm->mip);
+
+ cm->mi = cm->mip + cm->mode_info_stride + 1;
+ cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
+ vp9_update_mode_info_in_image(cm, cm->mi);
+
+ /* Update size of buffers local to this frame */
+ if (vp8_yv12_realloc_frame_buffer(&cpi->last_frame_uf,
+ width, height, VP9BORDERINPIXELS))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to reallocate last frame buffer");
+
+ if (vp8_yv12_realloc_frame_buffer(&cpi->scaled_source,
+ width, height, VP9BORDERINPIXELS))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to reallocate scaled source buffer");
+}
+
+
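A quick worked example of the rounding above (illustrative numbers only): a 1918x818 request gives width = (1918 + 15) & ~15 = 1920 and height = (818 + 15) & ~15 = 832, so mb_cols = 120, mb_rows = 52, MBs = 6240 and mode_info_stride = 121.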
// TODO: perhaps change the number of steps exposed to the outside world when setting
// max and min limits. Also this will likely want refining for the extended Q
// range.
@@ -1239,10 +1068,7 @@ void vp9_new_frame_rate(VP9_COMP *cpi, double framerate) {
cpi->min_frame_bandwidth = FRAME_OVERHEAD_BITS;
// Set Maximum gf/arf interval
- cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
-
- if (cpi->max_gf_interval < 12)
- cpi->max_gf_interval = 12;
+ cpi->max_gf_interval = 15;
// Extended interval for genuinely static scenes
cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
@@ -1270,10 +1096,26 @@ rescale(int val, int num, int denom) {
return (int)(llval * llnum / llden);
}
+static void set_tile_limits(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ int min_log2_tiles, max_log2_tiles;
+
+ cm->log2_tile_columns = cpi->oxcf.tile_columns;
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
+
+ vp9_get_tile_n_bits(cm, &min_log2_tiles, &max_log2_tiles);
+ max_log2_tiles += min_log2_tiles;
+ if (cm->log2_tile_columns < min_log2_tiles)
+ cm->log2_tile_columns = min_log2_tiles;
+ else if (cm->log2_tile_columns > max_log2_tiles)
+ cm->log2_tile_columns = max_log2_tiles;
+ cm->tile_columns = 1 << cm->log2_tile_columns;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+}
static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
cpi->oxcf = *oxcf;
@@ -1304,6 +1146,12 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->static_mb_pct = 0;
+ cpi->lst_fb_idx = 0;
+ cpi->gld_fb_idx = 1;
+ cpi->alt_fb_idx = 2;
+
+ set_tile_limits(cpi);
+
#if VP9_TEMPORAL_ALT_REF
{
int i;
@@ -1319,7 +1167,7 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
if (!cpi)
return;
@@ -1351,7 +1199,6 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
if (cpi->oxcf.cpu_used > 5)
cpi->oxcf.cpu_used = 5;
-
break;
case MODE_SECONDPASS_BEST:
@@ -1364,20 +1211,14 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-
-#if CONFIG_LOSSLESS
cpi->oxcf.lossless = oxcf->lossless;
if (cpi->oxcf.lossless) {
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8;
+ } else {
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm;
}
-#endif
cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -1385,8 +1226,8 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
// cpi->use_golden_frame_only = 0;
// cpi->use_last_frame_only = 0;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 1;
cm->refresh_entropy_probs = 1;
setup_features(cpi);
@@ -1491,14 +1332,18 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
}
- if (((cm->Width + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_width ||
- ((cm->Height + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_height ||
- cm->yv12_fb[cm->lst_fb_idx].y_width == 0) {
+ // Increasing the size of the frame beyond the first seen frame, or some
+ // otherwise signalled maximum size, is not supported.
+ // TODO(jkoleszar): exit gracefully.
+ if (!cpi->initial_width) {
alloc_raw_frame_buffers(cpi);
vp9_alloc_compressor_data(cpi);
+ cpi->initial_width = cm->Width;
+ cpi->initial_height = cm->Height;
}
+ assert(cm->Width <= cpi->initial_width);
+ assert(cm->Height <= cpi->initial_height);
+ update_frame_size(cpi);
if (cpi->oxcf.fixed_q >= 0) {
cpi->last_q[0] = cpi->oxcf.fixed_q;
@@ -1526,6 +1371,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->last_frame_distortion = 0;
#endif
+ set_tile_limits(cpi);
}
#define M_LOG2_E 0.693147180559945309417
@@ -1693,7 +1539,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->source_alt_ref_pending = FALSE;
cpi->source_alt_ref_active = FALSE;
- cpi->common.refresh_alt_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
@@ -1795,10 +1641,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->rd_thresh_mult[i] = 128;
}
-#ifdef ENTROPY_STATS
- init_mv_ref_counts();
-#endif
-
#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].vf = VF; \
@@ -1838,14 +1680,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
-#if ARCH_X86 || ARCH_X86_64
- cpi->fn_ptr[BLOCK_16X16].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_16X8].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_8X16].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_8X8].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_4X4].copymem = vp9_copy32xn;
-#endif
-
cpi->full_search_sad = vp9_full_search_sad;
cpi->diamond_search_sad = vp9_diamond_search_sad;
cpi->refining_search_sad = vp9_refining_search_sad;
@@ -1885,7 +1719,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
if (cpi->pass != 1) {
print_context_counters();
print_tree_update_probs();
- print_mode_context();
+ print_mode_context(&cpi->common);
}
#endif
#ifdef NMV_STATS
@@ -1908,7 +1742,8 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
print_mode_contexts(&cpi->common);
#endif
if (cpi->b_calculate_psnr) {
- YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+ YV12_BUFFER_CONFIG *lst_yv12 =
+ &cpi->common.yv12_fb[cpi->common.ref_frame_map[cpi->lst_fb_idx]];
double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error);
double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2);
@@ -2230,18 +2065,18 @@ int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) {
if (ref_frame_flags > 7)
return -1;
- cpi->common.refresh_golden_frame = 0;
- cpi->common.refresh_alt_ref_frame = 0;
- cpi->common.refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->refresh_last_frame = 0;
if (ref_frame_flags & VP9_LAST_FLAG)
- cpi->common.refresh_last_frame = 1;
+ cpi->refresh_last_frame = 1;
if (ref_frame_flags & VP9_GOLD_FLAG)
- cpi->common.refresh_golden_frame = 1;
+ cpi->refresh_golden_frame = 1;
if (ref_frame_flags & VP9_ALT_FLAG)
- cpi->common.refresh_alt_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
return 0;
}
@@ -2253,11 +2088,11 @@ int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
int ref_fb_idx;
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->lst_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->gld_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->alt_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
else
return -1;
@@ -2274,11 +2109,11 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
int ref_fb_idx;
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->lst_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->gld_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->alt_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
else
return -1;
@@ -2349,9 +2184,73 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
fwrite(src, s->uv_width, 1, yuv_rec_file);
src += s->uv_stride;
} while (--h);
+ fflush(yuv_rec_file);
}
#endif
+static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
+ YV12_BUFFER_CONFIG *dst_fb) {
+ const int in_w = src_fb->y_width;
+ const int in_h = src_fb->y_height;
+ const int out_w = dst_fb->y_width;
+ const int out_h = dst_fb->y_height;
+ int x, y;
+
+ for (y = 0; y < out_h; y += 16) {
+ for (x = 0; x < out_w; x += 16) {
+ int x_q4 = x * 16 * in_w / out_w;
+ int y_q4 = y * 16 * in_h / out_h;
+ uint8_t *src, *dst;
+ int src_stride, dst_stride;
+
+ src = src_fb->y_buffer +
+ y * in_h / out_h * src_fb->y_stride +
+ x * in_w / out_w;
+ dst = dst_fb->y_buffer +
+ y * dst_fb->y_stride +
+ x;
+ src_stride = src_fb->y_stride;
+ dst_stride = dst_fb->y_stride;
+
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 16, 16);
+
+ x_q4 >>= 1;
+ y_q4 >>= 1;
+ src_stride = src_fb->uv_stride;
+ dst_stride = dst_fb->uv_stride;
+
+ src = src_fb->u_buffer +
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->u_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+
+ src = src_fb->v_buffer +
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->v_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+ }
+ }
+
+ vp8_yv12_extend_frame_borders(dst_fb);
+}
+
+
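Restating the arithmetic above (no new behaviour implied): vp9_convolve8 is driven with a 1/16-pel phase and step. x_q4 = x * 16 * in_w / out_w is the source phase at the block origin and 16 * in_w / out_w is the per-output-pixel step, so an unscaled copy uses step 16 with phase 0 while a 2x upscale uses step 8, advancing half a source pixel per output pixel; the chroma planes reuse the halved phases (x_q4 >>= 1, y_q4 >>= 1) over 8x8 blocks.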
static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
@@ -2374,13 +2273,13 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
// Update the Golden frame usage counts.
- if (cm->refresh_golden_frame) {
+ if (cpi->refresh_golden_frame) {
// Update data structure that monitors level of reference to last GF
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
    // if this frame refreshes the golden frame, subsequent frames don't unless the user requests it
- cm->refresh_golden_frame = 0;
+ cpi->refresh_golden_frame = 0;
cpi->common.frames_since_golden = 0;
// if ( cm->frame_type == KEY_FRAME )
@@ -2402,7 +2301,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
// ******** Fixed Q test code only ************
// If we are going to use the ALT reference for the next group of frames set a flag to say so.
if (cpi->oxcf.fixed_q >= 0 &&
- cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) {
+ cpi->oxcf.play_alternate && !cpi->refresh_alt_ref_frame) {
cpi->source_alt_ref_pending = TRUE;
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
}
@@ -2414,7 +2313,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
- } else if (!cpi->common.refresh_alt_ref_frame) {
+ } else if (!cpi->refresh_alt_ref_frame) {
// Decrement count down till next gf
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
@@ -2535,8 +2434,8 @@ static int recode_loop_test(VP9_COMP *cpi,
if ((cpi->sf.recode_loop == 1) ||
((cpi->sf.recode_loop == 2) &&
((cm->frame_type == KEY_FRAME) ||
- cm->refresh_golden_frame ||
- cm->refresh_alt_ref_frame))) {
+ cpi->refresh_golden_frame ||
+ cpi->refresh_alt_ref_frame))) {
// General over and under shoot tests
if (((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
((cpi->projected_frame_size < low_limit) && (q > minq))) {
@@ -2563,86 +2462,56 @@ static int recode_loop_test(VP9_COMP *cpi,
return force_recode;
}
-static void update_reference_frames(VP9_COMMON *cm) {
- YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb;
+static void update_reference_frames(VP9_COMP * const cpi) {
+ VP9_COMMON * const cm = &cpi->common;
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
-
if (cm->frame_type == KEY_FRAME) {
- yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG | VP9_ALT_FLAG;
-
- yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
-
- cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx;
- } else { /* For non key frames */
- if (cm->refresh_alt_ref_frame) {
- assert(!cm->copy_buffer_to_arf);
-
- cm->yv12_fb[cm->new_fb_idx].flags |= VP9_ALT_FLAG;
- cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
- cm->alt_fb_idx = cm->new_fb_idx;
- } else if (cm->copy_buffer_to_arf) {
- assert(!(cm->copy_buffer_to_arf & ~0x3));
-
- if (cm->copy_buffer_to_arf == 1) {
- if (cm->alt_fb_idx != cm->lst_fb_idx) {
- yv12_fb[cm->lst_fb_idx].flags |= VP9_ALT_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
- cm->alt_fb_idx = cm->lst_fb_idx;
- }
- } else { /* if (cm->copy_buffer_to_arf == 2) */
- if (cm->alt_fb_idx != cm->gld_fb_idx) {
- yv12_fb[cm->gld_fb_idx].flags |= VP9_ALT_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
- cm->alt_fb_idx = cm->gld_fb_idx;
- }
- }
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
+ } else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
+ /* Preserve the previously existing golden frame and update the frame in
+ * the alt ref slot instead. This is highly specific to the current use of
+ * alt-ref as a forward reference, and this needs to be generalized as
+ * other uses are implemented (like RTC/temporal scaling)
+ *
+ * The update to the buffer in the alt ref slot was signalled in
+ * vp9_pack_bitstream(), now swap the buffer pointers so that it's treated
+ * as the golden frame next time.
+ */
+ int tmp;
+
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
+
+ tmp = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->gld_fb_idx;
+ cpi->gld_fb_idx = tmp;
+ } else { /* For non key/golden frames */
+ if (cpi->refresh_alt_ref_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
}
- if (cm->refresh_golden_frame) {
- assert(!cm->copy_buffer_to_gf);
-
- cm->yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG;
- cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- cm->gld_fb_idx = cm->new_fb_idx;
- } else if (cm->copy_buffer_to_gf) {
- assert(!(cm->copy_buffer_to_arf & ~0x3));
-
- if (cm->copy_buffer_to_gf == 1) {
- if (cm->gld_fb_idx != cm->lst_fb_idx) {
- yv12_fb[cm->lst_fb_idx].flags |= VP9_GOLD_FLAG;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- cm->gld_fb_idx = cm->lst_fb_idx;
- }
- } else { /* if (cm->copy_buffer_to_gf == 2) */
- if (cm->alt_fb_idx != cm->gld_fb_idx) {
- yv12_fb[cm->alt_fb_idx].flags |= VP9_GOLD_FLAG;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- cm->gld_fb_idx = cm->alt_fb_idx;
- }
- }
+ if (cpi->refresh_golden_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
}
}
- if (cm->refresh_last_frame) {
- cm->yv12_fb[cm->new_fb_idx].flags |= VP9_LAST_FLAG;
- cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP9_LAST_FLAG;
- cm->lst_fb_idx = cm->new_fb_idx;
+ if (cpi->refresh_last_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
}
}
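As a reading aid, the ref_cnt_fb() calls above are consistent with a reference-counting helper along the lines of the sketch below (the real helper lives in the common code and may differ in detail):

static void ref_cnt_fb_sketch(int *ref_cnt, int *idx, int new_idx) {
  // Drop one reference from the buffer this slot currently points at ...
  if (ref_cnt[*idx] > 0)
    ref_cnt[*idx]--;
  // ... then point the slot at the newly encoded buffer and take a reference.
  *idx = new_idx;
  ref_cnt[new_idx]++;
}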
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
- if (cm->no_lpf) {
+ if (cm->no_lpf || cpi->mb.e_mbd.lossless) {
cm->filter_level = 0;
- }
-#if CONFIG_LOSSLESS
- else if (cpi->oxcf.lossless) {
- cm->filter_level = 0;
- }
-#endif
- else {
+ } else {
struct vpx_usec_timer timer;
vp9_clear_system_state();
@@ -2666,7 +2535,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
}
-void select_interp_filter_type(VP9_COMP *cpi) {
+void vp9_select_interp_filter_type(VP9_COMP *cpi) {
int i;
int high_filter_index = 0;
unsigned int thresh;
@@ -2719,6 +2588,38 @@ static void select_interintra_mode(VP9_COMP *cpi) {
}
#endif
+static void scale_references(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]];
+
+ if (ref->y_width != cm->mb_cols * 16 || ref->y_height != cm->mb_rows * 16) {
+ int new_fb = get_free_fb(cm);
+
+ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb],
+ cm->mb_cols * 16,
+ cm->mb_rows * 16,
+ VP9BORDERINPIXELS);
+ scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
+ cpi->scaled_ref_idx[i] = new_fb;
+ } else {
+ cpi->scaled_ref_idx[i] = cm->ref_frame_map[i];
+ cm->fb_idx_ref_cnt[cm->ref_frame_map[i]]++;
+ }
+ }
+}
+
+static void release_scaled_references(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--;
+ }
+}
+
static void encode_frame_to_data_rate(VP9_COMP *cpi,
unsigned long *size,
unsigned char *dest,
@@ -2735,8 +2636,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
int q_low;
int q_high;
- int zbin_oq_high;
- int zbin_oq_low = 0;
int top_index;
int bottom_index;
@@ -2749,11 +2648,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
#if RESET_FOREACH_FILTER
int q_low0;
int q_high0;
- int zbin_oq_high0;
- int zbin_oq_low0 = 0;
int Q0;
- int last_zbin_oq;
- int last_zbin_oq0;
int active_best_quality0;
int active_worst_quality0;
double rate_correction_factor0;
@@ -2773,36 +2668,43 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
int mcomp_filter_index = 0;
int64_t mcomp_filter_cost[4];
+ /* Scale the source buffer, if required */
+ if (cm->mb_cols * 16 != cpi->un_scaled_source->y_width ||
+ cm->mb_rows * 16 != cpi->un_scaled_source->y_height) {
+ scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source);
+ cpi->Source = &cpi->scaled_source;
+ } else {
+ cpi->Source = cpi->un_scaled_source;
+ }
+
+ scale_references(cpi);
+
// Clear down mmx registers to allow floating point in what follows
vp9_clear_system_state();
// For an alt ref frame in 2 pass we skip the call to the second
// pass function that sets the target bandwidth so must set it here
- if (cpi->common.refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame
// per second target bitrate
cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
cpi->output_frame_rate);
}
- // Default turn off buffer to buffer copying
- cm->copy_buffer_to_gf = 0;
- cm->copy_buffer_to_arf = 0;
-
// Clear zbin over-quant value and mode boost values.
- cpi->zbin_over_quant = 0;
cpi->zbin_mode_boost = 0;
// Enable or disable mode based tweaking of the zbin
  // Two pass only: used where GF/ARF prediction quality
  // is above a threshold
cpi->zbin_mode_boost = 0;
-#if CONFIG_LOSSLESS
- cpi->zbin_mode_boost_enabled = FALSE;
-#else
- cpi->zbin_mode_boost_enabled = TRUE;
-#endif
+
+ if (cpi->oxcf.lossless)
+ cpi->zbin_mode_boost_enabled = FALSE;
+ else
+ cpi->zbin_mode_boost_enabled = TRUE;
+
if (cpi->gfu_boost <= 400) {
cpi->zbin_mode_boost_enabled = FALSE;
}
@@ -2846,10 +2748,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
for (i = 0; i < MAX_MODES; i++) {
cpi->rd_thresh_mult[i] = 128;
}
+
+ cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
+ cm->frame_parallel_decoding_mode =
+ (cpi->oxcf.frame_parallel_decoding_mode != 0);
+ if (cm->error_resilient_mode) {
+ cm->frame_parallel_decoding_mode = 1;
+ cm->refresh_entropy_probs = 0;
+ }
}
- // Test code for new segment features
- init_seg_features(cpi);
+ // Configure use of segmentation for enhanced coding of static regions.
+  // For now this is only allowed in the second pass of a two-pass encode (as it
+  // requires lagged coding) and only if the relevant speed feature flag is set.
+ if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) {
+ configure_static_seg_features(cpi);
+ }
// Decide how big to make the frame
vp9_pick_frame_size(cpi);
@@ -2896,9 +2810,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->active_best_quality < cpi->best_quality)
cpi->active_best_quality = cpi->best_quality;
}
- }
-
- else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) {
+ } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
int high = 2000;
int low = 400;
@@ -2971,17 +2883,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Determine initial Q to try
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
}
-#if RESET_FOREACH_FILTER
- last_zbin_oq = cpi->zbin_over_quant;
-#endif
-
- // Set highest allowed value for Zbin over quant
- if (cm->frame_type == KEY_FRAME)
- zbin_oq_high = 0; // ZBIN_OQ_MAX/16
- else if (cm->refresh_alt_ref_frame || (cm->refresh_golden_frame && !cpi->source_alt_ref_active))
- zbin_oq_high = 16;
- else
- zbin_oq_high = ZBIN_OQ_MAX;
vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit,
&frame_over_shoot_limit);
@@ -3064,9 +2965,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
q_low0 = q_low;
q_high0 = q_high;
Q0 = Q;
- zbin_oq_low0 = zbin_oq_low;
- zbin_oq_high0 = zbin_oq_high;
- last_zbin_oq0 = last_zbin_oq;
rate_correction_factor0 = cpi->rate_correction_factor;
gf_rate_correction_factor0 = cpi->gf_rate_correction_factor;
active_best_quality0 = cpi->active_best_quality;
@@ -3087,12 +2985,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[Q][k];
if (cm->frame_type != KEY_FRAME) {
- if (cpi->common.refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
for (k = 0; k < MBSKIP_CONTEXTS; k++) {
if (cpi->last_skip_false_probs[2][k] != 0)
cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k];
}
- } else if (cpi->common.refresh_golden_frame) {
+ } else if (cpi->refresh_golden_frame) {
for (k = 0; k < MBSKIP_CONTEXTS; k++) {
if (cpi->last_skip_false_probs[1][k] != 0)
cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k];
@@ -3124,10 +3022,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
// Set up entropy depending on frame type.
- if (cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME) {
+ /* Choose which entropy context to use. When using a forward reference
+ * frame, it immediately follows the keyframe, and thus benefits from
+ * using the same entropy context established by the keyframe. Otherwise,
+ * use the default context 0.
+ */
+ cm->frame_context_idx = cpi->oxcf.play_alternate;
vp9_setup_key_frame(cpi);
- else
+ } else {
+ /* Choose which entropy context to use. Currently there are only two
+ * contexts used, one for normal frames and one for alt ref frames.
+ */
+ cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
vp9_setup_inter_frame(cpi);
+ }
}
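Put another way, and assuming play_alternate and refresh_alt_ref_frame carry 0/1 values, the entropy-context selection in the two branches above reduces to:

  // Key frames use context 1 when a forward alt-ref will follow, else 0;
  // inter frames use context 1 when refreshing the alt-ref, else 0.
  cm->frame_context_idx = (cm->frame_type == KEY_FRAME)
                              ? cpi->oxcf.play_alternate
                              : cpi->refresh_alt_ref_frame;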
// transform / motion compensation build reconstruction frame
@@ -3214,23 +3123,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->projected_frame_size > cpi->this_frame_target) {
        q_low = (Q < q_high) ? (Q + 1) : q_high;  // Raise q_low to at least the current value
- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low
- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
-
if (undershoot_seen || (loop_count > 1)) {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
if (!active_worst_qchanged)
vp9_update_rate_correction_factors(cpi, 1);
Q = (q_high + q_low + 1) / 2;
-
- // Adjust cpi->zbin_over_quant (only allowed when Q is max)
- if (Q < MAXQ)
- cpi->zbin_over_quant = 0;
- else {
- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
- }
} else {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
if (!active_worst_qchanged)
@@ -3238,7 +3136,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
- while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) {
+ while ((Q < q_low) && (Retries < 10)) {
vp9_update_rate_correction_factors(cpi, 0);
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
Retries++;
@@ -3249,10 +3147,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
// Frame is too small
else {
- if (cpi->zbin_over_quant == 0)
- q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant
- else // else lower zbin_oq_high
- zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low;
+ q_high = (Q > q_low) ? (Q - 1) : q_low;
if (overshoot_seen || (loop_count > 1)) {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
@@ -3260,12 +3155,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_update_rate_correction_factors(cpi, 1);
Q = (q_high + q_low) / 2;
-
- // Adjust cpi->zbin_over_quant (only allowed when Q is max)
- if (Q < MAXQ)
- cpi->zbin_over_quant = 0;
- else
- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
} else {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
if (!active_worst_qchanged)
@@ -3282,7 +3171,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
q_low = Q;
}
- while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) {
+ while ((Q > q_high) && (Retries < 10)) {
vp9_update_rate_correction_factors(cpi, 0);
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
Retries++;
@@ -3298,16 +3187,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
else if (Q < q_low)
Q = q_low;
- // Clamp cpi->zbin_over_quant
- cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ?
- zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ?
- zbin_oq_high : cpi->zbin_over_quant;
-
- // Loop = ((Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant)) ? TRUE : FALSE;
Loop = ((Q != last_q)) ? TRUE : FALSE;
-#if RESET_FOREACH_FILTER
- last_zbin_oq = cpi->zbin_over_quant;
-#endif
} else
Loop = FALSE;
@@ -3351,12 +3231,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (Loop == TRUE) {
overshoot_seen = FALSE;
undershoot_seen = FALSE;
- zbin_oq_low = zbin_oq_low0;
- zbin_oq_high = zbin_oq_high0;
q_low = q_low0;
q_high = q_high0;
Q = Q0;
- cpi->zbin_over_quant = last_zbin_oq = last_zbin_oq0;
cpi->rate_correction_factor = rate_correction_factor0;
cpi->gf_rate_correction_factor = gf_rate_correction_factor0;
cpi->active_best_quality = active_best_quality0;
@@ -3412,12 +3289,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_update_gf_useage_maps(cpi, cm, &cpi->mb);
if (cm->frame_type == KEY_FRAME)
- cm->refresh_last_frame = 1;
+ cpi->refresh_last_frame = 1;
#if 0
{
FILE *f = fopen("gfactive.stt", "a");
- fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
+ fprintf(f, "%8d %8d %8d %8d %8d\n",
+ cm->current_video_frame,
+ (100 * cpi->gf_active_count)
+ / (cpi->common.mb_rows * cpi->common.mb_cols),
+ cpi->this_iiratio,
+ cpi->next_iiratio,
+ cpi->refresh_golden_frame);
fclose(f);
}
#endif
@@ -3444,18 +3327,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
update_reference_segmentation_map(cpi);
}
- update_reference_frames(cm);
+ release_scaled_references(cpi);
+ update_reference_frames(cpi);
vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4);
- vp9_copy(cpi->common.fc.hybrid_coef_counts_4x4,
- cpi->hybrid_coef_counts_4x4);
vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
- vp9_copy(cpi->common.fc.hybrid_coef_counts_8x8,
- cpi->hybrid_coef_counts_8x8);
vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
- vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16,
- cpi->hybrid_coef_counts_16x16);
vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32);
- vp9_adapt_coef_probs(&cpi->common);
+ if (!cpi->common.error_resilient_mode &&
+ !cpi->common.frame_parallel_decoding_mode)
+ vp9_adapt_coef_probs(&cpi->common);
if (cpi->common.frame_type != KEY_FRAME) {
vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count);
vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
@@ -3467,14 +3347,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
#if CONFIG_COMP_INTERINTRA_PRED
vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count);
#endif
- vp9_adapt_mode_probs(&cpi->common);
-
cpi->common.fc.NMVcount = cpi->NMVcount;
- /*
- printf("2: %d %d %d %d\n", cpi->NMVcount.joints[0], cpi->NMVcount.joints[1],
- cpi->NMVcount.joints[2], cpi->NMVcount.joints[3]);
- */
- vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
+ if (!cpi->common.error_resilient_mode &&
+ !cpi->common.frame_parallel_decoding_mode) {
+ vp9_adapt_mode_probs(&cpi->common);
+ vp9_adapt_mode_context(&cpi->common);
+ vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
+ }
}
#if CONFIG_COMP_INTERINTRA_PRED
if (cm->frame_type != KEY_FRAME)
@@ -3502,8 +3381,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if ((cm->base_qindex < cpi->last_boosted_qindex) ||
((cpi->static_mb_pct < 100) &&
((cm->frame_type == KEY_FRAME) ||
- cm->refresh_alt_ref_frame ||
- (cm->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
cpi->last_boosted_qindex = cm->base_qindex;
}
@@ -3516,7 +3395,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2;
// Keep a record from which we can calculate the average Q excluding GF updates and key frames
- if ((cm->frame_type != KEY_FRAME) && !cm->refresh_golden_frame && !cm->refresh_alt_ref_frame) {
+ if ((cm->frame_type != KEY_FRAME)
+ && !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
cpi->ni_frames++;
cpi->tot_q += vp9_convert_qindex_to_q(Q);
cpi->avg_q = cpi->tot_q / (double)cpi->ni_frames;
@@ -3538,11 +3418,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
- // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
- cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
- cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
- cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
- cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32;
+ // Rolling monitors of whether we are over- or under-spending, used to help
+ // regulate min and max Q in two-pass encoding.
+ if (cm->frame_type != KEY_FRAME) {
+ cpi->rolling_target_bits =
+ ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
+ cpi->rolling_actual_bits =
+ ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
+ cpi->long_rolling_target_bits =
+ ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
+ cpi->long_rolling_actual_bits =
+ ((cpi->long_rolling_actual_bits * 31) +
+ cpi->projected_frame_size + 16) / 32;
+ }
// Actual bits spent
cpi->total_actual_bits += cpi->projected_frame_size;
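
The rolling monitors updated above are integer exponential moving averages with rounding. A minimal sketch of the shared form (a hypothetical helper, not part of the tree):

  /* avg' = ((2^shift - 1) * avg + sample + 2^(shift - 1)) / 2^shift.
   * shift == 2 gives the 3/4 : 1/4 short-term monitors above,
   * shift == 5 gives the 31/32 : 1/32 long-term monitors. */
  static int rolling_avg(int avg, int sample, int shift) {
    const int w = 1 << shift;
    return (avg * (w - 1) + sample + w / 2) / w;
  }
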
@@ -3558,7 +3446,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->twopass.kf_group_bits < 0)
cpi->twopass.kf_group_bits = 0;
- } else if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) {
+ } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
cpi->twopass.gf_group_bits += cpi->this_frame_target - cpi->projected_frame_size;
if (cpi->twopass.gf_group_bits < 0)
@@ -3582,7 +3470,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->twopass.total_left_stats->coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
"%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%6d %5d %5d %5d %8.2f %10d %10.3f"
"%10.3f %8d %10d %10d %10d\n",
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size, 0, //loop_size_estimate,
@@ -3597,9 +3485,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->avg_q,
vp9_convert_qindex_to_q(cpi->ni_av_qi),
vp9_convert_qindex_to_q(cpi->cq_target_quality),
- cpi->zbin_over_quant,
- // cpi->avg_frame_qindex, cpi->zbin_over_quant,
- cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cpi->refresh_golden_frame, cpi->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
(int)cpi->twopass.bits_left,
@@ -3611,7 +3497,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
else
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
"%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%5d %5d %8d %8d %8.2f %10d %10.3f"
"%8d %10d %10d %10d\n",
cpi->common.current_video_frame,
cpi->this_frame_target, cpi->projected_frame_size,
@@ -3627,9 +3513,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->avg_q,
vp9_convert_qindex_to_q(cpi->ni_av_qi),
vp9_convert_qindex_to_q(cpi->cq_target_quality),
- cpi->zbin_over_quant,
- // cpi->avg_frame_qindex, cpi->zbin_over_quant,
- cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cpi->refresh_golden_frame, cpi->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
(int)cpi->twopass.bits_left,
@@ -3645,8 +3529,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
fprintf(fmodes, "%6d:%1d:%1d:%1d ",
cpi->common.current_video_frame,
- cm->frame_type, cm->refresh_golden_frame,
- cm->refresh_alt_ref_frame);
+ cm->frame_type, cpi->refresh_golden_frame,
+ cpi->refresh_alt_ref_frame);
for (i = 0; i < MAX_MODES; i++)
fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
@@ -3665,33 +3549,34 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
#endif
// If this was a kf or Gf note the Q
- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
+ if ((cm->frame_type == KEY_FRAME)
+ || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
cm->last_kf_gf_q = cm->base_qindex;
- if (cm->refresh_golden_frame == 1)
+ if (cpi->refresh_golden_frame == 1)
cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
else
cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
- if (cm->refresh_alt_ref_frame == 1)
+ if (cpi->refresh_alt_ref_frame == 1)
cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
else
cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
- if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed
+ if (cpi->refresh_last_frame & cpi->refresh_golden_frame)
cpi->gold_is_last = 1;
- else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ else if (cpi->refresh_last_frame ^ cpi->refresh_golden_frame)
cpi->gold_is_last = 0;
- if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed
+ if (cpi->refresh_last_frame & cpi->refresh_alt_ref_frame)
cpi->alt_is_last = 1;
- else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other
+ else if (cpi->refresh_last_frame ^ cpi->refresh_alt_ref_frame)
cpi->alt_is_last = 0;
- if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed
+ if (cpi->refresh_alt_ref_frame & cpi->refresh_golden_frame)
cpi->gold_is_alt = 1;
- else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ else if (cpi->refresh_alt_ref_frame ^ cpi->refresh_golden_frame)
cpi->gold_is_alt = 0;
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
@@ -3705,7 +3590,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->gold_is_alt)
cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
- if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
+ if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame
+ && (cm->frame_type != KEY_FRAME))
// Update the alternate reference frame stats as appropriate.
update_alt_ref_frame_stats(cpi);
else
@@ -3727,6 +3613,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
xd->update_mb_segmentation_data = 0;
xd->mode_ref_lf_delta_update = 0;
+ // keep track of the last coded dimensions
+ cm->last_width = cm->Width;
+ cm->last_height = cm->Height;
// Don't increment frame counters if this was an altref buffer update, not a real frame
if (cm->show_frame) {
@@ -3744,8 +3633,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
FILE *recon_file;
sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
recon_file = fopen(filename, "wb");
- fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc,
- cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
+ fwrite(cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].buffer_alloc,
+ cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].frame_size,
+ 1, recon_file);
fclose(recon_file);
}
#endif
@@ -3765,13 +3655,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
unsigned char *dest, unsigned int *frame_flags) {
- if (!cpi->common.refresh_alt_ref_frame)
+ if (!cpi->refresh_alt_ref_frame)
vp9_second_pass(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+
+#ifdef DISABLE_RC_LONG_TERM_MEM
+ cpi->twopass.bits_left -= cpi->this_frame_target;
+#else
cpi->twopass.bits_left -= 8 * *size;
+#endif
- if (!cpi->common.refresh_alt_ref_frame) {
+ if (!cpi->refresh_alt_ref_frame) {
double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate;
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
* cpi->oxcf.two_pass_vbrmin_section / 100);
@@ -3808,9 +3703,8 @@ static int frame_is_reference(const VP9_COMP *cpi) {
const VP9_COMMON *cm = &cpi->common;
const MACROBLOCKD *xd = &cpi->mb.e_mbd;
- return cm->frame_type == KEY_FRAME || cm->refresh_last_frame
- || cm->refresh_golden_frame || cm->refresh_alt_ref_frame
- || cm->copy_buffer_to_gf || cm->copy_buffer_to_arf
+ return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame
+ || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame
|| cm->refresh_entropy_probs
|| xd->mode_ref_lf_delta_update
|| xd->update_mb_segmentation_map || xd->update_mb_segmentation_data;
@@ -3846,9 +3740,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
force_src_buffer = &cpi->alt_ref_buffer;
}
cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
- cm->refresh_alt_ref_frame = 1;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 0;
cm->show_frame = 0;
cpi->source_alt_ref_pending = FALSE; // Clear pending alt ref flag.
cpi->is_src_frame_alt_ref = 0;
@@ -3889,7 +3783,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
}
// adjust frame rates based on timestamps given
- if (!cm->refresh_alt_ref_frame) {
+ if (!cpi->refresh_alt_ref_frame) {
int64_t this_duration;
int step = 0;
@@ -3945,28 +3839,34 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
#if 0
- if (cm->refresh_alt_ref_frame) {
- // cm->refresh_golden_frame = 1;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 0;
+ if (cpi->refresh_alt_ref_frame) {
+ // cpi->refresh_golden_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 0;
} else {
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 1;
}
#endif
- /* find a free buffer for the new frame */
- {
- int i = 0;
- for (; i < NUM_YV12_BUFFERS; i++) {
- if (!cm->yv12_fb[i].flags) {
- cm->new_fb_idx = i;
- break;
- }
- }
- assert(i < NUM_YV12_BUFFERS);
- }
+ /* find a free buffer for the new frame, releasing the reference previously
+ * held.
+ */
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ cm->new_fb_idx = get_free_fb(cm);
+
+ /* Get the mapping of L/G/A to the reference buffer pool */
+ cm->active_ref_idx[0] = cm->ref_frame_map[cpi->lst_fb_idx];
+ cm->active_ref_idx[1] = cm->ref_frame_map[cpi->gld_fb_idx];
+ cm->active_ref_idx[2] = cm->ref_frame_map[cpi->alt_fb_idx];
+
+ /* Reset the frame pointers to the current frame size */
+ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx],
+ cm->mb_cols * 16, cm->mb_rows * 16,
+ VP9BORDERINPIXELS);
+
+ vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
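
The buffer handling added above replaces the old flag scan with a reference-counted pool: the encoder drops its count on the previous new_fb_idx and asks get_free_fb() for a buffer whose count is zero. A rough sketch of what that amounts to (illustrative only; the names cnt and num_buffers stand in for cm->fb_idx_ref_cnt and the pool size):

  /* Illustrative only: take the first buffer with no outstanding references
   * and grab a reference on it. */
  static int get_free_fb_sketch(int *cnt, int num_buffers) {
    int i;
    for (i = 0; i < num_buffers; i++) {
      if (cnt[i] == 0) {
        cnt[i] = 1;  /* the frame about to be encoded now owns this slot */
        return i;
      }
    }
    return -1;  /* pool exhausted; the real code relies on this not happening */
  }
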
if (cpi->pass == 1) {
Pass1Encode(cpi, size, dest, frame_flags);
} else if (cpi->pass == 2) {
@@ -3976,10 +3876,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
}
if (cm->refresh_entropy_probs) {
- if (cm->refresh_alt_ref_frame)
- vpx_memcpy(&cm->lfc_a, &cm->fc, sizeof(cm->fc));
- else
- vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
+ vpx_memcpy(&cm->frame_contexts[cm->frame_context_idx], &cm->fc,
+ sizeof(cm->fc));
}
// if its a dropped frame honor the requests on subsequent frames
@@ -3988,9 +3886,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
// return to normal state
cm->refresh_entropy_probs = 1;
- cm->refresh_alt_ref_frame = 0;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 1;
cm->frame_type = INTER_FRAME;
}
@@ -4113,7 +4011,7 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *flags) {
VP9_COMP *cpi = (VP9_COMP *) comp;
- if (cpi->common.refresh_alt_ref_frame)
+ if (cpi->refresh_alt_ref_frame)
return -1;
else {
int ret;
@@ -4217,17 +4115,31 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
int vp9_set_internal_size(VP9_PTR comp,
VPX_SCALING horiz_mode, VPX_SCALING vert_mode) {
VP9_COMP *cpi = (VP9_COMP *) comp;
+ VP9_COMMON *cm = &cpi->common;
- if (horiz_mode <= ONETWO)
- cpi->common.horiz_scale = horiz_mode;
- else
+ if (horiz_mode > ONETWO)
return -1;
- if (vert_mode <= ONETWO)
- cpi->common.vert_scale = vert_mode;
- else
+ if (vert_mode > ONETWO)
return -1;
+ if (cm->horiz_scale != horiz_mode || cm->vert_scale != vert_mode) {
+ int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
+ int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
+
+ cm->horiz_scale = horiz_mode;
+ cm->vert_scale = vert_mode;
+
+ Scale2Ratio(cm->horiz_scale, &hr, &hs);
+ Scale2Ratio(cm->vert_scale, &vr, &vs);
+
+ // always round up to the next whole number
+ cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
+ cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
+ }
+ assert(cm->Width <= cpi->initial_width);
+ assert(cm->Height <= cpi->initial_height);
+ update_frame_size(cpi);
return 0;
}
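
The scaled dimensions above use the usual add-(divisor - 1) trick for a rounded-up integer division, so an odd source width never loses a pixel: with a one-half horizontal scale (hr = 1, hs = 2) and oxcf.Width = 353, cm->Width becomes (2 - 1 + 353 * 1) / 2 = 177 rather than 176. A tiny standalone form of the same expression (illustrative only):

  /* ceil(w * hr / hs) without floating point, as used for cm->Width/Height. */
  static int scaled_dim(int w, int hr, int hs) {
    return (hs - 1 + w * hr) / hs;
  }
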
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 74a58b430..02a371964 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -29,6 +29,10 @@
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/encoder/vp9_lookahead.h"
+// Experimental rate control switches
+// #define ONE_SHOT_Q_ESTIMATE 1
+// #define DISABLE_RC_LONG_TERM_MEM 1
+
// #define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
#define DEFAULT_GF_INTERVAL 7
@@ -53,7 +57,6 @@
#define GF_ZEROMV_ZBIN_BOOST 12
#define LF_ZEROMV_ZBIN_BOOST 6
#define MV_ZBIN_BOOST 4
-#define ZBIN_OQ_MAX 192
#define VP9_TEMPORAL_ALT_REF 1
@@ -86,12 +89,9 @@ typedef struct {
// 0 = BPRED, ZERO_MV, MV, SPLIT
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
- vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
+ vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
@@ -259,7 +259,9 @@ typedef struct {
int optimize_coefficients;
int no_skip_block4x4_search;
int search_best_filter;
-
+ int splitmode_breakout;
+ int mb16_breakout;
+ int static_segmentation;
} SPEED_FEATURES;
typedef struct {
@@ -301,41 +303,14 @@ typedef struct VP9_COMP {
DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
-
DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(64, short, Y1zbin_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, Y2zbin_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, UVzbin_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, zrun_zbin_boost_y1_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]);
-
- DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]);
-
- DECLARE_ALIGNED(16, short, Y1zbin_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, Y2zbin_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, UVzbin_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_32x32[QINDEX_RANGE][1024]);
-
MACROBLOCK mb;
VP9_COMMON common;
VP9_CONFIG oxcf;
@@ -357,11 +332,17 @@ typedef struct VP9_COMP {
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
int gold_is_alt; // don't do both alt and gold search ( just do gold).
- // int refresh_alt_ref_frame;
+ int scaled_ref_idx[3];
+ int lst_fb_idx;
+ int gld_fb_idx;
+ int alt_fb_idx;
+ int refresh_last_frame;
+ int refresh_golden_frame;
+ int refresh_alt_ref_frame;
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tok;
- unsigned int tok_count;
+ unsigned int tok_count[1 << 6];
unsigned int frames_since_key;
@@ -441,7 +422,6 @@ typedef struct VP9_COMP {
double tot_q;
double avg_q;
- int zbin_over_quant;
int zbin_mode_boost;
int zbin_mode_boost_enabled;
@@ -484,26 +464,17 @@ typedef struct VP9_COMP {
nmv_context_counts NMVcount;
- vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs frame_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_stats frame_hybrid_branch_ct_4x4[BLOCK_TYPES_4X4];
-
- vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs frame_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_stats frame_hybrid_branch_ct_8x8[BLOCK_TYPES_8X8];
-
- vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16];
+ vp9_coeff_count coef_counts_4x4[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES];
+
+ vp9_coeff_count coef_counts_8x8[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES];
+
+ vp9_coeff_count coef_counts_16x16[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES];
vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32];
@@ -683,9 +654,6 @@ typedef struct VP9_COMP {
int droppable;
- // TODO Do we still need this??
- int update_context;
-
int dummy_packing; /* flag to indicate if packing is dummy */
unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
@@ -696,6 +664,8 @@ typedef struct VP9_COMP {
unsigned int mb_mv_ref_count[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
#endif
+ int initial_width;
+ int initial_height;
} VP9_COMP;
void vp9_encode_frame(VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index b443ede6f..6f9333521 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#include <assert.h>
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_picklpf.h"
@@ -27,6 +27,7 @@ void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc,
int yoffset;
int linestocopy;
+ assert(src_ybc->y_stride == dst_ybc->y_stride);
yheight = src_ybc->y_height;
ystride = src_ybc->y_stride;
diff --git a/vp9/encoder/vp9_psnr.c b/vp9/encoder/vp9_psnr.c
index eb00f4159..94394341d 100644
--- a/vp9/encoder/vp9_psnr.c
+++ b/vp9/encoder/vp9_psnr.c
@@ -11,17 +11,16 @@
#include "vpx_scale/yv12config.h"
#include "math.h"
-#include "vp9/common/vp9_systemdependent.h" /* for vp9_clear_system_state() */
#define MAX_PSNR 100
-double vp9_mse2psnr(double Samples, double Peak, double Mse) {
+double vp9_mse2psnr(double samples, double peak, double mse) {
double psnr;
- if ((double)Mse > 0.0)
- psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
+ if (mse > 0.0)
+ psnr = 10.0 * log10(peak * peak * samples / mse);
else
- psnr = MAX_PSNR; // Limit to prevent / 0
+ psnr = MAX_PSNR; // Limit to prevent / 0
if (psnr > MAX_PSNR)
psnr = MAX_PSNR;
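
vp9_mse2psnr() effectively treats its mse argument as a total (summed) squared error, so the expression is the standard PSNR = 10 * log10(peak^2 / (mse / samples)) rearranged to avoid an extra division. A hedged usage sketch for an 8-bit plane (the helper name and arguments here are illustrative, not the encoder's):

  /* Example: PSNR of an 8-bit width x height plane given its summed
   * squared error. */
  double plane_psnr(double sse, int width, int height) {
    const double samples = (double)width * height;
    return vp9_mse2psnr(samples, 255.0, sse);  /* 10*log10(255^2*samples/sse) */
  }
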
diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h
index 121f0dc98..15dd8366b 100644
--- a/vp9/encoder/vp9_psnr.h
+++ b/vp9/encoder/vp9_psnr.h
@@ -12,6 +12,6 @@
#ifndef VP9_ENCODER_VP9_PSNR_H_
#define VP9_ENCODER_VP9_PSNR_H_
-extern double vp9_mse2psnr(double Samples, double Peak, double Mse);
+double vp9_mse2psnr(double samples, double peak, double mse);
#endif // VP9_ENCODER_VP9_PSNR_H_
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 36b656713..399e8ecda 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -21,7 +21,10 @@
extern int enc_debug;
#endif
-void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
+void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -57,35 +60,40 @@ void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
eob = -1;
- for (i = 0; i < b->eob_max_offset; i++) {
- rc = pt_scan[i];
- z = coeff_ptr[rc];
-
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
- zbin_boost_ptr ++;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ if (!b->skip_block) {
+ for (i = 0; i < 16; i++) {
+ rc = pt_scan[i];
+ z = coeff_ptr[rc];
+
+ zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+ zbin_boost_ptr++;
+
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin) {
+ x += round_ptr[rc];
+ y = (((x * quant_ptr[rc]) >> 16) + x)
+ >> quant_shift_ptr[rc]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+
+ if (y) {
+ eob = i; // last nonzero coeffs
+ zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ }
}
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
}
-void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -105,64 +113,55 @@ void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
eob = -1;
- for (i = 0; i < b->eob_max_offset; i++) {
- rc = vp9_default_zig_zag1d_4x4[i];
- z = coeff_ptr[rc];
+ if (!b->skip_block) {
+ for (i = 0; i < 16; i++) {
+ rc = vp9_default_zig_zag1d_4x4[i];
+ z = coeff_ptr[rc];
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
- zbin_boost_ptr ++;
+ zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+ zbin_boost_ptr++;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
- if (x >= zbin) {
- x += round_ptr[rc];
+ if (x >= zbin) {
+ x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ y = (((x * quant_ptr[rc]) >> 16) + x)
+ >> quant_shift_ptr[rc]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ if (y) {
+ eob = i; // last nonzero coeffs
+ zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ }
}
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
}
void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
int i;
- int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
for (i = 0; i < 16; i++) {
TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, &x->e_mbd.block[i]);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_ht_quantize_b_4x4(&x->block[i], &x->e_mbd.block[i], tx_type);
+ vp9_ht_quantize_b_4x4(x, i, tx_type);
} else {
- x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_4x4(x, i);
}
}
- if (has_2nd_order) {
- x->quantize_b_4x4(&x->block[24], &x->e_mbd.block[24]);
- } else {
- vpx_memset(x->e_mbd.block[24].qcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].qcoeff[0]));
- vpx_memset(x->e_mbd.block[24].dqcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].dqcoeff[0]));
- x->e_mbd.block[24].eob = 0;
- }
}
void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i++)
- x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_4x4(x, i);
}
void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
@@ -170,138 +169,101 @@ void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
vp9_quantize_mbuv_4x4_c(x);
}
-void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) {
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int zbin_zrun_index = 0;
- int16_t *coeff_ptr = b->coeff;
- int16_t *zbin_ptr = b->zbin;
- int16_t *round_ptr = b->round;
- int16_t *quant_ptr = b->quant;
- uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
- int16_t *dequant_ptr = d->dequant;
- int zbin_oq_value = b->zbin_extra;
- // double q2nd = 4;
- vpx_memset(qcoeff_ptr, 0, 32);
- vpx_memset(dqcoeff_ptr, 0, 32);
-
- eob = -1;
-
- for (i = 0; i < b->eob_max_offset_8x8; i++) {
- rc = vp9_default_zig_zag1d_4x4[i];
- z = coeff_ptr[rc];
-
- zbin_boost_ptr = &b->zrun_zbin_boost[zbin_zrun_index];
- zbin_zrun_index += 4;
- zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value);
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += (round_ptr[rc]);
- y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_zrun_index = 0;
- }
- }
- }
-
- d->eob = eob + 1;
-}
-
-void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int16_t *zbin_boost_ptr = b->zrun_zbin_boost_8x8;
- int16_t *coeff_ptr = b->coeff;
- int16_t *zbin_ptr = b->zbin_8x8;
- int16_t *round_ptr = b->round;
- int16_t *quant_ptr = b->quant;
- uint8_t *quant_shift_ptr = b->quant_shift;
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
int16_t *qcoeff_ptr = d->qcoeff;
int16_t *dqcoeff_ptr = d->dqcoeff;
- int16_t *dequant_ptr = d->dequant;
- int zbin_oq_value = b->zbin_extra;
vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
- eob = -1;
-
- for (i = 0; i < b->eob_max_offset_8x8; i++) {
- rc = vp9_default_zig_zag1d_8x8[i];
- z = coeff_ptr[rc];
-
- zbin = (zbin_ptr[rc != 0] + *zbin_boost_ptr + zbin_oq_value);
- zbin_boost_ptr++;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += (round_ptr[rc != 0]);
- y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
- >> quant_shift_ptr[rc != 0]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost_8x8;
+ if (!b->skip_block) {
+ int i, rc, eob;
+ int zbin;
+ int x, y, z, sz;
+ int zero_run;
+ int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
+ int16_t *coeff_ptr = b->coeff;
+ int16_t *zbin_ptr = b->zbin;
+ int16_t *round_ptr = b->round;
+ int16_t *quant_ptr = b->quant;
+ uint8_t *quant_shift_ptr = b->quant_shift;
+ int16_t *dequant_ptr = d->dequant;
+ int zbin_oq_value = b->zbin_extra;
+
+ eob = -1;
+
+ // Special case for DC: it is the only coefficient for which the
+ // {zbin, quant, quant_shift, dequant}_ptr[rc != 0] lookups use index 0
+ {
+ z = coeff_ptr[0];
+ zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value);
+ zero_run = 1;
+
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin) {
+ x += (round_ptr[0]);
+ y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x))
+ >> quant_shift_ptr[0]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[0] = x; // write to destination
+ dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value
+
+ if (y) {
+ eob = 0; // last nonzero coeffs
+ zero_run = 0;
+ }
+ }
+ }
+ for (i = 1; i < 64; i++) {
+ rc = vp9_default_zig_zag1d_8x8[i];
+ z = coeff_ptr[rc];
+ zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value);
+ // The original code incremented zero_run while capping it at 15 by
+ // adding "(zero_run < 15)". The same effect is achieved here by
+ // subtracting the sign mask of "(zero_run - 15)".
+ zero_run -= (zero_run - 15) >> 31;
+
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin) {
+ x += (round_ptr[rc != 0]);
+ y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x))
+ >> quant_shift_ptr[1]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value
+
+ if (y) {
+ eob = i; // last nonzero coeffs
+ zero_run = 0;
+ }
}
}
+ xd->eobs[b_idx] = eob + 1;
+ } else {
+ xd->eobs[b_idx] = 0;
}
-
- d->eob = eob + 1;
}
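
The branchless update above relies on the sign bit of (zero_run - 15): an arithmetic right shift by 31 yields -1 while zero_run < 15 and 0 once it saturates, so subtracting it increments with a cap at 15. A small self-check, assuming the usual arithmetic shift of negative ints (demonstration only, not part of the patch):

  #include <assert.h>
  /* Demonstration: the clamp-at-15 increment used in the 8x8 quantizer. */
  int main(void) {
    int zero_run;
    for (zero_run = 0; zero_run <= 15; ++zero_run) {
      int branchy    = zero_run + (zero_run < 15);
      int branchless = zero_run - ((zero_run - 15) >> 31);
      assert(branchy == branchless);
    }
    return 0;
  }
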
void vp9_quantize_mby_8x8(MACROBLOCK *x) {
int i;
- int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
- for (i = 0; i < 16; i ++) {
- x->e_mbd.block[i].eob = 0;
- }
- x->e_mbd.block[24].eob = 0;
for (i = 0; i < 16; i += 4) {
- int ib = (i & 8) + ((i & 4) >> 1);
- TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, &x->e_mbd.block[ib]);
- if (tx_type != DCT_DCT)
- assert(has_2nd_order == 0);
- x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]);
- }
-
- if (has_2nd_order) {
- x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]);
- } else {
- vpx_memset(x->e_mbd.block[24].qcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].qcoeff[0]));
- vpx_memset(x->e_mbd.block[24].dqcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].dqcoeff[0]));
- x->e_mbd.block[24].eob = 0;
+ x->quantize_b_8x8(x, i);
}
}
void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
int i;
- for (i = 16; i < 24; i ++)
- x->e_mbd.block[i].eob = 0;
for (i = 16; i < 24; i += 4)
- x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_8x8(x, i);
}
void vp9_quantize_mb_8x8(MACROBLOCK *x) {
@@ -310,12 +272,7 @@ void vp9_quantize_mb_8x8(MACROBLOCK *x) {
}
void vp9_quantize_mby_16x16(MACROBLOCK *x) {
- int i;
-
- for (i = 0; i < 16; i++)
- x->e_mbd.block[i].eob = 0;
- x->e_mbd.block[24].eob = 0;
- x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]);
+ x->quantize_b_16x16(x, 0);
}
void vp9_quantize_mb_16x16(MACROBLOCK *x) {
@@ -324,42 +281,46 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x) {
}
static void quantize(int16_t *zbin_boost_orig_ptr,
- int16_t *coeff_ptr, int n_coeffs, int max_coeffs,
+ int16_t *coeff_ptr, int n_coeffs, int skip_block,
int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
uint8_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
int16_t *dequant_ptr, int zbin_oq_value,
- int *eob_ptr, const int *scan, int mul) {
+ uint16_t *eob_ptr, const int *scan, int mul) {
int i, rc, eob;
int zbin;
int x, y, z, sz;
+ int zero_run = 0;
int16_t *zbin_boost_ptr = zbin_boost_orig_ptr;
vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
eob = -1;
- for (i = 0; i < max_coeffs; i++) {
- rc = scan[i];
- z = coeff_ptr[rc] * mul;
-
- zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value);
- zbin_boost_ptr ++;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += (round_ptr[rc!=0]);
- y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
- >> quant_shift_ptr[rc!=0]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / mul; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = zbin_boost_orig_ptr;
+
+ if (!skip_block) {
+ for (i = 0; i < n_coeffs; i++) {
+ rc = scan[i];
+ z = coeff_ptr[rc] * mul;
+
+ zbin = (zbin_ptr[rc != 0] + zbin_boost_ptr[zero_run] + zbin_oq_value);
+ zero_run += (zero_run < 15);
+
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin) {
+ x += (round_ptr[rc != 0]);
+ y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
+ >> quant_shift_ptr[rc != 0]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / mul; // dequantized value
+
+ if (y) {
+ eob = i; // last nonzero coeffs
+ zero_run = 0;
+ }
}
}
}
@@ -367,49 +328,54 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
*eob_ptr = eob + 1;
}
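
All of these paths share the same dead-zone quantizer: a coefficient is zeroed unless |z| clears zbin plus the zero-run boost, and is otherwise mapped with y = (((x * quant) >> 16) + x) >> quant_shift, which (given how invert_quant() builds quant and quant_shift) roughly approximates x / dequant after the rounding bias. A stripped-down, hypothetical per-coefficient version:

  /* Hypothetical per-coefficient form of the dead-zone quantizer above. */
  static int quantize_one(int z, int zbin, int round, int quant,
                          int quant_shift, int dequant, int *dqcoeff) {
    const int sz = z >> 31;        /* sign mask of z */
    int x = (z ^ sz) - sz;         /* x = abs(z) */
    int y = 0;
    if (x >= zbin) {               /* outside the dead zone */
      x += round;
      y = (((x * quant) >> 16) + x) >> quant_shift;
      y = (y ^ sz) - sz;           /* restore the sign */
    }
    *dqcoeff = y * dequant;        /* reconstructed value */
    return y;                      /* quantized level (goes to qcoeff) */
  }
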
-void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
- quantize(b->zrun_zbin_boost_16x16,
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
+ quantize(b->zrun_zbin_boost,
b->coeff,
- 256, b->eob_max_offset_16x16,
- b->zbin_16x16, b->round, b->quant, b->quant_shift,
+ 256, b->skip_block,
+ b->zbin, b->round, b->quant, b->quant_shift,
d->qcoeff,
d->dqcoeff,
d->dequant,
b->zbin_extra,
- &d->eob, vp9_default_zig_zag1d_16x16, 1);
+ &xd->eobs[b_idx], vp9_default_zig_zag1d_16x16, 1);
}
void vp9_quantize_sby_32x32(MACROBLOCK *x) {
- x->e_mbd.block[0].eob = 0;
- quantize(x->block[0].zrun_zbin_boost_32x32,
+ MACROBLOCKD *xd = &x->e_mbd;
+ BLOCK *b = &x->block[0];
+ BLOCKD *d = &xd->block[0];
+
+ quantize(b->zrun_zbin_boost,
x->sb_coeff_data.coeff,
- 1024, x->block[0].eob_max_offset_32x32,
- x->block[0].zbin_32x32,
- x->block[0].round, x->block[0].quant, x->block[0].quant_shift,
- x->e_mbd.sb_coeff_data.qcoeff,
- x->e_mbd.sb_coeff_data.dqcoeff,
- x->e_mbd.block[0].dequant,
- x->block[0].zbin_extra,
- &x->e_mbd.block[0].eob,
+ 1024, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->sb_coeff_data.qcoeff,
+ xd->sb_coeff_data.dqcoeff,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[0],
vp9_default_zig_zag1d_32x32, 2);
}
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
int i;
+ MACROBLOCKD *xd = &x->e_mbd;
- x->e_mbd.block[16].eob = 0;
- x->e_mbd.block[20].eob = 0;
for (i = 16; i < 24; i += 4)
- quantize(x->block[i].zrun_zbin_boost_16x16,
+ quantize(x->block[i].zrun_zbin_boost,
x->sb_coeff_data.coeff + 1024 + (i - 16) * 64,
- 256, x->block[i].eob_max_offset_16x16,
- x->block[i].zbin_16x16,
+ 256, x->block[i].skip_block,
+ x->block[i].zbin,
x->block[i].round, x->block[0].quant, x->block[i].quant_shift,
- x->e_mbd.sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
- x->e_mbd.sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
- x->e_mbd.block[i].dequant,
+ xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
+ xd->sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
+ xd->block[i].dequant,
x->block[i].zbin_extra,
- &x->e_mbd.block[i].eob,
+ &xd->eobs[i],
vp9_default_zig_zag1d_16x16, 1);
}
@@ -417,10 +383,9 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
* these two C functions if the corresponding optimized routine is not available.
* The NEON optimized version currently implements fast quantization for a pair
* of blocks. */
-void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2,
- BLOCKD *d1, BLOCKD *d2) {
- vp9_regular_quantize_b_4x4(b1, d1);
- vp9_regular_quantize_b_4x4(b2, d2);
+void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2) {
+ vp9_regular_quantize_b_4x4(x, b_idx1);
+ vp9_regular_quantize_b_4x4(x, b_idx2);
}
static void invert_quant(int16_t *quant,
@@ -439,164 +404,33 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
int i;
int quant_val;
int Q;
- static const int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20,
- 24, 28, 32, 36, 40, 44, 44, 44
- };
-
- static const int zbin_boost_8x8[64] = { 0, 0, 0, 8, 8, 8, 10, 12,
- 14, 16, 18, 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38, 40, 42, 44,
- 46, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48
- };
- static const int zbin_boost_16x16[256] = {
- 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- };
- static const int zbin_boost_32x32[1024] = {
- 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- };
- int qrounding_factor = 48;
+ static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
+ 14, 16, 20, 24, 28, 32, 36, 40 };
for (Q = 0; Q < QINDEX_RANGE; Q++) {
int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
-
-#if CONFIG_LOSSLESS
- if (cpi->oxcf.lossless) {
- if (Q == 0) {
- qzbin_factor = 64;
- qrounding_factor = 64;
- }
+ int qrounding_factor = 48;
+ if (Q == 0) {
+ qzbin_factor = 64;
+ qrounding_factor = 64;
}
-#endif
-
// dc values
quant_val = vp9_dc_quant(Q, cpi->common.y1dc_delta_q);
invert_quant(cpi->Y1quant[Q] + 0,
cpi->Y1quant_shift[Q] + 0, quant_val);
cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7;
cpi->common.Y1dequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- cpi->zrun_zbin_boost_y1_8x8[Q][0] =
- ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_16x16[Q][0] =
- ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
- cpi->Y1zbin_32x32[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_32x32[Q][0] =
- ((quant_val * zbin_boost_32x32[0]) + 64) >> 7;
-
-
- quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q);
- invert_quant(cpi->Y2quant[Q] + 0,
- cpi->Y2quant_shift[Q] + 0, quant_val);
- cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y2dequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- cpi->zrun_zbin_boost_y2_8x8[Q][0] =
- ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
- cpi->zrun_zbin_boost_y2_16x16[Q][0] =
- ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
invert_quant(cpi->UVquant[Q] + 0,
cpi->UVquant_shift[Q] + 0, quant_val);
cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7;
cpi->common.UVdequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- cpi->zrun_zbin_boost_uv_8x8[Q][0] =
- ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
- cpi->zrun_zbin_boost_uv_16x16[Q][0] =
- ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
// all the 4x4 ac values
for (i = 1; i < 16; i++) {
@@ -611,15 +445,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->zrun_zbin_boost_y1[Q][i] =
((quant_val * zbin_boost[i]) + 64) >> 7;
- quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
- invert_quant(cpi->Y2quant[Q] + rc,
- cpi->Y2quant_shift[Q] + rc, quant_val);
- cpi->Y2zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2round[Q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y2dequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][i] =
- ((quant_val * zbin_boost[i]) + 64) >> 7;
-
quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
invert_quant(cpi->UVquant[Q] + rc,
cpi->UVquant_shift[Q] + rc, quant_val);
@@ -629,57 +454,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->zrun_zbin_boost_uv[Q][i] =
((quant_val * zbin_boost[i]) + 64) >> 7;
}
-
- // 8x8 structures... only zbin seperated out for now
- // This needs cleaning up for 8x8 especially if we are to add
- // support for non flat Q matices
- for (i = 1; i < 64; i++) {
- int rc = vp9_default_zig_zag1d_8x8[i];
-
- quant_val = vp9_ac_yquant(Q);
- cpi->Y1zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_8x8[Q][i] =
- ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
-
- quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y2_8x8[Q][i] =
- ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
-
- quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVzbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_uv_8x8[Q][i] =
- ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
- }
-
- // 16x16 structures. Same comment above applies.
- for (i = 1; i < 256; i++) {
- int rc = vp9_default_zig_zag1d_16x16[i];
-
- quant_val = vp9_ac_yquant(Q);
- cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_16x16[Q][i] =
- ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
-
- quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y2_16x16[Q][i] =
- ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
-
- quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_uv_16x16[Q][i] =
- ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
- }
- // 32x32 structures. Same comment above applies.
- for (i = 1; i < 1024; i++) {
- int rc = vp9_default_zig_zag1d_32x32[i];
-
- quant_val = vp9_ac_yquant(Q);
- cpi->Y1zbin_32x32[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_32x32[Q][i] =
- ((quant_val * zbin_boost_32x32[i]) + 64) >> 7;
- }
}
}
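
vp9_init_quantizer() keeps all of these thresholds in Q7 fixed point: zbin is roughly 84/128 (or 80/128 for larger quantizers) of the dequant step, round is 48/128 of it, and the zero-run boost scales the per-position table by the same step. For instance, with quant_val = 40 and qzbin_factor = 84, zbin = (84 * 40 + 64) >> 7 = 26. A tiny helper showing the common rounding pattern (illustrative only):

  /* Q7 fixed-point scaling used throughout vp9_init_quantizer():
   * result ~= round((factor / 128) * value). */
  static int scale_q7(int value, int factor) {
    return (factor * value + 64) >> 7;
  }
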
@@ -709,106 +483,40 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
// Y
zbin_extra = (cpi->common.Y1dequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 0; i < 16; i++) {
x->block[i].quant = cpi->Y1quant[QIndex];
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
- x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
- x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
- x->block[i].zbin_32x32 = cpi->Y1zbin_32x32[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
- x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
- x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
- x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex];
x->block[i].zbin_extra = (int16_t)zbin_extra;
- // Segment max eob offset feature.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
- x->block[i].eob_max_offset =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_8x8 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_16x16 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_32x32 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- } else {
- x->block[i].eob_max_offset = 16;
- x->block[i].eob_max_offset_8x8 = 64;
- x->block[i].eob_max_offset_16x16 = 256;
- x->block[i].eob_max_offset_32x32 = 1024;
- }
+ // Segment skip feature.
+ x->block[i].skip_block =
+ vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
}
// UV
zbin_extra = (cpi->common.UVdequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 16; i < 24; i++) {
x->block[i].quant = cpi->UVquant[QIndex];
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
- x->block[i].zbin_8x8 = cpi->UVzbin_8x8[QIndex];
- x->block[i].zbin_16x16 = cpi->UVzbin_16x16[QIndex];
x->block[i].round = cpi->UVround[QIndex];
x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
- x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex];
- x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex];
-
x->block[i].zbin_extra = (int16_t)zbin_extra;
- // Segment max eob offset feature.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
- x->block[i].eob_max_offset =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_8x8 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_16x16 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- } else {
- x->block[i].eob_max_offset = 16;
- x->block[i].eob_max_offset_8x8 = 64;
- x->block[i].eob_max_offset_16x16 = 256;
- }
- }
-
- // Y2
- zbin_extra = (cpi->common.Y2dequant[QIndex][1] *
- ((cpi->zbin_over_quant / 2) +
- cpi->zbin_mode_boost +
- x->act_zbin_adj)) >> 7;
-
- x->block[24].quant = cpi->Y2quant[QIndex];
- x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
- x->block[24].zbin = cpi->Y2zbin[QIndex];
- x->block[24].zbin_8x8 = cpi->Y2zbin_8x8[QIndex];
- x->block[24].zbin_16x16 = cpi->Y2zbin_16x16[QIndex];
- x->block[24].round = cpi->Y2round[QIndex];
- x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
- x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
- x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex];
- x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex];
- x->block[24].zbin_extra = (int16_t)zbin_extra;
-
- // TBD perhaps not use for Y2
- // Segment max eob offset feature.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
- x->block[24].eob_max_offset =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[24].eob_max_offset_8x8 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- } else {
- x->block[24].eob_max_offset = 16;
- x->block[24].eob_max_offset_8x8 = 4;
+ // Segment skip feature.
+ x->block[i].skip_block =
+ vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
}
/* save this macroblock QIndex for vp9_update_zbin_extra() */
@@ -822,8 +530,7 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) {
// Y
zbin_extra = (cpi->common.Y1dequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 0; i < 16; i++) {
x->block[i].zbin_extra = (int16_t)zbin_extra;
@@ -831,21 +538,12 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) {
// UV
zbin_extra = (cpi->common.UVdequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 16; i < 24; i++) {
x->block[i].zbin_extra = (int16_t)zbin_extra;
}
-
- // Y2
- zbin_extra = (cpi->common.Y2dequant[QIndex][1] *
- ((cpi->zbin_over_quant / 2) +
- cpi->zbin_mode_boost +
- x->act_zbin_adj)) >> 7;
-
- x->block[24].zbin_extra = (int16_t)zbin_extra;
}
void vp9_frame_init_quantizer(VP9_COMP *cpi) {
@@ -864,10 +562,8 @@ void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) {
// if any of the delta_q values are changing update flag will
// have to be set.
cm->y1dc_delta_q = 0;
- cm->y2ac_delta_q = 0;
cm->uvdc_delta_q = 0;
cm->uvac_delta_q = 0;
- cm->y2dc_delta_q = 0;
// quantizer has to be reinitialized if any delta_q changes.
// As there are not any here for now this is inactive code.
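For reference, the simplified zbin_extra expression in the hunks above boils down to a single scaled product. A minimal worked sketch follows; the dequant and boost values are assumed for illustration, not taken from the codec's tables.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative only: mirrors zbin_extra = (dequant * (boost + adj)) >> 7. */
    int main(void) {
      int dequant_ac = 32;       /* stands in for cpi->common.Y1dequant[QIndex][1] */
      int zbin_mode_boost = 32;  /* assumed value */
      int act_zbin_adj = 0;      /* assumed value */
      int16_t zbin_extra =
          (int16_t)((dequant_ac * (zbin_mode_boost + act_zbin_adj)) >> 7);
      printf("zbin_extra = %d\n", zbin_extra);  /* (32 * 32) >> 7 = 8 */
      return 0;
    }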
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index ac44a751c..d338e620a 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -14,10 +14,10 @@
#include "vp9/encoder/vp9_block.h"
#define prototype_quantize_block(sym) \
- void (sym)(BLOCK *b,BLOCKD *d)
+ void (sym)(MACROBLOCK *mb, int b_idx)
#define prototype_quantize_block_pair(sym) \
- void (sym)(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
+ void (sym)(MACROBLOCK *mb, int b_idx1, int b_idx2)
#define prototype_quantize_mb(sym) \
void (sym)(MACROBLOCK *x)
@@ -27,7 +27,7 @@
#endif
#define prototype_quantize_block_type(sym) \
- void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
+ void (sym)(MACROBLOCK *mb, int b_ix, TX_TYPE type)
extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4);
#ifndef vp9_quantize_quantb_4x4
@@ -50,11 +50,6 @@ extern prototype_quantize_block(vp9_quantize_quantb_8x8);
#endif
extern prototype_quantize_block(vp9_quantize_quantb_16x16);
-#ifndef vp9_quantize_quantb_2x2
-#define vp9_quantize_quantb_2x2 vp9_regular_quantize_b_2x2
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_2x2);
-
#ifndef vp9_quantize_mb_4x4
#define vp9_quantize_mb_4x4 vp9_quantize_mb_4x4_c
#endif
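The prototype changes above mean quantize callers now receive the MACROBLOCK plus a block index instead of BLOCK/BLOCKD pointers; the vp9_rdopt.c hunks later in this diff migrate the call sites accordingly. A toy sketch of the new call shape, using simplified stand-in types rather than the real vp9 structs:

    #include <stddef.h>

    typedef struct { int coeff; } BLOCK;
    typedef struct MACROBLOCK {
      BLOCK block[25];
      void (*quantize_b_4x4)(struct MACROBLOCK *mb, int b_idx);
    } MACROBLOCK;

    /* Old shape: x->quantize_b_4x4(be, bd);
     * New shape: pass the macroblock and the block's index. */
    static void quantize_call_site(MACROBLOCK *x, BLOCK *be) {
      x->quantize_b_4x4(x, (int)(be - x->block));
    }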
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index f663b56c9..a2a79574d 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -14,8 +14,8 @@
#include <string.h>
#include <limits.h>
#include <assert.h>
+#include <math.h>
-#include "math.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_modecont.h"
#include "vp9/common/vp9_common.h"
@@ -25,9 +25,10 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_seg_common.h"
-#define MIN_BPB_FACTOR 0.005
-#define MAX_BPB_FACTOR 50
+#define MIN_BPB_FACTOR 0.005
+#define MAX_BPB_FACTOR 50
#ifdef MODE_STATS
extern unsigned int y_modes[VP9_YMODES];
@@ -113,13 +114,19 @@ static int kfboost_qadjust(int qindex) {
return retval;
}
-int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex) {
- if (frame_type == KEY_FRAME)
- return (int)(4500000 / vp9_convert_qindex_to_q(qindex));
- else
- return (int)(2850000 / vp9_convert_qindex_to_q(qindex));
-}
+int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor) {
+ int enumerator;
+ double q = vp9_convert_qindex_to_q(qindex);
+
+ if (frame_type == KEY_FRAME) {
+ enumerator = 4500000;
+ } else {
+ enumerator = 2850000;
+ }
+ return (int)(0.5 + (enumerator * correction_factor / q));
+}
void vp9_save_coding_context(VP9_COMP *cpi) {
CODING_CONTEXT *const cc = &cpi->coding_context;
@@ -168,11 +175,8 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
vp9_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas);
vp9_copy(cc->coef_probs_4x4, cm->fc.coef_probs_4x4);
- vp9_copy(cc->hybrid_coef_probs_4x4, cm->fc.hybrid_coef_probs_4x4);
vp9_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8);
- vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
- vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32);
vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
#if CONFIG_COMP_INTERINTRA_PRED
@@ -226,11 +230,8 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
vp9_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas);
vp9_copy(cm->fc.coef_probs_4x4, cc->coef_probs_4x4);
- vp9_copy(cm->fc.hybrid_coef_probs_4x4, cc->hybrid_coef_probs_4x4);
vp9_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8);
- vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
- vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32);
vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
#if CONFIG_COMP_INTERINTRA_PRED
@@ -238,68 +239,33 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
#endif
}
-
void vp9_setup_key_frame(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
- // Setup for Key frame:
- vp9_default_coef_probs(& cpi->common);
- vp9_kf_default_bmode_probs(cpi->common.kf_bmode_prob);
- vp9_init_mbmode_probs(& cpi->common);
- vp9_default_bmode_probs(cm->fc.bmode_prob);
-
- if(cm->last_frame_seg_map)
- vpx_memset(cm->last_frame_seg_map, 0, (cm->mb_rows * cm->mb_cols));
-
- vp9_init_mv_probs(& cpi->common);
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
- // cpi->common.filter_level = 0; // Reset every key frame.
- cpi->common.filter_level = cpi->common.base_qindex * 3 / 8;
+ vp9_setup_past_independence(cm, xd);
// interval before next GF
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
-
- cpi->common.refresh_golden_frame = TRUE;
- cpi->common.refresh_alt_ref_frame = TRUE;
-
- vp9_init_mode_contexts(&cpi->common);
- vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
- vpx_memcpy(&cpi->common.lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
-
- vpx_memset(cm->prev_mip, 0,
- (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
- vpx_memset(cm->mip, 0,
- (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
-
- vp9_update_mode_info_border(cm, cm->mip);
- vp9_update_mode_info_in_image(cm, cm->mi);
-
-#if CONFIG_NEW_MVREF
- if (1) {
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
-
- // Defaults probabilities for encoding the MV ref id signal
- vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB,
- sizeof(xd->mb_mv_ref_probs));
- }
-#endif
+ /* All buffers are implicitly updated on key frames. */
+ cpi->refresh_golden_frame = TRUE;
+ cpi->refresh_alt_ref_frame = TRUE;
}
void vp9_setup_inter_frame(VP9_COMP *cpi) {
- if (cpi->common.refresh_alt_ref_frame) {
- vpx_memcpy(&cpi->common.fc,
- &cpi->common.lfc_a,
- sizeof(cpi->common.fc));
- } else {
- vpx_memcpy(&cpi->common.fc,
- &cpi->common.lfc,
- sizeof(cpi->common.fc));
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ if (cm->error_resilient_mode) {
+ vp9_setup_past_independence(cm, xd);
}
+ assert(cm->frame_context_idx < NUM_FRAME_CONTEXTS);
+ vpx_memcpy(&cm->fc, &cm->frame_contexts[cm->frame_context_idx],
+ sizeof(cm->fc));
}
-
static int estimate_bits_at_q(int frame_kind, int Q, int MBs,
double correction_factor) {
- int Bpm = (int)(.5 + correction_factor * vp9_bits_per_mb(frame_kind, Q));
+ int Bpm = (int)(vp9_bits_per_mb(frame_kind, Q, correction_factor));
/* Attempt to retain reasonable accuracy without overflow. The cutoff is
* chosen such that the maximum product of Bpm and MBs fits 31 bits. The
@@ -358,7 +324,7 @@ static void calc_pframe_target_size(VP9_COMP *cpi) {
// Special alt reference frame case
- if (cpi->common.refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
// Per frame bit target for the alt ref frame
cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
cpi->this_frame_target = cpi->per_frame_bandwidth;
@@ -377,7 +343,7 @@ static void calc_pframe_target_size(VP9_COMP *cpi) {
if (cpi->this_frame_target < min_frame_target)
cpi->this_frame_target = min_frame_target;
- if (!cpi->common.refresh_alt_ref_frame)
+ if (!cpi->refresh_alt_ref_frame)
// Note the baseline target data rate for this inter frame.
cpi->inter_frame_target = cpi->this_frame_target;
@@ -386,7 +352,7 @@ static void calc_pframe_target_size(VP9_COMP *cpi) {
// int Boost = 0;
int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
- cpi->common.refresh_golden_frame = TRUE;
+ cpi->refresh_golden_frame = TRUE;
calc_gf_params(cpi);
@@ -431,35 +397,18 @@ void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
if (cpi->common.frame_type == KEY_FRAME) {
rate_correction_factor = cpi->key_frame_rate_correction_factor;
} else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
rate_correction_factor = cpi->gf_rate_correction_factor;
else
rate_correction_factor = cpi->rate_correction_factor;
}
- // Work out how big we would have expected the frame to be at this Q given the current correction factor.
+ // Work out how big we would have expected the frame to be at this Q given
+ // the current correction factor.
// Stay in double to avoid int overflow when values are large
projected_size_based_on_q =
- (int)(((.5 + rate_correction_factor *
- vp9_bits_per_mb(cpi->common.frame_type, Q)) *
- cpi->common.MBs) / (1 << BPER_MB_NORMBITS));
-
- // Make some allowance for cpi->zbin_over_quant
- if (cpi->zbin_over_quant > 0) {
- int Z = cpi->zbin_over_quant;
- double Factor = 0.99;
- double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX;
-
- while (Z > 0) {
- Z--;
- projected_size_based_on_q =
- (int)(Factor * projected_size_based_on_q);
- Factor += factor_adjustment;
-
- if (Factor >= 0.999)
- Factor = 0.999;
- }
- }
+ estimate_bits_at_q(cpi->common.frame_type, Q,
+ cpi->common.MBs, rate_correction_factor);
// Work out a size correction factor.
// if ( cpi->this_frame_target > 0 )
@@ -505,7 +454,7 @@ void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
if (cpi->common.frame_type == KEY_FRAME)
cpi->key_frame_rate_correction_factor = rate_correction_factor;
else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
cpi->gf_rate_correction_factor = rate_correction_factor;
else
cpi->rate_correction_factor = rate_correction_factor;
@@ -522,14 +471,11 @@ int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) {
int bits_per_mb_at_this_q;
double correction_factor;
- // Reset Zbin OQ value
- cpi->zbin_over_quant = 0;
-
// Select the appropriate correction factor based upon type of frame.
if (cpi->common.frame_type == KEY_FRAME)
correction_factor = cpi->key_frame_rate_correction_factor;
else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
correction_factor = cpi->gf_rate_correction_factor;
else
correction_factor = cpi->rate_correction_factor;
@@ -545,8 +491,7 @@ int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) {
do {
bits_per_mb_at_this_q =
- (int)(.5 + correction_factor *
- vp9_bits_per_mb(cpi->common.frame_type, i));
+ (int)(vp9_bits_per_mb(cpi->common.frame_type, i, correction_factor));
if (bits_per_mb_at_this_q <= target_bits_per_mb) {
if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
@@ -559,45 +504,6 @@ int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) {
last_error = bits_per_mb_at_this_q - target_bits_per_mb;
} while (++i <= cpi->active_worst_quality);
-
- // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like
- // the RD multiplier and zero bin size.
- if (Q >= MAXQ) {
- int zbin_oqmax;
-
- double Factor = 0.99;
- double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX;
-
- if (cpi->common.frame_type == KEY_FRAME)
- zbin_oqmax = 0; // ZBIN_OQ_MAX/16
- else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active))
- zbin_oqmax = 16;
- else
- zbin_oqmax = ZBIN_OQ_MAX;
-
- // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true.
- // The effect will be highly clip dependent and may well have sudden steps.
- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero
- // bin and hence decreasing the number of low magnitude non zero coefficients.
- while (cpi->zbin_over_quant < zbin_oqmax) {
- cpi->zbin_over_quant++;
-
- if (cpi->zbin_over_quant > zbin_oqmax)
- cpi->zbin_over_quant = zbin_oqmax;
-
- // Adjust bits_per_mb_at_this_q estimate
- bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
- Factor += factor_adjustment;
-
- if (Factor >= 0.999)
- Factor = 0.999;
-
- if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate
- break;
- }
-
- }
-
return Q;
}
@@ -671,7 +577,7 @@ void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit,
*frame_over_shoot_limit = cpi->this_frame_target * 9 / 8;
*frame_under_shoot_limit = cpi->this_frame_target * 7 / 8;
} else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) {
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) {
*frame_over_shoot_limit = cpi->this_frame_target * 9 / 8;
*frame_under_shoot_limit = cpi->this_frame_target * 7 / 8;
} else {
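To make the refactored vp9_bits_per_mb() earlier in this file concrete, here is a minimal sketch of the same formula. The q value and correction factor are assumed for illustration (they would normally come from vp9_convert_qindex_to_q() and the rate-correction state), and the result is still in the normalized units that estimate_bits_at_q() later divides by (1 << BPER_MB_NORMBITS).

    #include <stdio.h>

    /* Illustrative restatement of the new vp9_bits_per_mb() with assumed inputs. */
    static int bits_per_mb_sketch(int is_key_frame, double q, double correction_factor) {
      int enumerator = is_key_frame ? 4500000 : 2850000;
      return (int)(0.5 + enumerator * correction_factor / q);
    }

    int main(void) {
      /* Inter frame, q assumed to be 20.0, correction factor 1.0. */
      printf("%d\n", bits_per_mb_sketch(0, 20.0, 1.0));  /* prints 142500 */
      return 0;
    }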
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index c6484817f..473317605 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -16,23 +16,24 @@
#define FRAME_OVERHEAD_BITS 200
-extern void vp9_save_coding_context(VP9_COMP *cpi);
-extern void vp9_restore_coding_context(VP9_COMP *cpi);
+void vp9_save_coding_context(VP9_COMP *cpi);
+void vp9_restore_coding_context(VP9_COMP *cpi);
-extern void vp9_setup_key_frame(VP9_COMP *cpi);
-extern void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
-extern int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame);
-extern void vp9_adjust_key_frame_context(VP9_COMP *cpi);
-extern void vp9_compute_frame_size_bounds(VP9_COMP *cpi,
- int *frame_under_shoot_limit,
- int *frame_over_shoot_limit);
+void vp9_setup_key_frame(VP9_COMP *cpi);
+void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
+int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame);
+void vp9_adjust_key_frame_context(VP9_COMP *cpi);
+void vp9_compute_frame_size_bounds(VP9_COMP *cpi,
+ int *frame_under_shoot_limit,
+ int *frame_over_shoot_limit);
// return of 0 means drop frame
-extern int vp9_pick_frame_size(VP9_COMP *cpi);
+int vp9_pick_frame_size(VP9_COMP *cpi);
-extern double vp9_convert_qindex_to_q(int qindex);
-extern int vp9_gfboost_qadjust(int qindex);
-extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex);
+double vp9_convert_qindex_to_q(int qindex);
+int vp9_gfboost_qadjust(int qindex);
+extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor);
void vp9_setup_inter_frame(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e8d0cc68e..59e33a464 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -23,7 +23,6 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
@@ -151,20 +150,21 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
static void fill_token_costs(vp9_coeff_count *c,
vp9_coeff_probs *p,
int block_type_counts) {
- int i, j, k;
+ int i, j, k, l;
for (i = 0; i < block_type_counts; i++)
- for (j = 0; j < COEF_BANDS; j++)
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- if (k == 0 && ((j > 0 && i > 0) || (j > 1 && i == 0)))
- vp9_cost_tokens_skip((int *)(c[i][j][k]),
- p[i][j][k],
- vp9_coef_tree);
- else
- vp9_cost_tokens((int *)(c[i][j][k]),
- p[i][j][k],
- vp9_coef_tree);
- }
+ for (j = 0; j < REF_TYPES; j++)
+ for (k = 0; k < COEF_BANDS; k++)
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ if (l == 0 && k > 0)
+ vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
+ p[i][j][k][l],
+ vp9_coef_tree);
+ else
+ vp9_cost_tokens((int *)(c[i][j][k][l]),
+ p[i][j][k][l],
+ vp9_coef_tree);
+ }
}
@@ -218,16 +218,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
cpi->RDMULT = compute_rd_mult(QIndex);
- // Extend rate multiplier along side quantizer zbin increases
- if (cpi->zbin_over_quant > 0) {
- double oq_factor;
-
- // Experimental code using the same basic equation as used for Q above
- // The units of cpi->zbin_over_quant are 1/128 of Q bin size
- oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
- cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
- }
-
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
@@ -279,20 +269,11 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
}
fill_token_costs(cpi->mb.token_costs[TX_4X4],
- cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4);
- fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4],
- cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4);
-
+ cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_8X8],
- cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8);
- fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8],
- cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8);
-
+ cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_16X16],
- cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16);
- fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
- cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16);
-
+ cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
@@ -321,26 +302,7 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
return error;
}
-int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) {
- BLOCK *be;
- BLOCKD *bd;
- int i, j;
- int berror, error = 0;
-
- for (i = 0; i < 16; i+=4) {
- be = &mb->block[i];
- bd = &mb->e_mbd.block[i];
- berror = 0;
- for (j = dc; j < 64; j++) {
- int this_diff = be->coeff[j] - bd->dqcoeff[j];
- berror += this_diff * this_diff;
- }
- error += berror;
- }
- return error;
-}
-
-int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
+int vp9_mbblock_error_c(MACROBLOCK *mb) {
BLOCK *be;
BLOCKD *bd;
int i, j;
@@ -350,7 +312,7 @@ int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
be = &mb->block[i];
bd = &mb->e_mbd.block[i];
berror = 0;
- for (j = dc; j < 16; j++) {
+ for (j = 0; j < 16; j++) {
int this_diff = be->coeff[j] - bd->dqcoeff[j];
berror += this_diff * this_diff;
}
@@ -417,41 +379,36 @@ int vp9_uvsse(MACROBLOCK *x) {
sse2 += sse1;
}
return sse2;
-
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#else
-#define PT pt
-#endif
-static int cost_coeffs(MACROBLOCK *mb,
- BLOCKD *b, PLANE_TYPE type,
- ENTROPY_CONTEXT *a,
- ENTROPY_CONTEXT *l,
- TX_SIZE tx_size) {
+static INLINE int cost_coeffs(MACROBLOCK *mb,
+ BLOCKD *b, PLANE_TYPE type,
+ ENTROPY_CONTEXT *a,
+ ENTROPY_CONTEXT *l,
+ TX_SIZE tx_size) {
int pt;
- const int eob = b->eob;
- MACROBLOCKD *xd = &mb->e_mbd;
+ MACROBLOCKD *const xd = &mb->e_mbd;
const int ib = (int)(b - xd->block);
- int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
+ const int eob = xd->eobs[ib];
+ int c = 0;
int cost = 0, seg_eob;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- const int *scan, *band;
+ const int *scan;
int16_t *qcoeff_ptr = b->qcoeff;
+ const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type(xd, b) : DCT_DCT;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
-#endif
-
+ unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
+ mb->token_costs[tx_size][type][ref];
ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
+ ENTROPY_CONTEXT *const a1 = a +
+ sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
+ ENTROPY_CONTEXT *const l1 = l +
+ sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
switch (tx_size) {
case TX_4X4:
scan = vp9_default_zig_zag1d_4x4;
- band = vp9_coef_bands_4x4;
seg_eob = 16;
if (type == PLANE_TYPE_Y_WITH_DC) {
if (tx_type == ADST_DCT) {
@@ -462,30 +419,32 @@ static int cost_coeffs(MACROBLOCK *mb,
}
break;
case TX_8X8:
- if (type == PLANE_TYPE_Y2) {
- scan = vp9_default_zig_zag1d_4x4;
- band = vp9_coef_bands_4x4;
- seg_eob = 4;
- } else {
- scan = vp9_default_zig_zag1d_8x8;
- band = vp9_coef_bands_8x8;
- seg_eob = 64;
- }
+ a_ec = (a[0] + a[1]) != 0;
+ l_ec = (l[0] + l[1]) != 0;
+ scan = vp9_default_zig_zag1d_8x8;
+ seg_eob = 64;
break;
case TX_16X16:
scan = vp9_default_zig_zag1d_16x16;
- band = vp9_coef_bands_16x16;
seg_eob = 256;
if (type == PLANE_TYPE_UV) {
const int uv_idx = ib - 16;
qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
+ a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
}
break;
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
- band = vp9_coef_bands_32x32;
seg_eob = 1024;
qcoeff_ptr = xd->sb_coeff_data.qcoeff;
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
break;
default:
abort();
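The reworked context handling in cost_coeffs() above collapses the per-4x4 entropy contexts covered by a larger transform into one "any nonzero" flag. A stand-alone sketch of that merge; the helper name and array sizes are illustrative only:

    #include <stdio.h>

    typedef signed char ENTROPY_CONTEXT;

    /* Merge the 4x4-unit contexts spanned by an NxN transform into a single flag,
     * as the TX_8X8/TX_16X16/TX_32X32 cases above do with (a[0] + a[1] + ...) != 0. */
    static int merged_context(const ENTROPY_CONTEXT *ctx, int units) {
      int sum = 0, i;
      for (i = 0; i < units; i++)
        sum += ctx[i];
      return sum != 0;
    }

    int main(void) {
      ENTROPY_CONTEXT above[4] = { 0, 1, 0, 0 };  /* a 16x16 luma block spans 4 units */
      printf("%d\n", merged_context(above, 4));   /* prints 1 */
      return 0;
    }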
@@ -493,59 +452,45 @@ static int cost_coeffs(MACROBLOCK *mb,
}
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
- pn = pt;
-#endif
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
- seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
+ seg_eob = 0;
- if (tx_type != DCT_DCT) {
- for (; c < eob; c++) {
- int v = qcoeff_ptr[scan[c]];
- int t = vp9_dct_value_tokens_ptr[v].Token;
- cost += mb->hybrid_token_costs[tx_size][type][band[c]][PT][t];
- cost += vp9_dct_value_cost_ptr[v];
- pt = vp9_prev_token_class[t];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
-#endif
- }
- if (c < seg_eob)
- cost += mb->hybrid_token_costs[tx_size][type][band[c]]
- [PT][DCT_EOB_TOKEN];
- } else {
+ {
+ int recent_energy = 0;
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp9_dct_value_tokens_ptr[v].Token;
- cost += mb->token_costs[tx_size][type][band[c]][pt][t];
+ cost += token_costs[get_coef_band(tx_size, c)][pt][t];
cost += vp9_dct_value_cost_ptr[v];
- pt = vp9_prev_token_class[t];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
-#endif
+ pt = vp9_get_coef_context(&recent_energy, t);
}
if (c < seg_eob)
- cost += mb->token_costs[tx_size][type][band[c]]
- [PT][DCT_EOB_TOKEN];
+ cost += mb->token_costs[tx_size][type][ref][get_coef_band(tx_size, c)]
+ [pt][DCT_EOB_TOKEN];
}
// is eob first coefficient;
- pt = (c > !type);
+ pt = (c > 0);
*a = *l = pt;
+ if (tx_size >= TX_8X8) {
+ a[1] = l[1] = pt;
+ if (tx_size >= TX_16X16) {
+ if (type == PLANE_TYPE_UV) {
+ a1[0] = a1[1] = l1[0] = l1[1] = pt;
+ } else {
+ a[2] = a[3] = l[2] = l[3] = pt;
+ if (tx_size >= TX_32X32) {
+ a1[0] = a1[1] = a1[2] = a1[3] = pt;
+ l1[0] = l1[1] = l1[2] = l1[3] = pt;
+ }
+ }
+ }
+ }
return cost;
}
-static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) {
+static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -565,19 +510,11 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) {
}
for (b = 0; b < 16; b++)
- cost += cost_coeffs(mb, xd->block + b,
- (has_2nd_order ?
- PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC),
+ cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
- if (has_2nd_order)
- cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
- ta + vp9_block2above[TX_4X4][24],
- tl + vp9_block2left[TX_4X4][24],
- TX_4X4);
-
return cost;
}
@@ -586,26 +523,17 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb,
int *Distortion,
int *skippable, int backup) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const mb_y2 = mb->block + 24;
- BLOCKD *const x_y2 = xd->block + 24;
- int d, has_2nd_order;
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- has_2nd_order = get_2nd_order_usage(xd);
- // Fdct and building the 2nd order block
vp9_transform_mby_4x4(mb);
vp9_quantize_mby_4x4(mb);
- d = vp9_mbblock_error(mb, has_2nd_order);
- if (has_2nd_order)
- d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
-
- *Distortion = (d >> 2);
- // rate
- *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup);
- *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order);
+
+ *Distortion = vp9_mbblock_error(mb) >> 2;
+ *Rate = rdcost_mby_4x4(mb, backup);
+ *skippable = vp9_mby_is_skippable_4x4(xd);
}
-static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) {
+static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -625,18 +553,11 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) {
}
for (b = 0; b < 16; b += 4)
- cost += cost_coeffs(mb, xd->block + b,
- (has_2nd_order ?
- PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC),
+ cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b],
TX_8X8);
- if (has_2nd_order)
- cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
- ta + vp9_block2above[TX_8X8][24],
- tl + vp9_block2left[TX_8X8][24],
- TX_8X8);
return cost;
}
@@ -645,23 +566,14 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb,
int *Distortion,
int *skippable, int backup) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const mb_y2 = mb->block + 24;
- BLOCKD *const x_y2 = xd->block + 24;
- int d, has_2nd_order;
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
-
vp9_transform_mby_8x8(mb);
vp9_quantize_mby_8x8(mb);
- has_2nd_order = get_2nd_order_usage(xd);
- d = vp9_mbblock_error_8x8_c(mb, has_2nd_order);
- if (has_2nd_order)
- d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
-
- *Distortion = (d >> 2);
- // rate
- *Rate = rdcost_mby_8x8(mb, has_2nd_order, backup);
- *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, has_2nd_order);
+
+ *Distortion = vp9_mbblock_error(mb) >> 2;
+ *Rate = rdcost_mby_8x8(mb, backup);
+ *skippable = vp9_mby_is_skippable_8x8(xd);
}
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
@@ -687,7 +599,6 @@ static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
int *skippable, int backup) {
- int d;
MACROBLOCKD *xd = &mb->e_mbd;
xd->mode_info_context->mbmi.txfm_size = TX_16X16;
@@ -696,15 +607,13 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
// TODO(jingning) is it possible to quickly determine whether to force
// trailing coefficients to be zero, instead of running trellis
// optimization in the rate-distortion optimization loop?
- if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
+ if (mb->optimize &&
+ xd->mode_info_context->mbmi.mode < I8X8_PRED)
vp9_optimize_mby_16x16(mb);
- d = vp9_mbblock_error(mb, 0);
-
- *Distortion = (d >> 2);
- // rate
+ *Distortion = vp9_mbblock_error(mb) >> 2;
*Rate = rdcost_mby_16x16(mb, backup);
- *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd);
+ *skippable = vp9_mby_is_skippable_16x16(xd);
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -820,15 +729,15 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
MACROBLOCKD * const xd = &x->e_mbd;
- ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT *ta, *tl;
if (backup) {
ta = (ENTROPY_CONTEXT *) &t_above,
tl = (ENTROPY_CONTEXT *) &t_left;
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
} else {
ta = (ENTROPY_CONTEXT *) xd->above_context;
tl = (ENTROPY_CONTEXT *) xd->left_context;
@@ -857,21 +766,18 @@ static void super_block_yrd_32x32(MACROBLOCK *x,
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
MACROBLOCKD * const xd = &x->e_mbd;
SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
-#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
+#if DEBUG_ERROR
int16_t out[1024];
#endif
vp9_transform_sby_32x32(x);
vp9_quantize_sby_32x32(x);
-#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
+#if DEBUG_ERROR
vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
#endif
-#if !CONFIG_DWTDCTHYBRID
*distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
-#else
- *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4;
-#endif
+
#if DEBUG_ERROR
printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
@@ -1129,17 +1035,17 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
rate = bmode_costs[mode];
#endif
- vp9_intra4x4_predict(b, mode, b->predictor);
+ vp9_intra4x4_predict(xd, b, mode, b->predictor);
vp9_subtract_b(be, b, 16);
b->bmi.as_mode.first = mode;
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
- vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, be - x->block, tx_type);
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, be - x->block);
}
tempa = ta;
@@ -1168,9 +1074,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
// inverse transform
if (best_tx_type != DCT_DCT)
- vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
+ vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type);
else
- xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+ xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1179,8 +1085,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int *Rate, int *rate_y,
- int *Distortion, int64_t best_rd,
- int update_contexts) {
+ int *Distortion, int64_t best_rd) {
int i;
MACROBLOCKD *const xd = &mb->e_mbd;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
@@ -1191,18 +1096,13 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
ENTROPY_CONTEXT *ta, *tl;
int *bmode_costs;
- if (update_contexts) {
- ta = (ENTROPY_CONTEXT *)xd->above_context;
- tl = (ENTROPY_CONTEXT *)xd->left_context;
- } else {
- vpx_memcpy(&t_above, xd->above_context,
- sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context,
- sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context,
+ sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context,
+ sizeof(ENTROPY_CONTEXT_PLANES));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- }
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
xd->mode_info_context->mbmi.mode = B_PRED;
bmode_costs = mb->inter_bmode_costs;
@@ -1407,8 +1307,9 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
int distortion = 0, rate = 0;
BLOCK *be = x->block + ib;
BLOCKD *b = xd->block + ib;
- ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0;
- ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0;
+ ENTROPY_CONTEXT_PLANES ta, tl;
+ ENTROPY_CONTEXT *ta0, *ta1, besta0 = 0, besta1 = 0;
+ ENTROPY_CONTEXT *tl0, *tl1, bestl0 = 0, bestl1 = 0;
/*
* The predictor buffer is a 2d buffer with a stride of 16. Create
@@ -1430,58 +1331,75 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
rate = mode_costs[mode];
b->bmi.as_mode.first = mode;
- vp9_intra8x8_predict(b, mode, b->predictor);
+ vp9_intra8x8_predict(xd, b, mode, b->predictor);
vp9_subtract_4b_c(be, b, 16);
- assert(get_2nd_order_usage(xd) == 0);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, b);
if (tx_type != DCT_DCT)
- vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
+ vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
else
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->quantize_b_8x8(x, idx);
// compute quantization mse of 8x8 block
distortion = vp9_block_error_c((x->block + idx)->coeff,
(xd->block + idx)->dqcoeff, 64);
- ta0 = a[vp9_block2above[TX_8X8][idx]];
- tl0 = l[vp9_block2left[TX_8X8][idx]];
+
+ vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_8X8][idx];
+ tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_8X8][idx];
+ ta1 = ta0 + 1;
+ tl1 = tl0 + 1;
rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
- &ta0, &tl0, TX_8X8);
+ ta0, tl0, TX_8X8);
rate += rate_t;
- ta1 = ta0;
- tl1 = tl0;
} else {
static const int iblock[4] = {0, 1, 4, 5};
TX_TYPE tx_type;
int i;
- ta0 = a[vp9_block2above[TX_4X4][ib]];
- ta1 = a[vp9_block2above[TX_4X4][ib + 1]];
- tl0 = l[vp9_block2left[TX_4X4][ib]];
- tl1 = l[vp9_block2left[TX_4X4][ib + 4]];
+ vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES));
+ ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_4X4][ib];
+ tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_4X4][ib];
+ ta1 = ta0 + 1;
+ tl1 = tl0 + 1;
distortion = 0;
rate_t = 0;
for (i = 0; i < 4; ++i) {
+ int do_two = 0;
b = &xd->block[ib + iblock[i]];
be = &x->block[ib + iblock[i]];
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
- vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+ } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
+ do_two = 1;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, ib + iblock[i]);
}
- distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16);
+ distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
- // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0,
- &ta0, &tl0,
+ i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
TX_4X4);
+ if (do_two) {
+ i++;
+ rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC,
+ i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
+ TX_4X4);
+ }
}
+ b = &xd->block[ib];
+ be = &x->block[ib];
rate += rate_t;
}
@@ -1491,10 +1409,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
*bestrate = rate;
*bestratey = rate_t;
*bestdistortion = distortion;
- besta0 = ta0;
- besta1 = ta1;
- bestl0 = tl0;
- bestl1 = tl1;
+ besta0 = *ta0;
+ besta1 = *ta1;
+ bestl0 = *tl0;
+ bestl1 = *tl1;
best_rd = this_rd;
*best_mode = mode;
copy_predictor_8x8(best_predictor, b->predictor);
@@ -1647,12 +1565,12 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) {
int b;
int cost = 0;
MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT *ta, *tl;
if (backup) {
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
ta = (ENTROPY_CONTEXT *) &t_above;
tl = (ENTROPY_CONTEXT *) &t_left;
@@ -1752,8 +1670,9 @@ static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
}
static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
- int *distortion, int *skip, int fullpixel) {
- vp9_build_inter4x4_predictors_mbuv(&x->e_mbd);
+ int *distortion, int *skip, int fullpixel,
+ int mb_row, int mb_col) {
+ vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->e_mbd.predictor, x->src.uv_stride);
return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);
@@ -2082,12 +2001,8 @@ int vp9_cost_mv_ref(VP9_COMP *cpi,
MACROBLOCKD *xd = &cpi->mb.e_mbd;
int segment_id = xd->mode_info_context->mbmi.segment_id;
- // If the mode coding is done entirely at the segment level
- // we should not account for it at the per mb level in rd code.
- // Note that if the segment level coding is expanded from single mode
- // to multiple mode masks as per reference frame coding we will need
- // to do something different here.
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+  // Don't account for mode here if segment skip is enabled.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
VP9_COMMON *pc = &cpi->common;
vp9_prob p [VP9_MVREFS - 1];
@@ -2156,14 +2071,18 @@ static int labels2mode(
}
break;
case LEFT4X4:
- this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int : left_block_mv(mic, i);
+ this_mv->as_int = col ? d[-1].bmi.as_mv[0].as_int :
+ left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = col ? d[-1].bmi.as_mv.second.as_int : left_block_second_mv(mic, i);
+ this_second_mv->as_int = col ? d[-1].bmi.as_mv[1].as_int :
+ left_block_second_mv(xd, mic, i);
break;
case ABOVE4X4:
- this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int : above_block_mv(mic, i, mis);
+ this_mv->as_int = row ? d[-4].bmi.as_mv[0].as_int :
+ above_block_mv(mic, i, mis);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = row ? d[-4].bmi.as_mv.second.as_int : above_block_second_mv(mic, i, mis);
+ this_second_mv->as_int = row ? d[-4].bmi.as_mv[1].as_int :
+ above_block_second_mv(mic, i, mis);
break;
case ZERO4X4:
this_mv->as_int = 0;
@@ -2178,11 +2097,11 @@ static int labels2mode(
int_mv left_mv, left_second_mv;
left_second_mv.as_int = 0;
- left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int :
- left_block_mv(mic, i);
+ left_mv.as_int = col ? d[-1].bmi.as_mv[0].as_int :
+ left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
- left_second_mv.as_int = col ? d[-1].bmi.as_mv.second.as_int :
- left_block_second_mv(mic, i);
+ left_second_mv.as_int = col ? d[-1].bmi.as_mv[1].as_int :
+ left_block_second_mv(xd, mic, i);
if (left_mv.as_int == this_mv->as_int &&
(mbmi->second_ref_frame <= 0 ||
@@ -2198,9 +2117,9 @@ static int labels2mode(
#endif
}
- d->bmi.as_mv.first.as_int = this_mv->as_int;
+ d->bmi.as_mv[0].as_int = this_mv->as_int;
if (mbmi->second_ref_frame > 0)
- d->bmi.as_mv.second.as_int = this_second_mv->as_int;
+ d->bmi.as_mv[1].as_int = this_second_mv->as_int;
x->partition_info->bmi[i].mode = m;
x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
@@ -2230,12 +2149,25 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
BLOCK *be = &x->block[i];
int thisdistortion;
- vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict4x4);
- if (xd->mode_info_context->mbmi.second_ref_frame > 0)
- vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg4x4);
+ vp9_build_inter_predictor(*(bd->base_pre) + bd->pre,
+ bd->pre_stride,
+ bd->predictor, 16,
+ &bd->bmi.as_mv[0],
+ &xd->scale_factor[0],
+ 4, 4, 0 /* no avg */, &xd->subpix);
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ vp9_build_inter_predictor(*(bd->base_second_pre) + bd->pre,
+ bd->pre_stride,
+ bd->predictor, 16,
+ &bd->bmi.as_mv[1],
+ &xd->scale_factor[1],
+ 4, 4, 1 /* avg */, &xd->subpix);
+ }
+
vp9_subtract_b(be, bd, 16);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, bd);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, i);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
*labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
@@ -2274,20 +2206,31 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
int ib = vp9_i8x8_block[i];
if (labels[ib] == which_label) {
+ const int use_second_ref =
+ xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int which_mv;
int idx = (ib & 8) + ((ib & 2) << 1);
BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
int thisdistortion;
- vp9_build_inter_predictors4b(xd, bd, 16);
- if (xd->mode_info_context->mbmi.second_ref_frame > 0)
- vp9_build_2nd_inter_predictors4b(xd, bd, 16);
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre;
+
+ vp9_build_inter_predictor(*base_pre + bd->pre,
+ bd->pre_stride,
+ bd->predictor, 16,
+ &bd->bmi.as_mv[which_mv],
+ &xd->scale_factor[which_mv],
+ 8, 8, which_mv, &xd->subpix);
+ }
+
vp9_subtract_4b_c(be, bd, 16);
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
if (otherrd) {
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
- x->quantize_b_8x8(be2, bd2);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
@@ -2298,8 +2241,8 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
for (j = 0; j < 4; j += 2) {
bd = &xd->block[ib + iblock[j]];
be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
*labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
@@ -2316,8 +2259,8 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
for (j = 0; j < 4; j += 2) {
BLOCKD *bd = &xd->block[ib + iblock[j]];
BLOCK *be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+          x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
@@ -2330,8 +2273,8 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
TX_4X4);
}
}
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
- x->quantize_b_8x8(be2, bd2);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
*labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
@@ -2373,8 +2316,7 @@ typedef struct {
} BEST_SEG_INFO;
-static __inline
-int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
+static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
int r = 0;
r |= (mv->as_mv.row >> 3) < x->mv_row_min;
r |= (mv->as_mv.row >> 3) > x->mv_row_max;
@@ -2487,9 +2429,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
// use previous block's result as next block's MV predictor.
if (segmentation == PARTITIONING_4X4 && i > 0) {
- bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
+ bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv[0].as_int;
if (i == 4 || i == 8 || i == 12)
- bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
+ bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv[0].as_int;
step_param = 2;
}
}
@@ -2528,11 +2470,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (thissme < bestsme) {
bestsme = thissme;
- mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
+ mode_mv[NEW4X4].as_int = e->bmi.as_mv[0].as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
- e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
+ e->bmi.as_mv[0].as_int = mode_mv[NEW4X4].as_int;
}
}
}
@@ -2595,13 +2537,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
for (j = 0; j < 16; j++)
if (labels[j] == i)
- best_eobs[j] = x->e_mbd.block[j].eob;
+ best_eobs[j] = x->e_mbd.eobs[j];
} else {
for (j = 0; j < 4; j++) {
int ib = vp9_i8x8_block[j], idx = j * 4;
if (labels[ib] == i)
- best_eobs[idx] = x->e_mbd.block[idx].eob;
+ best_eobs[idx] = x->e_mbd.eobs[idx];
}
}
if (other_rd < best_other_rd)
@@ -2734,8 +2676,9 @@ static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x,
if (base_rd < txfm_cache[ONLY_4X4]) {
txfm_cache[ONLY_4X4] = base_rd;
}
- if (base_rd + diff < txfm_cache[1]) {
- txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff;
+ if (base_rd + diff < txfm_cache[ALLOW_8X8]) {
+ txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] =
+ txfm_cache[ALLOW_32X32] = base_rd + diff;
}
if (diff < 0) {
base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
@@ -2749,7 +2692,7 @@ static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x,
}
}
-static __inline void cal_step_param(int sr, int *sp) {
+static INLINE void cal_step_param(int sr, int *sp) {
int step = 0;
if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
@@ -2872,18 +2815,18 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < 16; i++) {
BLOCKD *bd = &x->e_mbd.block[i];
- bd->bmi.as_mv.first.as_int = bsi.mvs[i].as_int;
+ bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int;
if (mbmi->second_ref_frame > 0)
- bd->bmi.as_mv.second.as_int = bsi.second_mvs[i].as_int;
- bd->eob = bsi.eobs[i];
+ bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int;
+ x->e_mbd.eobs[i] = bsi.eobs[i];
}
*returntotrate = bsi.r;
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
*skippable = bsi.txfm_size == TX_4X4 ?
- vp9_mby_is_skippable_4x4(&x->e_mbd, 0) :
- vp9_mby_is_skippable_8x8(&x->e_mbd, 0);
+ vp9_mby_is_skippable_4x4(&x->e_mbd) :
+ vp9_mby_is_skippable_8x8(&x->e_mbd);
/* save partitions */
mbmi->txfm_size = bsi.txfm_size;
@@ -3016,7 +2959,8 @@ static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3],
}
}
-static __inline unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1, int idx, int val, int weight) {
+static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
+ int idx, int val, int weight) {
unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;
unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
// weight is 16-bit fixed point, so this basically calculates:
@@ -3160,43 +3104,104 @@ static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x,
static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
int idx, MV_REFERENCE_FRAME frame_type,
int block_size,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
int frame_mdcounts[4][4],
- uint8_t *y_buffer[4],
- uint8_t *u_buffer[4],
- uint8_t *v_buffer[4]) {
- YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx];
+ YV12_BUFFER_CONFIG yv12_mb[4],
+ struct scale_factors scale[MAX_REF_FRAMES]) {
+ VP9_COMMON *cm = &cpi->common;
+ YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ int use_prev_in_find_mv_refs, use_prev_in_find_best_ref;
+
+ // set up scaling factors
+ scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
+ scale[frame_type].x_offset_q4 =
+ (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf;
+ scale[frame_type].y_offset_q4 =
+ (mb_row * 16 * scale[frame_type].y_num / scale[frame_type].y_den) & 0xf;
- y_buffer[frame_type] = yv12->y_buffer + recon_yoffset;
- u_buffer[frame_type] = yv12->u_buffer + recon_uvoffset;
- v_buffer[frame_type] = yv12->v_buffer + recon_uvoffset;
+ // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
+ // use the UV scaling factors.
+ setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col,
+ &scale[frame_type], &scale[frame_type]);
// Gets an initial list of candidate vectors from neighbours and orders them
- vp9_find_mv_refs(xd, xd->mode_info_context,
- xd->prev_mode_info_context,
+ use_prev_in_find_mv_refs = cm->Width == cm->last_width &&
+ cm->Height == cm->last_height &&
+ !cpi->common.error_resilient_mode;
+ vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
+ use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL,
frame_type,
mbmi->ref_mvs[frame_type],
cpi->common.ref_frame_sign_bias);
// Candidate refinement carried out at encoder and decoder
- vp9_find_best_ref_mvs(xd, y_buffer[frame_type],
+ use_prev_in_find_best_ref =
+ scale[frame_type].x_num == scale[frame_type].x_den &&
+ scale[frame_type].y_num == scale[frame_type].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
+ vp9_find_best_ref_mvs(xd,
+ use_prev_in_find_best_ref ?
+ yv12_mb[frame_type].y_buffer : NULL,
yv12->y_stride,
mbmi->ref_mvs[frame_type],
&frame_nearest_mv[frame_type],
&frame_near_mv[frame_type]);
-
// Further refinement that is encode side only to test the top few candidates
// in full and choose the best as the centre point for subsequent searches.
- mv_pred(cpi, x, y_buffer[frame_type], yv12->y_stride,
+ mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride,
frame_type, block_size);
}
+static void model_rd_from_var_lapndz(int var, int n, int qstep,
+ int *rate, int *dist) {
+ // This function models the rate and distortion for a Laplacian
+ // source with given variance when quantized with a uniform quantizer
+ // with given stepsize. The closed form expressions are in:
+ // Hang and Chen, "Source Model for transform video coder and its
+ // application - Part I: Fundamental Theory", IEEE Trans. Circ.
+ // Sys. for Video Tech., April 1997.
+  // The function is implemented as a piecewise approximation to the
+ // exact computation.
+ // TODO(debargha): Implement the functions by interpolating from a
+ // look-up table
+ vp9_clear_system_state();
+ {
+ double D, R;
+ double s2 = (double) var / n;
+ double s = sqrt(s2);
+ double x = qstep / s;
+ if (x > 1.0) {
+ double y = exp(-x / 2);
+ double y2 = y * y;
+ D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
+ R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
+ } else {
+ double x2 = x * x;
+ D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
+ if (x > 0.125)
+ R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
+ 0.1626989668625);
+ else
+ R = -1.442252874826093 * log(x) + 1.944647760719664;
+ }
+ if (R < 0) {
+ *rate = 0;
+ *dist = var;
+ } else {
+ *rate = (n * R * 256 + 0.5);
+ *dist = (n * D * s2 + 0.5);
+ }
+ }
+ vp9_clear_system_state();
+}
+
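Restated in the notation of model_rd_from_var_lapndz() just above (a summary of what the piecewise fit computes, not a derivation; treating the rate as being in 1/256-bit units is an inference from the factor of 256):

    \sigma^2 = \frac{\text{var}}{n}, \qquad x = \frac{\text{qstep}}{\sigma}, \qquad
    \text{rate} \approx 256\, n\, R(x), \qquad \text{dist} \approx n\, D(x)\, \sigma^2

where R(x) and D(x) are the piecewise fits above to the closed-form rate and distortion of a Laplacian source under uniform quantization (Hang and Chen, 1997); when R(x) would be negative the model falls back to rate = 0 and dist = var.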
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
enum BlockSize block_size,
int *saddone, int near_sadidx[],
@@ -3209,9 +3214,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate_y, int *distortion_y,
int *rate_uv, int *distortion_uv,
int *mode_excluded, int *disable_skip,
- int recon_yoffset, int mode_index,
+ int mode_index,
+ INTERPOLATIONFILTERTYPE *best_filter,
int_mv frame_mv[MB_MODE_COUNT]
- [MAX_REF_FRAMES]) {
+ [MAX_REF_FRAMES],
+ YV12_BUFFER_CONFIG *scaled_ref_frame,
+ int mb_row, int mb_col) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -3229,6 +3237,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int_mv cur_mv[2];
int_mv ref_mv[2];
int64_t this_rd = 0;
+ unsigned char tmp_ybuf[64 * 64];
+ unsigned char tmp_ubuf[32 * 32];
+ unsigned char tmp_vbuf[32 * 32];
+ int pred_exists = 0;
+ int interpolating_intpel_seen = 0;
+ int intpel_mv;
+ int64_t rd, best_rd = INT64_MAX;
switch (this_mode) {
case NEWMV:
@@ -3248,6 +3263,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost, 96,
x->e_mbd.allow_high_precision_mv);
} else {
+ YV12_BUFFER_CONFIG backup_yv12 = xd->pre;
int bestsme = INT_MAX;
int further_steps, step_param = cpi->sf.first_step;
int sadpb = x->sadperbit16;
@@ -3259,6 +3275,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
+ if (scaled_ref_frame) {
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ xd->pre = *scaled_ref_frame;
+ xd->pre.y_buffer += mb_row * 16 * xd->pre.y_stride + mb_col * 16;
+ xd->pre.u_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8;
+ xd->pre.v_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8;
+ }
+
vp9_clamp_mv_min_max(x, &ref_mv[0]);
// mvp_full.as_int = ref_mv[0].as_int;
@@ -3267,9 +3293,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.as_mv.col >>= 3;
mvp_full.as_mv.row >>= 3;
- if (mvp_full.as_int != mvp_full.as_int) {
- mvp_full.as_int = mvp_full.as_int;
- }
// adjust search range according to sr from mv prediction
step_param = MAX(step_param, sr);
@@ -3297,22 +3320,22 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
- d->bmi.as_mv.first.as_int = tmp_mv.as_int;
- frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int;
+ d->bmi.as_mv[0].as_int = tmp_mv.as_int;
+ frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv[0].as_int;
// Add the new motion vector cost to our rolling cost variable
*rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
x->nmvjointcost, x->mvcost,
96, xd->allow_high_precision_mv);
+
+ // restore the predictor, if required
+ if (scaled_ref_frame) {
+ xd->pre = backup_yv12;
+ }
}
break;
- case NEARESTMV:
case NEARMV:
- // Do not bother proceeding if the vector (from newmv, nearest or
- // near) is 0,0 as this should then be coded using the zeromv mode.
- for (i = 0; i < num_refs; ++i)
- if (frame_mv[this_mode][refs[i]].as_int == 0)
- return INT64_MAX;
+ case NEARESTMV:
case ZEROMV:
default:
break;
@@ -3326,11 +3349,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
- const int m = vp9_switchable_interp_map[mbmi->interp_filter];
- *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
- }
/* We don't include the cost of the second reference here, because there
* are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
@@ -3355,36 +3373,363 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
#endif
+ pred_exists = 0;
+ interpolating_intpel_seen = 0;
+ // Are all MVs integer pel for Y and UV
+ intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
+ (mbmi->mv[0].as_mv.col & 15) == 0;
+ if (is_comp_pred)
+ intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
+ (mbmi->mv[1].as_mv.col & 15) == 0;
+ // Search for best switchable filter by checking the variance of
+ // pred error irrespective of whether the filter will be used
if (block_size == BLOCK_64X64) {
- vp9_build_inter64x64_predictors_sb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
+ int switchable_filter_index, newbest;
+ int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
+ int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int rs = 0;
+ mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+ if (interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
+ tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+ } else {
+ unsigned int sse, var;
+ int tmp_rate_y, tmp_rate_u, tmp_rate_v;
+ int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ vp9_build_inter64x64_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ var = vp9_variance64x64(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ model_rd_from_var_lapndz(var, 64 * 64, xd->block[0].dequant[1] >> 3,
+ &tmp_rate_y, &tmp_dist_y);
+ var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 32 * 32, xd->block[16].dequant[1] >> 3,
+ &tmp_rate_u, &tmp_dist_u);
+ var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 32 * 32, xd->block[20].dequant[1] >> 3,
+ &tmp_rate_v, &tmp_dist_v);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
+ tmp_dist_y + tmp_dist_u + tmp_dist_v);
+ if (!interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ tmp_rate_y_i = tmp_rate_y;
+ tmp_rate_u_i = tmp_rate_u;
+ tmp_rate_v_i = tmp_rate_v;
+ tmp_dist_y_i = tmp_dist_y;
+ tmp_dist_u_i = tmp_dist_u;
+ tmp_dist_v_i = tmp_dist_v;
+ }
+ }
+ newbest = (switchable_filter_index == 0 || rd < best_rd);
+ if (newbest) {
+ best_rd = rd;
+ *best_filter = mbmi->interp_filter;
+ }
+ if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+ (cm->mcomp_filter_type != SWITCHABLE &&
+ cm->mcomp_filter_type == mbmi->interp_filter)) {
+ int i;
+ for (i = 0; i < 64; ++i)
+ vpx_memcpy(tmp_ybuf + i * 64,
+ xd->dst.y_buffer + i * xd->dst.y_stride,
+ sizeof(unsigned char) * 64);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(tmp_ubuf + i * 32,
+ xd->dst.u_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(tmp_vbuf + i * 32,
+ xd->dst.v_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 32);
+ pred_exists = 1;
+ }
+ interpolating_intpel_seen |=
+ intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
+ }
} else if (block_size == BLOCK_32X32) {
- vp9_build_inter32x32_predictors_sb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
+ int switchable_filter_index, newbest;
+ int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
+ int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int rs = 0;
+ mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+ if (interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
+ tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+ } else {
+ unsigned int sse, var;
+ int tmp_rate_y, tmp_rate_u, tmp_rate_v;
+ int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ vp9_build_inter32x32_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ var = vp9_variance32x32(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ model_rd_from_var_lapndz(var, 32 * 32, xd->block[0].dequant[1] >> 3,
+ &tmp_rate_y, &tmp_dist_y);
+ var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 16 * 16, xd->block[16].dequant[1] >> 3,
+ &tmp_rate_u, &tmp_dist_u);
+ var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 16 * 16, xd->block[20].dequant[1] >> 3,
+ &tmp_rate_v, &tmp_dist_v);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
+ tmp_dist_y + tmp_dist_u + tmp_dist_v);
+ if (!interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ tmp_rate_y_i = tmp_rate_y;
+ tmp_rate_u_i = tmp_rate_u;
+ tmp_rate_v_i = tmp_rate_v;
+ tmp_dist_y_i = tmp_dist_y;
+ tmp_dist_u_i = tmp_dist_u;
+ tmp_dist_v_i = tmp_dist_v;
+ }
+ }
+ newbest = (switchable_filter_index == 0 || rd < best_rd);
+ if (newbest) {
+ best_rd = rd;
+ *best_filter = mbmi->interp_filter;
+ }
+ if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+ (cm->mcomp_filter_type != SWITCHABLE &&
+ cm->mcomp_filter_type == mbmi->interp_filter)) {
+ int i;
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(tmp_ybuf + i * 64,
+ xd->dst.y_buffer + i * xd->dst.y_stride,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(tmp_ubuf + i * 32,
+ xd->dst.u_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 16);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(tmp_vbuf + i * 32,
+ xd->dst.v_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 16);
+ pred_exists = 1;
+ }
+ interpolating_intpel_seen |=
+ intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
+ }
} else {
+ int switchable_filter_index, newbest;
+ int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
+ int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
assert(block_size == BLOCK_16X16);
- vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
- if (is_comp_pred)
- vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int rs = 0;
+ mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+ if (interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
+ tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+ } else {
+ unsigned int sse, var;
+ int tmp_rate_y, tmp_rate_u, tmp_rate_v;
+ int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ // TODO(jkoleszar): these 2 y/uv should be replaced with one call to
+ // vp9_build_interintra_16x16_predictors_mb().
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+ mb_row, mb_col);
+
#if CONFIG_COMP_INTERINTRA_PRED
- if (is_comp_interintra_pred) {
- vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
+ if (is_comp_interintra_pred) {
+ vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
+ }
+#endif
+
+ vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
+ xd->predictor + 320, 8,
+ mb_row, mb_col);
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred) {
+ vp9_build_interintra_16x16_predictors_mbuv(xd, xd->predictor + 256,
+ xd->predictor + 320, 8);
+ }
+#endif
+ var = vp9_variance16x16(*(b->base_src), b->src_stride,
+ xd->predictor, 16, &sse);
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
+ &tmp_rate_y, &tmp_dist_y);
+ var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
+ &xd->predictor[256], 8, &sse);
+ model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
+ &tmp_rate_u, &tmp_dist_u);
+ var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
+ &xd->predictor[320], 8, &sse);
+ model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
+ &tmp_rate_v, &tmp_dist_v);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
+ tmp_dist_y + tmp_dist_u + tmp_dist_v);
+ if (!interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ tmp_rate_y_i = tmp_rate_y;
+ tmp_rate_u_i = tmp_rate_u;
+ tmp_rate_v_i = tmp_rate_v;
+ tmp_dist_y_i = tmp_dist_y;
+ tmp_dist_u_i = tmp_dist_u;
+ tmp_dist_v_i = tmp_dist_v;
+ }
+ }
+ newbest = (switchable_filter_index == 0 || rd < best_rd);
+ if (newbest) {
+ best_rd = rd;
+ *best_filter = mbmi->interp_filter;
+ }
+ if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+ (cm->mcomp_filter_type != SWITCHABLE &&
+ cm->mcomp_filter_type == mbmi->interp_filter)) {
+ vpx_memcpy(tmp_ybuf, xd->predictor, sizeof(unsigned char) * 256);
+ vpx_memcpy(tmp_ubuf, xd->predictor + 256, sizeof(unsigned char) * 64);
+ vpx_memcpy(tmp_vbuf, xd->predictor + 320, sizeof(unsigned char) * 64);
+ pred_exists = 1;
+ }
+ interpolating_intpel_seen |=
+ intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
}
+ }
+
+  // Set the appropriate filter
+ if (cm->mcomp_filter_type != SWITCHABLE)
+ mbmi->interp_filter = cm->mcomp_filter_type;
+ else
+ mbmi->interp_filter = *best_filter;
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ if (pred_exists) {
+ if (block_size == BLOCK_64X64) {
+ for (i = 0; i < 64; ++i)
+ vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
+ sizeof(unsigned char) * 64);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
+ sizeof(unsigned char) * 32);
+ } else if (block_size == BLOCK_32X32) {
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
+ sizeof(unsigned char) * 16);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
+ sizeof(unsigned char) * 16);
+ } else {
+ vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256);
+ vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64);
+ vpx_memcpy(xd->predictor + 320, tmp_vbuf, sizeof(unsigned char) * 64);
+ }
+ } else {
+ // Handles the special case when a filter that is not in the
+ // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
+ if (block_size == BLOCK_64X64) {
+ vp9_build_inter64x64_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ } else if (block_size == BLOCK_32X32) {
+ vp9_build_inter32x32_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ } else {
+ // TODO(jkoleszar): These y/uv fns can be replaced with their mb
+ // equivalent
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+ mb_row, mb_col);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred) {
+ vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
+ }
+#endif
+ vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+ &xd->predictor[320], 8,
+ mb_row, mb_col);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred) {
+ vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
+ &xd->predictor[320], 8);
+ }
#endif
+ }
+ }
+
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
x->skip = 1;
else if (x->encode_breakout) {
- unsigned int sse, var;
+ unsigned int var, sse;
int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >> 4);
@@ -3404,9 +3749,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if ((int)sse < threshold) {
- unsigned int q2dc = xd->block[24].dequant[0];
+ unsigned int q2dc = xd->block[0].dequant[0];
/* If there is no codeable 2nd order dc
- or a very small uniform pixel change change */
+           or a very small uniform pixel change */
if ((sse - var < q2dc * q2dc >> 4) ||
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
@@ -3447,17 +3792,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- if (!(*mode_excluded)) {
- if (is_comp_pred) {
- *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
- } else {
- *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
- }
-#if CONFIG_COMP_INTERINTRA_PRED
- if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
-#endif
- }
-
if (!x->skip) {
if (block_size == BLOCK_64X64) {
int skippable_y, skippable_uv;
@@ -3491,30 +3825,32 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*skippable = skippable_y && skippable_uv;
} else {
assert(block_size == BLOCK_16X16);
-
- vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
- if (is_comp_pred)
- vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
-#if CONFIG_COMP_INTERINTRA_PRED
- if (is_comp_interintra_pred) {
- vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
- }
-#endif
inter_mode_cost(cpi, x, rate2, distortion,
rate_y, distortion_y, rate_uv, distortion_uv,
skippable, txfm_cache);
}
}
+
+ if (!(*mode_excluded)) {
+ if (is_comp_pred) {
+ *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+ } else {
+ *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
+#endif
+ }
+
return this_rd; // if 0, this will be re-calculated by caller
}
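
The three block-size branches above all apply the same per-filter decision rule; condensed into a sketch for readability (illustration only: the array arguments are hypothetical inputs, while INTERPOLATIONFILTERTYPE, vp9_switchable_interp, VP9_SWITCHABLE_FILTERS and RDCOST come from this change).

static INTERPOLATIONFILTERTYPE pick_filter_sketch(
    const MACROBLOCK *x,
    const int rate[VP9_SWITCHABLE_FILTERS],  /* modelled rate per filter */
    const int dist[VP9_SWITCHABLE_FILTERS],  /* modelled distortion per filter */
    const int rs[VP9_SWITCHABLE_FILTERS]) {  /* signalling cost, 0 if fixed */
  int i;
  int64_t best_rd = INT64_MAX;
  INTERPOLATIONFILTERTYPE best = vp9_switchable_interp[0];
  for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
    /* Same rule as the loops above: keep the filter with the lowest
     * modelled RD cost, treating the first candidate as the initial best. */
    const int64_t rd = RDCOST(x->rdmult, x->rddiv, rs[i] + rate[i], dist[i]);
    if (i == 0 || rd < best_rd) {
      best_rd = rd;
      best = vp9_switchable_interp[i];
    }
  }
  return best;
}
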
static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate, int *returndistortion,
int64_t *returnintra) {
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
union b_mode_info best_bmodes[16];
@@ -3544,6 +3880,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
+ INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
int uv_intra_skippable = 0;
int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0;
@@ -3551,7 +3888,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
int distortion_uv = INT_MAX;
int64_t best_yrd = INT64_MAX;
- int switchable_filter_index = 0;
MB_PREDICTION_MODE uv_intra_mode;
MB_PREDICTION_MODE uv_intra_mode_8x8 = 0;
@@ -3561,7 +3897,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int frame_mdcounts[4][4];
- uint8_t *y_buffer[4], *u_buffer[4], *v_buffer[4];
+ YV12_BUFFER_CONFIG yv12_mb[4];
unsigned int ref_costs[MAX_REF_FRAMES];
int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
@@ -3569,6 +3905,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
cpi->common.y1dc_delta_q);
+ struct scale_factors scale_factor[4];
+
vpx_memset(mode8x8, 0, sizeof(mode8x8));
vpx_memset(&frame_mv, 0, sizeof(frame_mv));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3592,24 +3930,24 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.lst_fb_idx, LAST_FRAME,
- BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
+ LAST_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.gld_fb_idx, GOLDEN_FRAME,
- BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
+ GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.alt_fb_idx, ALTREF_FRAME,
- BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
+ ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ frame_mdcounts, yv12_mb, scale_factor);
}
*returnintra = INT64_MAX;
@@ -3638,8 +3976,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// that depend on the current prediction etc.
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
- for (mode_index = 0; mode_index < MAX_MODES;
- mode_index += (!switchable_filter_index)) {
+ for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
int64_t this_rd = INT64_MAX;
int disable_skip = 0, skippable = 0;
int other_cost = 0;
@@ -3649,6 +3986,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
int mode_excluded = 0;
int64_t txfm_cache[NB_TXFM_MODES] = { 0 };
+ YV12_BUFFER_CONFIG *scaled_ref_frame;
    // These variables hold the rolling total cost and distortion for this mode
rate2 = 0;
@@ -3664,24 +4002,38 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
- // Evaluate all sub-pel filters irrespective of whether we can use
- // them for this frame.
- if (this_mode >= NEARESTMV && this_mode <= SPLITMV) {
- mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index++];
- if (switchable_filter_index == VP9_SWITCHABLE_FILTERS)
- switchable_filter_index = 0;
- if ((cm->mcomp_filter_type != SWITCHABLE) &&
- (cm->mcomp_filter_type != mbmi->interp_filter)) {
- mode_excluded = 1;
- }
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
- }
+ mbmi->interp_filter = cm->mcomp_filter_type;
+
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
+
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index])
continue;
+ // Ensure that the references used by this mode are available.
+ if (mbmi->ref_frame &&
+ !(cpi->ref_frame_flags & flag_list[mbmi->ref_frame]))
+ continue;
+
+ if (mbmi->second_ref_frame > 0 &&
+ !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
+ continue;
+
+    // Only allow ZEROMV when the reference frame is a different
+    // resolution from the current frame (i.e. it requires scaling).
+ if (mbmi->ref_frame > 0 &&
+ (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+ if (mbmi->second_ref_frame > 0 &&
+ (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+
// current coding mode under rate-distortion optimization test loop
#if CONFIG_COMP_INTERINTRA_PRED
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
@@ -3693,18 +4045,16 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
!vp9_check_segref(xd, segment_id, mbmi->ref_frame)) {
continue;
- // If the segment mode feature is enabled....
+ // If the segment skip feature is enabled....
// then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
- (this_mode !=
- vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
+ (this_mode != ZEROMV)) {
continue;
- // Disable this drop out case if either the mode or ref frame
- // segment level feature is enabled for this segment. This is to
+ // Disable this drop out case if the ref frame segment
+ // level feature is enabled for this segment. This is to
      // prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame overlay,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative
if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
@@ -3716,22 +4066,31 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
/* everything but intra */
+ scaled_ref_frame = NULL;
if (mbmi->ref_frame) {
int ref = mbmi->ref_frame;
+ int fb;
- xd->pre.y_buffer = y_buffer[ref];
- xd->pre.u_buffer = u_buffer[ref];
- xd->pre.v_buffer = v_buffer[ref];
+ xd->pre = yv12_mb[ref];
best_ref_mv = mbmi->ref_mvs[ref][0];
vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
+
+ if (mbmi->ref_frame == LAST_FRAME) {
+ fb = cpi->lst_fb_idx;
+ } else if (mbmi->ref_frame == GOLDEN_FRAME) {
+ fb = cpi->gld_fb_idx;
+ } else {
+ fb = cpi->alt_fb_idx;
+ }
+
+ if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
+ scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
}
if (mbmi->second_ref_frame > 0) {
int ref = mbmi->second_ref_frame;
- xd->second_pre.y_buffer = y_buffer[ref];
- xd->second_pre.u_buffer = u_buffer[ref];
- xd->second_pre.v_buffer = v_buffer[ref];
+ xd->second_pre = yv12_mb[ref];
second_best_ref_mv = mbmi->ref_mvs[ref][0];
}
@@ -3798,8 +4157,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// the BPRED mode : x->mbmode_cost[xd->frame_type][BPRED];
mbmi->txfm_size = TX_4X4;
tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
- &distortion, best_yrd,
- cpi->update_context);
+ &distortion, best_yrd);
rate2 += rate;
rate2 += intra_cost_penalty;
distortion2 += distortion;
@@ -3898,29 +4256,108 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// special case it.
else if (this_mode == SPLITMV) {
const int is_comp_pred = mbmi->second_ref_frame > 0;
- int64_t tmp_rd, this_rd_thresh;
+ int64_t this_rd_thresh;
+ int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
+ int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
+ int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
+ int switchable_filter_index;
int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL;
+ union b_mode_info tmp_best_bmodes[16];
+ MB_MODE_INFO tmp_best_mbmode;
+ PARTITION_INFO tmp_best_partition;
+ int pred_exists = 0;
this_rd_thresh =
- (mbmi->ref_frame == LAST_FRAME) ?
+ (mbmi->ref_frame == LAST_FRAME) ?
cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
this_rd_thresh =
- (mbmi->ref_frame == GOLDEN_FRAME) ?
+ (mbmi->ref_frame == GOLDEN_FRAME) ?
cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
- tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- second_ref, best_yrd, mdcounts,
- &rate, &rate_y, &distortion,
- &skippable,
- (int)this_rd_thresh, seg_mvs,
- txfm_cache);
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int newbest;
+ mbmi->interp_filter =
+ vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs,
+ txfm_cache);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
+ }
+ newbest = (tmp_rd < tmp_best_rd);
+ if (newbest) {
+ tmp_best_filter = mbmi->interp_filter;
+ tmp_best_rd = tmp_rd;
+ }
+ if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
+ (mbmi->interp_filter == cm->mcomp_filter_type &&
+ cm->mcomp_filter_type != SWITCHABLE)) {
+ tmp_best_rdu = tmp_rd;
+ tmp_best_rate = rate;
+ tmp_best_ratey = rate_y;
+ tmp_best_distortion = distortion;
+ tmp_best_skippable = skippable;
+ vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&tmp_best_partition, x->partition_info,
+ sizeof(PARTITION_INFO));
+ for (i = 0; i < 16; i++) {
+ tmp_best_bmodes[i] = xd->block[i].bmi;
+ }
+ pred_exists = 1;
+ }
+ } // switchable_filter_index loop
+
+ mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
+ tmp_best_filter : cm->mcomp_filter_type);
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (!pred_exists) {
+ // Handles the special case when a filter that is not in the
+ // switchable list (bilinear, 6-tap) is indicated at the frame level
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs,
+ txfm_cache);
+ } else {
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
+ }
+ tmp_rd = tmp_best_rdu;
+ rate = tmp_best_rate;
+ rate_y = tmp_best_ratey;
+ distortion = tmp_best_distortion;
+ skippable = tmp_best_skippable;
+ vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &tmp_best_partition,
+ sizeof(PARTITION_INFO));
+ for (i = 0; i < 16; i++) {
+ xd->block[i].bmi = tmp_best_bmodes[i];
+ }
+ }
+
rate2 += rate;
distortion2 += distortion;
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
[vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
- [vp9_switchable_interp_map[mbmi->interp_filter]];
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
// If even the 'Y' rd value of split is higher than best so far
      // then don't bother looking at UV
@@ -3928,7 +4365,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int uv_skippable;
rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- cpi->common.full_pixel);
+ cpi->common.full_pixel, mb_row, mb_col);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -3969,8 +4406,9 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
&rate_y, &distortion,
&rate_uv, &distortion_uv,
- &mode_excluded, &disable_skip, recon_yoffset,
- mode_index, frame_mv);
+ &mode_excluded, &disable_skip,
+ mode_index, &tmp_best_filter, frame_mv,
+ scaled_ref_frame, mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -3995,10 +4433,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->common.mb_no_coeff_skip) {
int mb_skip_allowed;
- // Is Mb level skip allowed for this mb.
- mb_skip_allowed =
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ // Is Mb level skip allowed (i.e. not coded at segment level).
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
if (skippable) {
mbmi->mb_skip_coeff = 1;
@@ -4061,7 +4497,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rd < best_overall_rd) {
best_overall_rd = this_rd;
- best_filter = mbmi->interp_filter;
+ best_filter = tmp_best_filter;
best_mode = this_mode;
#if CONFIG_COMP_INTERINTRA_PRED
is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
@@ -4175,7 +4611,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (x->skip && !mode_excluded)
break;
- }
+ }
assert((cm->mcomp_filter_type == SWITCHABLE) ||
(cm->mcomp_filter_type == best_mbmode.interp_filter) ||
@@ -4204,12 +4640,11 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
cpi->rd_thresh_mult[best_mode_index];
}
- // This code force Altref,0,0 and skip for the frame that overlays a
+  // This code forces Altref,0,0 and skip for the frame that overlays
   // an altref unless Altref is filtered. However, this is unsafe if
- // segment level coding of ref frame or mode is enabled for this
+ // segment level coding of ref frame is enabled for this
// segment.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
@@ -4224,6 +4659,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mb_skip_coeff =
(cpi->common.mb_no_coeff_skip) ? 1 : 0;
mbmi->partitioning = 0;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
@@ -4244,10 +4681,12 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (best_mbmode.mode == SPLITMV) {
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv.first.as_int = best_bmodes[i].as_mv.first.as_int;
+ xd->mode_info_context->bmi[i].as_mv[0].as_int =
+ best_bmodes[i].as_mv[0].as_int;
if (mbmi->second_ref_frame > 0)
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv.second.as_int = best_bmodes[i].as_mv.second.as_int;
+ xd->mode_info_context->bmi[i].as_mv[1].as_int =
+ best_bmodes[i].as_mv[1].as_int;
vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
@@ -4265,7 +4704,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
- best_txfm_diff[i] = INT_MIN;
+ best_txfm_diff[i] = 0;
else
best_txfm_diff[i] = best_rd - best_txfm_rd[i];
}
@@ -4274,6 +4713,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
best_mode_index, &best_partition,
&mbmi->ref_mvs[mbmi->ref_frame][0],
@@ -4291,22 +4732,28 @@ void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
int rate_y_tokenonly = 0, rate_uv_tokenonly;
int dist_y = 0, dist_uv;
int y_skip = 0, uv_skip;
- int64_t txfm_cache[NB_TXFM_MODES];
+ int64_t txfm_cache[NB_TXFM_MODES], err;
+ int i;
- rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
- &dist_y, &y_skip, txfm_cache);
+ err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+ &dist_y, &y_skip, txfm_cache);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip);
+ &dist_uv, &uv_skip);
if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) {
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
*returndist = dist_y + (dist_uv >> 2);
+ memset(x->sb32_context[xd->sb_index].txfm_rd_diff, 0,
+ sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff));
} else {
*returnrate = rate_y + rate_uv;
if (cpi->common.mb_no_coeff_skip)
*returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ x->sb32_context[xd->sb_index].txfm_rd_diff[i] = err - txfm_cache[i];
+ }
}
}
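
The 64x64 variant below repeats this bookkeeping; as a sketch, both branches reduce to the following (the helper name and parameters are hypothetical, with err being the value returned by the intra luma mode pick and txfm_cache[] the per-transform-mode costs it fills in).

static void fill_txfm_rd_diff(int64_t diff[NB_TXFM_MODES], int64_t err,
                              const int64_t txfm_cache[NB_TXFM_MODES],
                              int block_skipped) {
  int i;
  for (i = 0; i < NB_TXFM_MODES; ++i) {
    /* Cleared when the whole block is coded as skipped, otherwise the
     * difference between the chosen cost and each fixed transform mode. */
    diff[i] = block_skipped ? 0 : err - txfm_cache[i];
  }
}
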
@@ -4319,22 +4766,28 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
int rate_y_tokenonly = 0, rate_uv_tokenonly;
int dist_y = 0, dist_uv;
int y_skip = 0, uv_skip;
- int64_t txfm_cache[NB_TXFM_MODES];
+ int64_t txfm_cache[NB_TXFM_MODES], err;
+ int i;
- rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
- &dist_y, &y_skip, txfm_cache);
+ err = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+ &dist_y, &y_skip, txfm_cache);
rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip);
+ &dist_uv, &uv_skip);
if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) {
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
*returndist = dist_y + (dist_uv >> 2);
+ memset(x->sb64_context.txfm_rd_diff, 0,
+ sizeof(x->sb64_context.txfm_rd_diff));
} else {
*returnrate = rate_y + rate_uv;
if (cm->mb_no_coeff_skip)
*returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ x->sb64_context.txfm_rd_diff[i] = err - txfm_cache[i];
+ }
}
}
@@ -4392,10 +4845,10 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
mode8x8[2]= xd->mode_info_context->bmi[8].as_mode.first;
mode8x8[3]= xd->mode_info_context->bmi[10].as_mode.first;
+ mbmi->txfm_size = TX_4X4;
error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
&rate4x4, &rate4x4_tokenonly,
- &dist4x4, error16x16,
- cpi->update_context);
+ &dist4x4, error16x16);
mbmi->mb_skip_coeff = 0;
if (cpi->common.mb_no_coeff_skip &&
@@ -4457,7 +4910,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate,
int *returndistortion,
int block_size) {
@@ -4471,13 +4924,13 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int frame_mdcounts[4][4];
- uint8_t *y_buffer[4];
- uint8_t *u_buffer[4];
- uint8_t *v_buffer[4];
+ YV12_BUFFER_CONFIG yv12_mb[4];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
- int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx,
- cpi->common.alt_fb_idx };
+ int idx_list[4] = {0,
+ cpi->lst_fb_idx,
+ cpi->gld_fb_idx,
+ cpi->alt_fb_idx};
int mdcounts[4];
int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int saddone = 0;
@@ -4496,16 +4949,16 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
+ INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0,
rate_uv_tokenonly_8x8 = 0;
int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0;
MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV;
- int switchable_filter_index = 0;
int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV;
+ struct scale_factors scale_factor[4];
- x->skip = 0;
xd->mode_info_context->mbmi.segment_id = segment_id;
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -4518,9 +4971,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
- recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV],
+ mb_row, mb_col, frame_mv[NEARESTMV],
frame_mv[NEARMV], frame_mdcounts,
- y_buffer, u_buffer, v_buffer);
+ yv12_mb, scale_factor);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -4570,8 +5023,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- for (mode_index = 0; mode_index < MAX_MODES;
- mode_index += (!switchable_filter_index)) {
+ for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
int disable_skip = 0;
@@ -4588,10 +5040,10 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index] ||
cpi->rd_threshes[mode_index] == INT_MAX) {
- switchable_filter_index = 0;
continue;
}
+ x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame;
if (!(ref_frame == INTRA_FRAME ||
@@ -4600,6 +5052,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
mbmi->ref_frame = ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
mbmi->mode = this_mode;
mbmi->uv_mode = DC_PRED;
@@ -4607,19 +5061,11 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
#endif
+
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- if (this_mode >= NEARESTMV && this_mode <= SPLITMV) {
- mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index++];
- if (switchable_filter_index == VP9_SWITCHABLE_FILTERS)
- switchable_filter_index = 0;
- if ((cm->mcomp_filter_type != SWITCHABLE) &&
- (cm->mcomp_filter_type != mbmi->interp_filter)) {
- mode_excluded = 1;
- }
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
- }
+ mbmi->interp_filter = cm->mcomp_filter_type;
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
// if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
// continue;
@@ -4640,10 +5086,10 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!(cpi->ref_frame_flags & flag_list[second_ref]))
continue;
mbmi->second_ref_frame = second_ref;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
- xd->second_pre.y_buffer = y_buffer[second_ref];
- xd->second_pre.u_buffer = u_buffer[second_ref];
- xd->second_pre.v_buffer = v_buffer[second_ref];
+ xd->second_pre = yv12_mb[second_ref];
mode_excluded =
mode_excluded ?
mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
@@ -4661,9 +5107,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- xd->pre.y_buffer = y_buffer[ref_frame];
- xd->pre.u_buffer = u_buffer[ref_frame];
- xd->pre.v_buffer = v_buffer[ref_frame];
+ xd->pre = yv12_mb[ref_frame];
vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts));
// If the segment reference frame feature is enabled....
@@ -4671,16 +5115,15 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
!vp9_check_segref(xd, segment_id, ref_frame)) {
continue;
- // If the segment mode feature is enabled....
+ // If the segment skip feature is enabled....
// then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
- (this_mode != vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
+ (this_mode != ZEROMV)) {
continue;
- // Disable this drop out case if either the mode or ref frame
+ // Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative
@@ -4722,6 +5165,20 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv;
distortion2 = distortion_y + distortion_uv;
} else {
+ YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
+ int fb;
+
+ if (mbmi->ref_frame == LAST_FRAME) {
+ fb = cpi->lst_fb_idx;
+ } else if (mbmi->ref_frame == GOLDEN_FRAME) {
+ fb = cpi->gld_fb_idx;
+ } else {
+ fb = cpi->alt_fb_idx;
+ }
+
+ if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
+ scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
+
#if CONFIG_COMP_INTERINTRA_PRED
if (mbmi->second_ref_frame == INTRA_FRAME) {
if (best_intra16_mode == DC_PRED - 1) continue;
@@ -4742,8 +5199,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
#endif
&rate_y, &distortion_y,
&rate_uv, &distortion_uv,
- &mode_excluded, &disable_skip, recon_yoffset,
- mode_index, frame_mv);
+ &mode_excluded, &disable_skip,
+ mode_index, &tmp_best_filter, frame_mv,
+ scaled_ref_frame, mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -4769,10 +5227,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->common.mb_no_coeff_skip) {
int mb_skip_allowed;
- // Is Mb level skip allowed for this mb.
- mb_skip_allowed =
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ // Is Mb level skip allowed (i.e. not coded at segment level).
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
if (skippable) {
// Back out the coefficient coding costs
@@ -4832,7 +5288,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rd < best_overall_rd) {
best_overall_rd = this_rd;
- best_filter = mbmi->interp_filter;
+ best_filter = tmp_best_filter;
best_mode = this_mode;
#if CONFIG_COMP_INTERINTRA_PRED
is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
@@ -4956,10 +5412,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  // This code forces Altref,0,0 and skip for the frame that overlays
  // an altref unless Altref is filtered. However, this is unsafe if
- // segment level coding of ref frame or mode is enabled for this
- // segment.
+ // segment level coding of ref frame is enabled for this segment.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
@@ -4971,7 +5425,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
mbmi->partitioning = 0;
mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ?
- TX_16X16 : cm->txfm_mode;
+ TX_32X32 : cm->txfm_mode;
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
@@ -4991,7 +5445,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
- best_txfm_diff[i] = INT_MIN;
+ best_txfm_diff[i] = 0;
else
best_txfm_diff[i] = best_rd - best_txfm_rd[i];
}
@@ -5000,6 +5454,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
{
PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ?
&x->sb32_context[xd->sb_index] :
@@ -5015,24 +5471,23 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
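
Both mode-pick loops above derive the scaled reference with the same lookup; condensed here into a hypothetical helper for readability (every identifier it uses appears elsewhere in this change).

static YV12_BUFFER_CONFIG *get_scaled_ref_sketch(VP9_COMP *cpi, int ref_frame) {
  VP9_COMMON *cm = &cpi->common;
  int fb;
  /* Map the reference frame to its frame-buffer index. */
  if (ref_frame == LAST_FRAME)
    fb = cpi->lst_fb_idx;
  else if (ref_frame == GOLDEN_FRAME)
    fb = cpi->gld_fb_idx;
  else
    fb = cpi->alt_fb_idx;
  /* When the encoder holds a rescaled copy of this reference, return it so
   * prediction and motion search can run at the current frame's resolution. */
  if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
    return &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
  return NULL;
}
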
int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate,
int *returndistortion) {
- return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset,
+ return vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col,
returnrate, returndistortion, BLOCK_32X32);
}
int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate,
int *returndistortion) {
- return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset,
+ return vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col,
returnrate, returndistortion, BLOCK_64X64);
}
void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset,
- int recon_uvoffset,
+ int mb_row, int mb_col,
int *totalrate, int *totaldist) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -5050,7 +5505,7 @@ void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
{
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
- rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
+ rd_pick_inter_mode(cpi, x, mb_row, mb_col, &rate,
&distortion, &intra_error);
/* restore cpi->zbin_mode_boost_enabled */
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 8ee2c0bf9..01b156044 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -29,15 +29,15 @@ extern void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
int *r, int *d);
extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- int ref_yoffset, int ref_uvoffset,
+ int mb_row, int mb_col,
int *r, int *d);
extern int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
- int ref_yoffset, int ref_uvoffset,
+ int mb_row, int mb_col,
int *r, int *d);
extern int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
- int ref_yoffset, int ref_uvoffset,
+ int mb_row, int mb_col,
int *r, int *d);
extern void vp9_init_me_luts();
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index 84121f79c..dc21f02f6 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -13,12 +13,13 @@
#include "vp9/common/vp9_sadmxn.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "./vp9_rtcd.h"
unsigned int vp9_sad64x64_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64);
}
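
For context, the wrappers in this file defer to the generic m x n SAD from vp9/common/vp9_sadmxn.h and do not use max_sad themselves (the x3/x8 variants simply pass 0x7fffffff). A self-contained sketch of what that helper computes, for illustration only:

static unsigned int sad_mxn_sketch(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   int width, int height) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < height; ++r) {
    /* Accumulate absolute pixel differences over one row, then advance
     * both pointers by their respective strides. */
    for (c = 0; c < width; ++c) {
      const int diff = src[c] - ref[c];
      sad += (diff < 0) ? -diff : diff;
    }
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}
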
@@ -26,7 +27,7 @@ unsigned int vp9_sad32x32_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
}
@@ -34,7 +35,7 @@ unsigned int vp9_sad16x16_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16);
}
@@ -42,7 +43,7 @@ unsigned int vp9_sad8x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 8);
}
@@ -51,7 +52,7 @@ unsigned int vp9_sad16x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 8);
}
@@ -59,7 +60,7 @@ unsigned int vp9_sad8x16_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
}
@@ -68,7 +69,7 @@ unsigned int vp9_sad4x4_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
}
@@ -77,12 +78,12 @@ void vp9_sad64x64x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad64x64(src_ptr, src_stride, ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + 2, ref_stride,
+ 0x7fffffff);
}
void vp9_sad32x32x3_c(const uint8_t *src_ptr,
@@ -90,74 +91,74 @@ void vp9_sad32x32x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad64x64x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad32x32x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad16x16x3_c(const uint8_t *src_ptr,
@@ -165,43 +166,43 @@ void vp9_sad16x16x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad16x16x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad16x8x3_c(const uint8_t *src_ptr,
@@ -209,43 +210,43 @@ void vp9_sad16x8x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad16x8x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad8x8x3_c(const uint8_t *src_ptr,
@@ -253,43 +254,43 @@ void vp9_sad8x8x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad8x8x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad8x16x3_c(const uint8_t *src_ptr,
@@ -297,43 +298,43 @@ void vp9_sad8x16x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad8x16x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad4x4x3_c(const uint8_t *src_ptr,
@@ -341,204 +342,147 @@ void vp9_sad4x4x3_c(const uint8_t *src_ptr,
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad4x4x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad64x64x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad32x32x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad16x16x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad16x8x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad8x8x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad8x16x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t *ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
-/* Copy 2 macroblocks to a buffer */
-void vp9_copy32xn_c(uint8_t *src_ptr,
- int src_stride,
- uint8_t *dst_ptr,
- int dst_stride,
- int height) {
- int r;
-
- for (r = 0; r < height; r++) {
-#if !(CONFIG_FAST_UNALIGNED)
- dst_ptr[0] = src_ptr[0];
- dst_ptr[1] = src_ptr[1];
- dst_ptr[2] = src_ptr[2];
- dst_ptr[3] = src_ptr[3];
- dst_ptr[4] = src_ptr[4];
- dst_ptr[5] = src_ptr[5];
- dst_ptr[6] = src_ptr[6];
- dst_ptr[7] = src_ptr[7];
- dst_ptr[8] = src_ptr[8];
- dst_ptr[9] = src_ptr[9];
- dst_ptr[10] = src_ptr[10];
- dst_ptr[11] = src_ptr[11];
- dst_ptr[12] = src_ptr[12];
- dst_ptr[13] = src_ptr[13];
- dst_ptr[14] = src_ptr[14];
- dst_ptr[15] = src_ptr[15];
- dst_ptr[16] = src_ptr[16];
- dst_ptr[17] = src_ptr[17];
- dst_ptr[18] = src_ptr[18];
- dst_ptr[19] = src_ptr[19];
- dst_ptr[20] = src_ptr[20];
- dst_ptr[21] = src_ptr[21];
- dst_ptr[22] = src_ptr[22];
- dst_ptr[23] = src_ptr[23];
- dst_ptr[24] = src_ptr[24];
- dst_ptr[25] = src_ptr[25];
- dst_ptr[26] = src_ptr[26];
- dst_ptr[27] = src_ptr[27];
- dst_ptr[28] = src_ptr[28];
- dst_ptr[29] = src_ptr[29];
- dst_ptr[30] = src_ptr[30];
- dst_ptr[31] = src_ptr[31];
-#else
- ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0];
- ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1];
- ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2];
- ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3];
- ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4];
- ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5];
- ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6];
- ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7];
-#endif
- src_ptr += src_stride;
- dst_ptr += dst_stride;
-
- }
-}
diff --git a/vp9/encoder/vp9_satd_c.c b/vp9/encoder/vp9_satd_c.c
deleted file mode 100644
index 212c2243d..000000000
--- a/vp9/encoder/vp9_satd_c.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-#include "vpx_ports/mem.h"
-#include "./vp9_rtcd.h"
-
-unsigned int vp9_satd16x16_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- unsigned int *psatd) {
- int r, c, i;
- unsigned int satd = 0;
- DECLARE_ALIGNED(16, int16_t, diff_in[256]);
- DECLARE_ALIGNED(16, int16_t, diff_out[16]);
- int16_t *in;
-
- for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
- diff_in[r * 16 + c] = src_ptr[c] - ref_ptr[c];
- }
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- in = diff_in;
- for (r = 0; r < 16; r += 4) {
- for (c = 0; c < 16; c += 4) {
- vp9_short_walsh4x4_c(in + c, diff_out, 32);
- for (i = 0; i < 16; i++)
- satd += abs(diff_out[i]);
- }
- in += 64;
- }
-
- if (psatd)
- *psatd = satd;
-
- return satd;
-}
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 49195e80c..cfaf5f592 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -9,10 +9,11 @@
*/
-#include "limits.h"
+#include <limits.h>
#include "vpx_mem/vpx_mem.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_tile_common.h"
void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) {
int mb_row, mb_col;
@@ -21,7 +22,7 @@ void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) {
x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
- if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) {
+ if ((cm->frame_type == KEY_FRAME) || (cpi->refresh_golden_frame)) {
// Reset Gf useage monitors
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
@@ -143,11 +144,74 @@ static int cost_segmap(MACROBLOCKD *xd,
return cost;
}
+// Based on a set of segment counts, calculate a probability tree
+static void calc_segtree_probs_pred(MACROBLOCKD *xd,
+ int (*segcounts)[MAX_MB_SEGMENTS],
+ vp9_prob *segment_tree_probs,
+ vp9_prob *mod_probs) {
+ int count[4];
+
+ assert(!segcounts[0][0] && !segcounts[1][1] &&
+ !segcounts[2][2] && !segcounts[3][3]);
+
+ // Total count for all segments
+ count[0] = segcounts[3][0] + segcounts[1][0] + segcounts[2][0];
+ count[1] = segcounts[2][1] + segcounts[0][1] + segcounts[3][1];
+ count[2] = segcounts[0][2] + segcounts[3][2] + segcounts[1][2];
+ count[3] = segcounts[1][3] + segcounts[2][3] + segcounts[0][3];
+
+ // Work out probabilities of each segment
+ segment_tree_probs[0] = get_binary_prob(count[0] + count[1],
+ count[2] + count[3]);
+ segment_tree_probs[1] = get_binary_prob(count[0], count[1]);
+ segment_tree_probs[2] = get_binary_prob(count[2], count[3]);
+
+  // Now work out the modified counts that the decoder would have
+ count[0] = segment_tree_probs[0] * segment_tree_probs[1];
+ count[1] = segment_tree_probs[0] * (256 - segment_tree_probs[1]);
+ count[2] = (256 - segment_tree_probs[0]) * segment_tree_probs[2];
+ count[3] = (256 - segment_tree_probs[0]) * (256 - segment_tree_probs[2]);
+
+  // Work out modified probabilities depending on which segment was predicted
+ mod_probs[0] = get_binary_prob(count[1], count[2] + count[3]);
+ mod_probs[1] = get_binary_prob(count[0], count[2] + count[3]);
+ mod_probs[2] = get_binary_prob(count[0] + count[1], count[3]);
+ mod_probs[3] = get_binary_prob(count[0] + count[1], count[2]);
+}
+
+// Based on a set of segment counts and probabilities, calculate a cost estimate
+static int cost_segmap_pred(MACROBLOCKD *xd,
+ int (*segcounts)[MAX_MB_SEGMENTS],
+ vp9_prob *probs, vp9_prob *mod_probs) {
+ int pred_seg, cost = 0;
+
+ for (pred_seg = 0; pred_seg < MAX_MB_SEGMENTS; pred_seg++) {
+ int count1, count2;
+
+ // Cost the top node of the tree
+ count1 = segcounts[pred_seg][0] + segcounts[pred_seg][1];
+ count2 = segcounts[pred_seg][2] + segcounts[pred_seg][3];
+ cost += count1 * vp9_cost_zero(mod_probs[pred_seg]) +
+ count2 * vp9_cost_one(mod_probs[pred_seg]);
+
+ // Now add the cost of each individual segment branch
+ if (pred_seg >= 2 && count1) {
+ cost += segcounts[pred_seg][0] * vp9_cost_zero(probs[1]) +
+ segcounts[pred_seg][1] * vp9_cost_one(probs[1]);
+ } else if (pred_seg < 2 && count2 > 0) {
+ cost += segcounts[pred_seg][2] * vp9_cost_zero(probs[2]) +
+ segcounts[pred_seg][3] * vp9_cost_one(probs[2]);
+ }
+ }
+
+ return cost;
+}
+
static void count_segs(VP9_COMP *cpi,
MODE_INFO *mi,
int *no_pred_segcounts,
int (*temporal_predictor_count)[2],
- int *t_unpred_seg_counts,
+ int (*t_unpred_seg_counts)[MAX_MB_SEGMENTS],
int mb_size, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -166,8 +230,8 @@ static void count_segs(VP9_COMP *cpi,
// Temporal prediction not allowed on key frames
if (cm->frame_type != KEY_FRAME) {
// Test to see if the segment id matches the predicted value.
- const int seg_predicted =
- (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index));
+ const int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, segmap_index);
+ const int seg_predicted = (segment_id == pred_seg_id);
// Get the segment id prediction context
const int pred_context = vp9_get_pred_context(cm, xd, PRED_SEG_ID);
@@ -179,7 +243,7 @@ static void count_segs(VP9_COMP *cpi,
if (!seg_predicted)
// Update the "unpredicted" segment count
- t_unpred_seg_counts[segment_id]++;
+ t_unpred_seg_counts[pred_seg_id][segment_id]++;
}
}
@@ -191,18 +255,19 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
int t_pred_cost = INT_MAX;
int i;
- int mb_row, mb_col;
+ int tile_col, mb_row, mb_col;
int temporal_predictor_count[PREDICTION_PROBS][2];
int no_pred_segcounts[MAX_MB_SEGMENTS];
- int t_unpred_seg_counts[MAX_MB_SEGMENTS];
+ int t_unpred_seg_counts[MAX_MB_SEGMENTS][MAX_MB_SEGMENTS];
vp9_prob no_pred_tree[MB_FEATURE_TREE_PROBS];
vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS];
+ vp9_prob t_pred_tree_mod[MAX_MB_SEGMENTS];
vp9_prob t_nopred_prob[PREDICTION_PROBS];
const int mis = cm->mode_info_stride;
- MODE_INFO *mi_ptr = cm->mi, *mi;
+ MODE_INFO *mi_ptr, *mi;
// Set default state for the segment tree probabilities and the
// temporal coding probabilities
@@ -218,42 +283,49 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// First of all generate stats regarding how well the last segment map
// predicts this one
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
- mi = mi_ptr;
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
- if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
- count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, 4, mb_row, mb_col);
- } else {
- for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) << 1, y_idx = i & 2;
- MODE_INFO *sb_mi = mi + y_idx * mis + x_idx;
-
- if (mb_col + x_idx >= cm->mb_cols ||
- mb_row + y_idx >= cm->mb_rows) {
- continue;
- }
-
- if (sb_mi->mbmi.sb_type) {
- assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
- count_segs(cpi, sb_mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, 2, mb_row + y_idx, mb_col + x_idx);
- } else {
- int j;
-
- for (j = 0; j < 4; j++) {
- const int x_idx_mb = x_idx + (j & 1), y_idx_mb = y_idx + (j >> 1);
- MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis;
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ vp9_get_tile_col_offsets(cm, tile_col);
+ mi_ptr = cm->mi + cm->cur_tile_mb_col_start;
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
+ mi = mi_ptr;
+ for (mb_col = cm->cur_tile_mb_col_start;
+ mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) {
+ if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, 4, mb_row, mb_col);
+ } else {
+ for (i = 0; i < 4; i++) {
+ int x_idx = (i & 1) << 1, y_idx = i & 2;
+ MODE_INFO *sb_mi = mi + y_idx * mis + x_idx;
+
+ if (mb_col + x_idx >= cm->mb_cols ||
+ mb_row + y_idx >= cm->mb_rows) {
+ continue;
+ }
- if (mb_col + x_idx_mb >= cm->mb_cols ||
- mb_row + y_idx_mb >= cm->mb_rows) {
- continue;
+ if (sb_mi->mbmi.sb_type) {
+ assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
+ count_segs(cpi, sb_mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, 2,
+ mb_row + y_idx, mb_col + x_idx);
+ } else {
+ int j;
+
+ for (j = 0; j < 4; j++) {
+ const int x_idx_mb = x_idx + (j & 1);
+ const int y_idx_mb = y_idx + (j >> 1);
+ MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis;
+
+ if (mb_col + x_idx_mb >= cm->mb_cols ||
+ mb_row + y_idx_mb >= cm->mb_rows) {
+ continue;
+ }
+
+ assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
+ count_segs(cpi, mb_mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts,
+ 1, mb_row + y_idx_mb, mb_col + x_idx_mb);
}
-
- assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
- count_segs(cpi, mb_mi, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts,
- 1, mb_row + y_idx_mb, mb_col + x_idx_mb);
}
}
}
@@ -270,8 +342,10 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
if (cm->frame_type != KEY_FRAME) {
// Work out probability tree for coding those segments not
// predicted using the temporal method and the cost.
- calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree);
- t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree);
+ calc_segtree_probs_pred(xd, t_unpred_seg_counts, t_pred_tree,
+ t_pred_tree_mod);
+ t_pred_cost = cost_segmap_pred(xd, t_unpred_seg_counts, t_pred_tree,
+ t_pred_tree_mod);
// Add in the cost of the signalling for each prediction context
for (i = 0; i < PREDICTION_PROBS; i++) {
@@ -291,6 +365,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
cm->temporal_update = 1;
vpx_memcpy(xd->mb_segment_tree_probs,
t_pred_tree, sizeof(t_pred_tree));
+ vpx_memcpy(xd->mb_segment_mispred_tree_probs,
+ t_pred_tree_mod, sizeof(t_pred_tree_mod));
vpx_memcpy(&cm->segment_pred_probs,
t_nopred_prob, sizeof(t_nopred_prob));
} else {
diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h
index 3c75c68d8..1c90c2f2d 100644
--- a/vp9/encoder/vp9_segmentation.h
+++ b/vp9/encoder/vp9_segmentation.h
@@ -9,23 +9,20 @@
*/
-#include "string.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-
#ifndef VP9_ENCODER_VP9_SEGMENTATION_H_
#define VP9_ENCODER_VP9_SEGMENTATION_H_
-extern void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm,
- MACROBLOCK *x);
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+
+void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x);
-extern void vp9_enable_segmentation(VP9_PTR ptr);
-extern void vp9_disable_segmentation(VP9_PTR ptr);
+void vp9_enable_segmentation(VP9_PTR ptr);
+void vp9_disable_segmentation(VP9_PTR ptr);
// Valid values for a segment are 0 to 3
// Segmentation map is arrange as [Rows][Columns]
-extern void vp9_set_segmentation_map(VP9_PTR ptr,
- unsigned char *segmentation_map);
+void vp9_set_segmentation_map(VP9_PTR ptr, unsigned char *segmentation_map);
// The values given for each segment can be either deltas (from the default
// value chosen for the frame) or absolute values.
@@ -37,10 +34,9 @@ extern void vp9_set_segmentation_map(VP9_PTR ptr,
//
// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
// the absolute values given).
-//
-extern void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data,
- unsigned char abs_delta);
+void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data,
+ unsigned char abs_delta);
-extern void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
+void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_SEGMENTATION_H_
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 8bbe53486..a6cd1c0c3 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -8,8 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
+#include <limits.h>
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_quantize.h"
@@ -26,9 +29,6 @@
#include "vp9/common/vp9_swapyv12buffer.h"
#include "vpx_ports/vpx_timer.h"
-#include <math.h>
-#include <limits.h>
-
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
@@ -43,39 +43,35 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
int mv_row,
int mv_col,
uint8_t *pred) {
- int offset;
- uint8_t *yptr, *uptr, *vptr;
- int omv_row, omv_col;
-
- // Y
- yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+ const int which_mv = 0;
+ int_mv subpel_mv;
+ int_mv fullpel_mv;
+
+ subpel_mv.as_mv.row = mv_row;
+ subpel_mv.as_mv.col = mv_col;
+ // TODO(jkoleszar): Make this rounding consistent with the rest of the code
+ fullpel_mv.as_mv.row = (mv_row >> 1) & ~7;
+ fullpel_mv.as_mv.col = (mv_col >> 1) & ~7;
+
+ vp9_build_inter_predictor(y_mb_ptr, stride,
+ &pred[0], 16,
+ &subpel_mv,
+ &xd->scale_factor[which_mv],
+ 16, 16, which_mv, &xd->subpix);
- if ((mv_row | mv_col) & 7) {
- xd->subpixel_predict16x16(yptr, stride,
- (mv_col & 7) << 1, (mv_row & 7) << 1, &pred[0], 16);
- } else {
- vp9_copy_mem16x16(yptr, stride, &pred[0], 16);
- }
-
- // U & V
- omv_row = mv_row;
- omv_col = mv_col;
- mv_row >>= 1;
- mv_col >>= 1;
stride = (stride + 1) >> 1;
- offset = (mv_row >> 3) * stride + (mv_col >> 3);
- uptr = u_mb_ptr + offset;
- vptr = v_mb_ptr + offset;
-
- if ((omv_row | omv_col) & 15) {
- xd->subpixel_predict8x8(uptr, stride,
- (omv_col & 15), (omv_row & 15), &pred[256], 8);
- xd->subpixel_predict8x8(vptr, stride,
- (omv_col & 15), (omv_row & 15), &pred[320], 8);
- } else {
- vp9_copy_mem8x8(uptr, stride, &pred[256], 8);
- vp9_copy_mem8x8(vptr, stride, &pred[320], 8);
- }
+
+ vp9_build_inter_predictor_q4(u_mb_ptr, stride,
+ &pred[256], 8,
+ &fullpel_mv, &subpel_mv,
+ &xd->scale_factor_uv[which_mv],
+ 8, 8, which_mv, &xd->subpix);
+
+ vp9_build_inter_predictor_q4(v_mb_ptr, stride,
+ &pred[320], 8,
+ &fullpel_mv, &subpel_mv,
+ &xd->scale_factor_uv[which_mv],
+ 8, 8, which_mv, &xd->subpix);
}
void vp9_temporal_filter_apply_c(uint8_t *frame1,
@@ -170,7 +166,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
/*cpi->sf.search_method == HEX*/
// TODO Check that the 16x16 vf & sdf are selected here
// Ignore mv costing by sending NULL pointer instead of cost arrays
- bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first,
+ bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv[0],
step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16],
NULL, NULL, NULL, NULL,
&best_ref_mv1);
@@ -182,7 +178,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int distortion;
unsigned int sse;
// Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first,
+ bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv[0],
&best_ref_mv1,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
@@ -262,8 +258,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (cpi->frames[frame] == NULL)
continue;
- mbd->block[0].bmi.as_mv.first.as_mv.row = 0;
- mbd->block[0].bmi.as_mv.first.as_mv.col = 0;
+ mbd->block[0].bmi.as_mv[0].as_mv.row = 0;
+ mbd->block[0].bmi.as_mv[0].as_mv.col = 0;
if (frame == alt_ref_index) {
filter_weight = 2;
@@ -296,8 +292,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
cpi->frames[frame]->u_buffer + mb_uv_offset,
cpi->frames[frame]->v_buffer + mb_uv_offset,
cpi->frames[frame]->y_stride,
- mbd->block[0].bmi.as_mv.first.as_mv.row,
- mbd->block[0].bmi.as_mv.first.as_mv.col,
+ mbd->block[0].bmi.as_mv[0].as_mv.row,
+ mbd->block[0].bmi.as_mv[0].as_mv.col,
predictor);
// Apply the filter (YUV)
@@ -375,11 +371,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
mbd->pre.v_buffer = v_buffer;
}
-void vp9_temporal_filter_prepare
-(
- VP9_COMP *cpi,
- int distance
-) {
+void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
int frame = 0;
int num_frames_backward = 0;
@@ -464,6 +456,13 @@ void vp9_temporal_filter_prepare
, start_frame);
#endif
+  // Set up scaling factors. Scaling on each of the ARNR frames is not supported
+ vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
+ &cpi->common.yv12_fb[cpi->common.new_fb_idx],
+ 16 * cpi->common.mb_cols,
+ 16 * cpi->common.mb_rows);
+ cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
+
// Setup frame pointers, NULL indicates frame not included in filter
vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
for (frame = 0; frame < frames_to_blur; frame++) {
diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h
index 27fc35f82..f3ca8c616 100644
--- a/vp9/encoder/vp9_temporal_filter.h
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -11,6 +11,6 @@
#ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
#define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
-extern void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
+void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
#endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index fc99311ae..95a2e1227 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -25,20 +25,14 @@
compressions, then generating vp9_context.c = initial stats. */
#ifdef ENTROPY_STATS
-vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16];
-vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16];
+vp9_coeff_accum context_counters_4x4[BLOCK_TYPES];
+vp9_coeff_accum context_counters_8x8[BLOCK_TYPES];
+vp9_coeff_accum context_counters_16x16[BLOCK_TYPES];
vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
-extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16];
-extern vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16];
+extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES];
+extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES];
+extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES];
extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
#endif /* ENTROPY_STATS */
@@ -100,12 +94,6 @@ static void fill_value_tokens() {
vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#else
-#define PT pt
-#endif
-
static void tokenize_b(VP9_COMP *cpi,
MACROBLOCKD *xd,
const int ib,
@@ -114,22 +102,20 @@ static void tokenize_b(VP9_COMP *cpi,
TX_SIZE tx_size,
int dry_run) {
int pt; /* near block/prev token context index */
- int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
+ int c = 0;
+ int recent_energy = 0;
const BLOCKD * const b = xd->block + ib;
- const int eob = b->eob; /* one beyond last nonzero coeff */
+ const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
int16_t *qcoeff_ptr = b->qcoeff;
int seg_eob;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- const int *bands, *scan;
+ const int *scan;
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type(xd, b) : DCT_DCT;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
-#endif
+ const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
vp9_block2above[tx_size][ib];
@@ -147,45 +133,26 @@ static void tokenize_b(VP9_COMP *cpi,
default:
case TX_4X4:
seg_eob = 16;
- bands = vp9_coef_bands_4x4;
scan = vp9_default_zig_zag1d_4x4;
if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_4x4;
- probs = cpi->common.fc.hybrid_coef_probs_4x4;
if (tx_type == ADST_DCT) {
scan = vp9_row_scan_4x4;
} else if (tx_type == DCT_ADST) {
scan = vp9_col_scan_4x4;
}
- } else {
- counts = cpi->coef_counts_4x4;
- probs = cpi->common.fc.coef_probs_4x4;
}
+ counts = cpi->coef_counts_4x4;
+ probs = cpi->common.fc.coef_probs_4x4;
break;
case TX_8X8:
- if (type == PLANE_TYPE_Y2) {
- seg_eob = 4;
- bands = vp9_coef_bands_4x4;
- scan = vp9_default_zig_zag1d_4x4;
- } else {
-#if CONFIG_CNVCONTEXT
- a_ec = (a[0] + a[1]) != 0;
- l_ec = (l[0] + l[1]) != 0;
-#endif
- seg_eob = 64;
- bands = vp9_coef_bands_8x8;
- scan = vp9_default_zig_zag1d_8x8;
- }
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_8x8;
- probs = cpi->common.fc.hybrid_coef_probs_8x8;
- } else {
- counts = cpi->coef_counts_8x8;
- probs = cpi->common.fc.coef_probs_8x8;
- }
+ a_ec = (a[0] + a[1]) != 0;
+ l_ec = (l[0] + l[1]) != 0;
+ seg_eob = 64;
+ scan = vp9_default_zig_zag1d_8x8;
+ counts = cpi->coef_counts_8x8;
+ probs = cpi->common.fc.coef_probs_8x8;
break;
case TX_16X16:
-#if CONFIG_CNVCONTEXT
if (type != PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -193,33 +160,23 @@ static void tokenize_b(VP9_COMP *cpi,
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
-#endif
seg_eob = 256;
- bands = vp9_coef_bands_16x16;
scan = vp9_default_zig_zag1d_16x16;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_16x16;
- probs = cpi->common.fc.hybrid_coef_probs_16x16;
- } else {
- counts = cpi->coef_counts_16x16;
- probs = cpi->common.fc.coef_probs_16x16;
- }
+ counts = cpi->coef_counts_16x16;
+ probs = cpi->common.fc.coef_probs_16x16;
if (type == PLANE_TYPE_UV) {
int uv_idx = (ib - 16) >> 2;
qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx;
}
break;
case TX_32X32:
-#if CONFIG_CNVCONTEXT
a_ec = a[0] + a[1] + a[2] + a[3] +
a1[0] + a1[1] + a1[2] + a1[3];
l_ec = l[0] + l[1] + l[2] + l[3] +
l1[0] + l1[1] + l1[2] + l1[3];
a_ec = a_ec != 0;
l_ec = l_ec != 0;
-#endif
seg_eob = 1024;
- bands = vp9_coef_bands_32x32;
scan = vp9_default_zig_zag1d_32x32;
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
@@ -228,16 +185,12 @@ static void tokenize_b(VP9_COMP *cpi,
}
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
- pn = pt;
-#endif
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
- seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
+ seg_eob = 0;
do {
- const int band = bands[c];
+ const int band = get_coef_band(tx_size, c);
int token;
if (c < eob) {
@@ -252,30 +205,23 @@ static void tokenize_b(VP9_COMP *cpi,
}
t->Token = token;
- t->context_tree = probs[type][band][PT];
- t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
- (band > 1 && type == PLANE_TYPE_Y_NO_DC));
+ t->context_tree = probs[type][ref][band][pt];
+ t->skip_eob_node = (pt == 0) && (band > 0);
assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
- ++counts[type][band][PT][token];
+ ++counts[type][ref][band][pt][token];
}
- pt = vp9_prev_token_class[token];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(bands[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
-#endif
+
+ pt = vp9_get_coef_context(&recent_energy, token);
++t;
} while (c < eob && ++c < seg_eob);
*tp = t;
- a_ec = l_ec = (c > !type); /* 0 <-> all coeff data is zero */
+ a_ec = l_ec = (c > 0); /* 0 <-> all coeff data is zero */
a[0] = a_ec;
l[0] = l_ec;
- if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) {
+ if (tx_size == TX_8X8) {
a[1] = a_ec;
l[1] = l_ec;
} else if (tx_size == TX_16X16) {
@@ -294,18 +240,13 @@ static void tokenize_b(VP9_COMP *cpi,
}
}
-int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) {
+int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
- if (has_2nd_order) {
- for (i = 0; i < 16; i++)
- skip &= (xd->block[i].eob < 2);
- skip &= (!xd->block[24].eob);
- } else {
- for (i = 0; i < 16; i++)
- skip &= (!xd->block[i].eob);
- }
+ for (i = 0; i < 16; i++)
+ skip &= (!xd->eobs[i]);
+
return skip;
}
@@ -314,47 +255,42 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) {
int i;
for (i = 16; i < 24; i++)
- skip &= (!xd->block[i].eob);
+ skip &= (!xd->eobs[i]);
return skip;
}
-static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) {
- return (vp9_mby_is_skippable_4x4(xd, has_2nd_order) &
+static int mb_is_skippable_4x4(MACROBLOCKD *xd) {
+ return (vp9_mby_is_skippable_4x4(xd) &
vp9_mbuv_is_skippable_4x4(xd));
}
-int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) {
+int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
- if (has_2nd_order) {
- for (i = 0; i < 16; i += 4)
- skip &= (xd->block[i].eob < 2);
- skip &= (!xd->block[24].eob);
- } else {
- for (i = 0; i < 16; i += 4)
- skip &= (!xd->block[i].eob);
- }
+ for (i = 0; i < 16; i += 4)
+ skip &= (!xd->eobs[i]);
+
return skip;
}
int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) {
- return (!xd->block[16].eob) & (!xd->block[20].eob);
+ return (!xd->eobs[16]) & (!xd->eobs[20]);
}
-static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) {
- return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) &
+static int mb_is_skippable_8x8(MACROBLOCKD *xd) {
+ return (vp9_mby_is_skippable_8x8(xd) &
vp9_mbuv_is_skippable_8x8(xd));
}
-static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_2nd_order) {
- return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) &
+static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) {
+ return (vp9_mby_is_skippable_8x8(xd) &
vp9_mbuv_is_skippable_4x4(xd));
}
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) {
int skip = 1;
- skip &= !xd->block[0].eob;
+ skip &= !xd->eobs[0];
return skip;
}
@@ -364,12 +300,12 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) {
int skip = 1;
- skip &= !xd->block[0].eob;
+ skip &= !xd->eobs[0];
return skip;
}
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) {
- return (!xd->block[16].eob) & (!xd->block[20].eob);
+ return (!xd->eobs[16]) & (!xd->eobs[20]);
}
static int sb_is_skippable_32x32(MACROBLOCKD *xd) {
@@ -384,14 +320,9 @@ void vp9_tokenize_sb(VP9_COMP *cpi,
VP9_COMMON * const cm = &cpi->common;
MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
TOKENEXTRA *t_backup = *t;
- ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0),
- (ENTROPY_CONTEXT *) (xd->above_context + 1), };
- ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0),
- (ENTROPY_CONTEXT *) (xd->left_context + 1), };
const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP);
const int segment_id = mbmi->segment_id;
- const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0);
+ const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
int b;
mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd);
@@ -419,7 +350,6 @@ void vp9_tokenize_sb(VP9_COMP *cpi,
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
TX_16X16, dry_run);
}
- A[0][8] = L[0][8] = A[1][8] = L[1][8] = 0;
if (dry_run)
*t = t_backup;
}
@@ -428,8 +358,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
MACROBLOCKD *xd,
TOKENEXTRA **t,
int dry_run) {
- PLANE_TYPE plane_type;
- int has_2nd_order;
int b;
int tx_size = xd->mode_info_context->mbmi.txfm_size;
int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP);
@@ -441,14 +369,11 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
int skip_inc;
int segment_id = xd->mode_info_context->mbmi.segment_id;
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0)) {
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_inc = 1;
} else
skip_inc = 0;
- has_2nd_order = get_2nd_order_usage(xd);
-
switch (tx_size) {
case TX_16X16:
@@ -458,15 +383,15 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
xd->mode_info_context->mbmi.mode == SPLITMV)
xd->mode_info_context->mbmi.mb_skip_coeff =
- mb_is_skippable_8x8_4x4uv(xd, 0);
+ mb_is_skippable_8x8_4x4uv(xd);
else
xd->mode_info_context->mbmi.mb_skip_coeff =
- mb_is_skippable_8x8(xd, has_2nd_order);
+ mb_is_skippable_8x8(xd);
break;
default:
xd->mode_info_context->mbmi.mb_skip_coeff =
- mb_is_skippable_4x4(xd, has_2nd_order);
+ mb_is_skippable_4x4(xd);
break;
}
@@ -487,15 +412,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
if (!dry_run)
cpi->skip_false_count[mb_skip_context] += skip_inc;
- if (has_2nd_order) {
- tokenize_b(cpi, xd, 24, t, PLANE_TYPE_Y2, tx_size, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
-
if (tx_size == TX_16X16) {
tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
for (b = 16; b < 24; b += 4) {
@@ -503,7 +419,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
}
} else if (tx_size == TX_8X8) {
for (b = 0; b < 16; b += 4) {
- tokenize_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run);
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
}
if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
xd->mode_info_context->mbmi.mode == SPLITMV) {
@@ -516,11 +432,10 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
}
}
} else {
- for (b = 0; b < 24; b++) {
- if (b >= 16)
- plane_type = PLANE_TYPE_UV;
- tokenize_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run);
- }
+ for (b = 0; b < 16; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 16; b < 24; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
}
if (dry_run)
*t = t_backup;
@@ -531,25 +446,13 @@ void init_context_counters(void) {
FILE *f = fopen("context.bin", "rb");
if (!f) {
vpx_memset(context_counters_4x4, 0, sizeof(context_counters_4x4));
- vpx_memset(hybrid_context_counters_4x4, 0,
- sizeof(hybrid_context_counters_4x4));
vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8));
- vpx_memset(hybrid_context_counters_8x8, 0,
- sizeof(hybrid_context_counters_8x8));
vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16));
- vpx_memset(hybrid_context_counters_16x16, 0,
- sizeof(hybrid_context_counters_16x16));
vpx_memset(context_counters_32x32, 0, sizeof(context_counters_32x32));
} else {
fread(context_counters_4x4, sizeof(context_counters_4x4), 1, f);
- fread(hybrid_context_counters_4x4,
- sizeof(hybrid_context_counters_4x4), 1, f);
fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
- fread(hybrid_context_counters_8x8,
- sizeof(hybrid_context_counters_8x8), 1, f);
fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
- fread(hybrid_context_counters_16x16,
- sizeof(hybrid_context_counters_16x16), 1, f);
fread(context_counters_32x32, sizeof(context_counters_32x32), 1, f);
fclose(f);
}
@@ -557,25 +460,13 @@ void init_context_counters(void) {
f = fopen("treeupdate.bin", "rb");
if (!f) {
vpx_memset(tree_update_hist_4x4, 0, sizeof(tree_update_hist_4x4));
- vpx_memset(hybrid_tree_update_hist_4x4, 0,
- sizeof(hybrid_tree_update_hist_4x4));
vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8));
- vpx_memset(hybrid_tree_update_hist_8x8, 0,
- sizeof(hybrid_tree_update_hist_8x8));
vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16));
- vpx_memset(hybrid_tree_update_hist_16x16, 0,
- sizeof(hybrid_tree_update_hist_16x16));
vpx_memset(tree_update_hist_32x32, 0, sizeof(tree_update_hist_32x32));
} else {
fread(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f);
- fread(hybrid_tree_update_hist_4x4,
- sizeof(hybrid_tree_update_hist_4x4), 1, f);
fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
- fread(hybrid_tree_update_hist_8x8,
- sizeof(hybrid_tree_update_hist_8x8), 1, f);
fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
- fread(hybrid_tree_update_hist_16x16,
- sizeof(hybrid_tree_update_hist_16x16), 1, f);
fread(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f);
fclose(f);
}
@@ -583,33 +474,38 @@ void init_context_counters(void) {
static void print_counter(FILE *f, vp9_coeff_accum *context_counters,
int block_types, const char *header) {
- int type, band, pt, t;
+ int type, ref, band, pt, t;
fprintf(f, "static const vp9_coeff_count %s = {\n", header);
#define Comma(X) (X ? "," : "")
type = 0;
do {
+ ref = 0;
fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
- band = 0;
do {
- fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
- pt = 0;
+ fprintf(f, "%s\n { /* %s */", Comma(type), ref ? "Inter" : "Intra");
+ band = 0;
do {
- fprintf(f, "%s\n {", Comma(pt));
-
- t = 0;
+ fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+ pt = 0;
do {
- const int64_t x = context_counters[type][band][pt][t];
- const int y = (int) x;
-
- assert(x == (int64_t) y); /* no overflow handling yet */
- fprintf(f, "%s %d", Comma(t), y);
- } while (++t < MAX_ENTROPY_TOKENS);
- fprintf(f, "}");
- } while (++pt < PREV_COEF_CONTEXTS);
+ fprintf(f, "%s\n {", Comma(pt));
+
+ t = 0;
+ do {
+ const int64_t x = context_counters[type][ref][band][pt][t];
+ const int y = (int) x;
+
+ assert(x == (int64_t) y); /* no overflow handling yet */
+ fprintf(f, "%s %d", Comma(t), y);
+ } while (++t < MAX_ENTROPY_TOKENS);
+ fprintf(f, "}");
+ } while (++pt < PREV_COEF_CONTEXTS);
+ fprintf(f, "\n }");
+ } while (++band < COEF_BANDS);
fprintf(f, "\n }");
- } while (++band < COEF_BANDS);
+ } while (++ref < REF_TYPES);
fprintf(f, "\n }");
} while (++type < block_types);
fprintf(f, "\n};\n");
@@ -617,7 +513,7 @@ static void print_counter(FILE *f, vp9_coeff_accum *context_counters,
static void print_probs(FILE *f, vp9_coeff_accum *context_counters,
int block_types, const char *header) {
- int type, band, pt, t;
+ int type, ref, band, pt, t;
fprintf(f, "static const vp9_coeff_probs %s = {", header);
@@ -626,32 +522,38 @@ static void print_probs(FILE *f, vp9_coeff_accum *context_counters,
do {
fprintf(f, "%s%s{ /* block Type %d */",
Comma(type), Newline(type, " "), type);
- band = 0;
+ ref = 0;
do {
- fprintf(f, "%s%s{ /* Coeff Band %d */",
- Comma(band), Newline(band, " "), band);
- pt = 0;
+ fprintf(f, "%s%s{ /* %s */",
+ Comma(band), Newline(band, " "), ref ? "Inter" : "Intra");
+ band = 0;
do {
- unsigned int branch_ct[ENTROPY_NODES][2];
- unsigned int coef_counts[MAX_ENTROPY_TOKENS];
- vp9_prob coef_probs[ENTROPY_NODES];
-
- for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- coef_counts[t] = context_counters[type][band][pt][t];
- vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
- vp9_coef_encodings, vp9_coef_tree,
- coef_probs, branch_ct, coef_counts);
- fprintf(f, "%s\n {", Comma(pt));
-
- t = 0;
+ fprintf(f, "%s%s{ /* Coeff Band %d */",
+ Comma(band), Newline(band, " "), band);
+ pt = 0;
do {
- fprintf(f, "%s %3d", Comma(t), coef_probs[t]);
- } while (++t < ENTROPY_NODES);
-
- fprintf(f, " }");
- } while (++pt < PREV_COEF_CONTEXTS);
+ unsigned int branch_ct[ENTROPY_NODES][2];
+ unsigned int coef_counts[MAX_ENTROPY_TOKENS];
+ vp9_prob coef_probs[ENTROPY_NODES];
+
+ for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+ coef_counts[t] = context_counters[type][ref][band][pt][t];
+ vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
+ vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, coef_counts);
+ fprintf(f, "%s\n {", Comma(pt));
+
+ t = 0;
+ do {
+ fprintf(f, "%s %3d", Comma(t), coef_probs[t]);
+ } while (++t < ENTROPY_NODES);
+
+ fprintf(f, " }");
+ } while (++pt < PREV_COEF_CONTEXTS);
+ fprintf(f, "\n }");
+ } while (++band < COEF_BANDS);
fprintf(f, "\n }");
- } while (++band < COEF_BANDS);
+ } while (++ref < REF_TYPES);
fprintf(f, "\n }");
} while (++type < block_types);
fprintf(f, "\n};\n");
@@ -664,34 +566,22 @@ void print_context_counters() {
fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n");
/* print counts */
- print_counter(f, context_counters_4x4, BLOCK_TYPES_4X4,
+ print_counter(f, context_counters_4x4, BLOCK_TYPES,
"vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]");
- print_counter(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4,
- "vp9_default_hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]");
- print_counter(f, context_counters_8x8, BLOCK_TYPES_8X8,
+ print_counter(f, context_counters_8x8, BLOCK_TYPES,
"vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]");
- print_counter(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8,
- "vp9_default_hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]");
- print_counter(f, context_counters_16x16, BLOCK_TYPES_16X16,
+ print_counter(f, context_counters_16x16, BLOCK_TYPES,
"vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]");
- print_counter(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16,
- "vp9_default_hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]");
print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32,
"vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]");
/* print coefficient probabilities */
- print_probs(f, context_counters_4x4, BLOCK_TYPES_4X4,
+ print_probs(f, context_counters_4x4, BLOCK_TYPES,
"default_coef_probs_4x4[BLOCK_TYPES_4X4]");
- print_probs(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4,
- "default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]");
- print_probs(f, context_counters_8x8, BLOCK_TYPES_8X8,
+ print_probs(f, context_counters_8x8, BLOCK_TYPES,
"default_coef_probs_8x8[BLOCK_TYPES_8X8]");
- print_probs(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8,
- "default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]");
- print_probs(f, context_counters_16x16, BLOCK_TYPES_16X16,
+ print_probs(f, context_counters_16x16, BLOCK_TYPES,
"default_coef_probs_16x16[BLOCK_TYPES_16X16]");
- print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16,
- "default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]");
print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32,
"default_coef_probs_32x32[BLOCK_TYPES_32X32]");
@@ -699,14 +589,8 @@ void print_context_counters() {
f = fopen("context.bin", "wb");
fwrite(context_counters_4x4, sizeof(context_counters_4x4), 1, f);
- fwrite(hybrid_context_counters_4x4,
- sizeof(hybrid_context_counters_4x4), 1, f);
fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
- fwrite(hybrid_context_counters_8x8,
- sizeof(hybrid_context_counters_8x8), 1, f);
fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
- fwrite(hybrid_context_counters_16x16,
- sizeof(hybrid_context_counters_16x16), 1, f);
fwrite(context_counters_32x32, sizeof(context_counters_32x32), 1, f);
fclose(f);
}
@@ -716,21 +600,18 @@ void vp9_tokenize_initialize() {
fill_value_tokens();
}
-static __inline void stuff_b(VP9_COMP *cpi,
- MACROBLOCKD *xd,
- const int ib,
- TOKENEXTRA **tp,
- PLANE_TYPE type,
- TX_SIZE tx_size,
- int dry_run) {
- const BLOCKD * const b = xd->block + ib;
- const int *bands;
+static INLINE void stuff_b(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ const int ib,
+ TOKENEXTRA **tp,
+ PLANE_TYPE type,
+ TX_SIZE tx_size,
+ int dry_run) {
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
int pt, band;
TOKENEXTRA *t = *tp;
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, b) : DCT_DCT;
+ const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
vp9_block2above[tx_size][ib];
ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context +
@@ -744,33 +625,16 @@ static __inline void stuff_b(VP9_COMP *cpi,
switch (tx_size) {
default:
case TX_4X4:
- bands = vp9_coef_bands_4x4;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_4x4;
- probs = cpi->common.fc.hybrid_coef_probs_4x4;
- } else {
- counts = cpi->coef_counts_4x4;
- probs = cpi->common.fc.coef_probs_4x4;
- }
+ counts = cpi->coef_counts_4x4;
+ probs = cpi->common.fc.coef_probs_4x4;
break;
case TX_8X8:
-#if CONFIG_CNVCONTEXT
- if (type != PLANE_TYPE_Y2) {
- a_ec = (a[0] + a[1]) != 0;
- l_ec = (l[0] + l[1]) != 0;
- }
-#endif
- bands = vp9_coef_bands_8x8;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_8x8;
- probs = cpi->common.fc.hybrid_coef_probs_8x8;
- } else {
- counts = cpi->coef_counts_8x8;
- probs = cpi->common.fc.coef_probs_8x8;
- }
+ a_ec = (a[0] + a[1]) != 0;
+ l_ec = (l[0] + l[1]) != 0;
+ counts = cpi->coef_counts_8x8;
+ probs = cpi->common.fc.coef_probs_8x8;
break;
case TX_16X16:
-#if CONFIG_CNVCONTEXT
if (type != PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -778,26 +642,16 @@ static __inline void stuff_b(VP9_COMP *cpi,
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
-#endif
- bands = vp9_coef_bands_16x16;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_16x16;
- probs = cpi->common.fc.hybrid_coef_probs_16x16;
- } else {
- counts = cpi->coef_counts_16x16;
- probs = cpi->common.fc.coef_probs_16x16;
- }
+ counts = cpi->coef_counts_16x16;
+ probs = cpi->common.fc.coef_probs_16x16;
break;
case TX_32X32:
-#if CONFIG_CNVCONTEXT
a_ec = a[0] + a[1] + a[2] + a[3] +
a1[0] + a1[1] + a1[2] + a1[3];
l_ec = l[0] + l[1] + l[2] + l[3] +
l1[0] + l1[1] + l1[2] + l1[3];
a_ec = a_ec != 0;
l_ec = l_ec != 0;
-#endif
- bands = vp9_coef_bands_32x32;
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
break;
@@ -805,14 +659,14 @@ static __inline void stuff_b(VP9_COMP *cpi,
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
- band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
+ band = get_coef_band(tx_size, 0);
t->Token = DCT_EOB_TOKEN;
- t->context_tree = probs[type][band][pt];
+ t->context_tree = probs[type][ref][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
*a = *l = 0;
- if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) {
+ if (tx_size == TX_8X8) {
a[1] = 0;
l[1] = 0;
} else if (tx_size == TX_16X16) {
@@ -831,32 +685,18 @@ static __inline void stuff_b(VP9_COMP *cpi,
}
if (!dry_run) {
- ++counts[type][band][pt][DCT_EOB_TOKEN];
+ ++counts[type][ref][band][pt][DCT_EOB_TOKEN];
}
}
static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
- PLANE_TYPE plane_type;
int b;
- int has_2nd_order = get_2nd_order_usage(xd);
-
- if (has_2nd_order) {
- stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
-#if CONFIG_CNVCONTEXT
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
-#endif
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
- for (b = 0; b < 24; b += 4) {
- if (b >= 16)
- plane_type = PLANE_TYPE_UV;
- stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run);
- }
+ for (b = 0; b < 16; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 16; b < 24; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
}
static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd,
@@ -867,56 +707,26 @@ static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd,
for (b = 16; b < 24; b += 4) {
stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
}
-#if CONFIG_CNVCONTEXT
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
-#endif
}
static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
int b;
- PLANE_TYPE plane_type;
- int has_2nd_order = get_2nd_order_usage(xd);
-
- if (has_2nd_order) {
- stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_4X4, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
- for (b = 0; b < 24; b++) {
- if (b >= 16)
- plane_type = PLANE_TYPE_UV;
- stuff_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run);
- }
+ for (b = 0; b < 16; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 16; b < 24; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
}
static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
- PLANE_TYPE plane_type;
int b;
- int has_2nd_order = get_2nd_order_usage(xd);
- if (has_2nd_order) {
- stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
-
- for (b = 0; b < 16; b += 4) {
- stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run);
- }
-
- for (b = 16; b < 24; b++) {
+ for (b = 0; b < 16; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 16; b < 24; b++)
stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
- }
}
void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
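
For orientation, a sketch of the counter layout that stuff_b() and the ENTROPY_STATS dumps above now index: the per-hybrid tables are gone and an Inter/Intra (ref) dimension sits between block type and coefficient band. Dimension sizes below are illustrative stand-ins, not the real constants from vp9_entropy.h.

    #include <stdio.h>

    /* Illustrative sizes only; the real values come from vp9_entropy.h. */
    #define BLOCK_TYPES        2
    #define REF_TYPES          2   /* 0 = intra, 1 = inter */
    #define COEF_BANDS         6
    #define PREV_COEF_CONTEXTS 6
    #define MAX_ENTROPY_TOKENS 12

    typedef unsigned int coeff_count[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                                    [MAX_ENTROPY_TOKENS];

    static coeff_count counts[BLOCK_TYPES];

    /* Bump a counter the way stuff_b() does for an end-of-block token. */
    static void count_token(int type, int is_inter, int band, int pt, int token) {
      ++counts[type][is_inter][band][pt][token];
    }

    int main(void) {
      count_token(0, 1, 0, 2, 11);  /* inter block, band 0, context 2 */
      printf("%u\n", counts[0][1][0][2][11]);
      return 0;
    }
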
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 3eeb8fa5a..6ac19ba71 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -28,42 +28,39 @@ typedef struct {
uint8_t skip_eob_node;
} TOKENEXTRA;
-typedef int64_t vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS]
+typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
-extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block);
-extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
-extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block);
-extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
-extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
-extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
-extern int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
struct VP9_COMP;
-extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
-extern void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
+void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
-extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
-extern void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
+void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+
+void vp9_fix_contexts_sb(MACROBLOCKD *xd);
-extern void vp9_fix_contexts_sb(MACROBLOCKD *xd);
#ifdef ENTROPY_STATS
void init_context_counters();
void print_context_counters();
-extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16];
+extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES];
+extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES];
+extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES];
extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
-
-extern vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16];
#endif
extern const int *vp9_dct_value_cost_ptr;
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
index 4e0e5e12c..832471aa8 100644
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -36,30 +36,28 @@ typedef BOOL_CODER vp9_writer;
/* Both of these return bits, not scaled bits. */
-
-static __inline unsigned int cost_branch(const unsigned int ct[2],
- vp9_prob p) {
+static INLINE unsigned int cost_branch256(const unsigned int ct[2],
+ vp9_prob p) {
/* Imitate existing calculation */
- return ((ct[0] * vp9_cost_zero(p))
- + (ct[1] * vp9_cost_one(p))) >> 8;
+ return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
}
-static __inline unsigned int cost_branch256(const unsigned int ct[2],
- vp9_prob p) {
+static INLINE unsigned int cost_branch(const unsigned int ct[2],
+ vp9_prob p) {
/* Imitate existing calculation */
- return ((ct[0] * vp9_cost_zero(p))
- + (ct[1] * vp9_cost_one(p)));
+ return cost_branch256(ct, p) >> 8;
}
+
/* Small functions to write explicit values and tokens, as well as
estimate their lengths. */
-static __inline void treed_write(vp9_writer *const w,
- vp9_tree t,
- const vp9_prob *const p,
- int v,
- /* number of bits in v, assumed nonzero */
- int n) {
+static INLINE void treed_write(vp9_writer *const w,
+ vp9_tree t,
+ const vp9_prob *const p,
+ int v,
+ /* number of bits in v, assumed nonzero */
+ int n) {
vp9_tree_index i = 0;
do {
@@ -69,18 +67,18 @@ static __inline void treed_write(vp9_writer *const w,
} while (n);
}
-static __inline void write_token(vp9_writer *const w,
- vp9_tree t,
- const vp9_prob *const p,
- vp9_token *const x) {
+static INLINE void write_token(vp9_writer *const w,
+ vp9_tree t,
+ const vp9_prob *const p,
+ vp9_token *const x) {
treed_write(w, t, p, x->value, x->Len);
}
-static __inline int treed_cost(vp9_tree t,
- const vp9_prob *const p,
- int v,
- /* number of bits in v, assumed nonzero */
- int n) {
+static INLINE int treed_cost(vp9_tree t,
+ const vp9_prob *const p,
+ int v,
+ /* number of bits in v, assumed nonzero */
+ int n) {
int c = 0;
vp9_tree_index i = 0;
@@ -93,9 +91,9 @@ static __inline int treed_cost(vp9_tree t,
return c;
}
-static __inline int cost_token(vp9_tree t,
- const vp9_prob *const p,
- vp9_token *const x) {
+static INLINE int cost_token(vp9_tree t,
+ const vp9_prob *const p,
+ vp9_token *const x) {
return treed_cost(t, p, x->value, x->Len);
}
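
The treewriter change above makes cost_branch() a thin wrapper around cost_branch256() rather than a second hand-written formula. A minimal sketch of that relationship; the two cost helpers here are stand-ins for the real vp9_cost_zero()/vp9_cost_one() table lookups and their values are made up.

    #include <stdio.h>

    typedef unsigned char vp9_prob;

    /* Stand-ins: the real lookups return costs in 1/256th-bit units. */
    static unsigned int cost_zero(vp9_prob p) { return 256 - p; }
    static unsigned int cost_one(vp9_prob p)  { return p; }

    /* 256-scaled cost of coding ct[0] zeros and ct[1] ones with probability p. */
    static unsigned int cost_branch256(const unsigned int ct[2], vp9_prob p) {
      return ct[0] * cost_zero(p) + ct[1] * cost_one(p);
    }

    /* After the refactor the bit-scaled variant is just a shift of the above. */
    static unsigned int cost_branch(const unsigned int ct[2], vp9_prob p) {
      return cost_branch256(ct, p) >> 8;
    }

    int main(void) {
      const unsigned int ct[2] = { 10, 3 };
      printf("%u %u\n", cost_branch256(ct, 200), cost_branch(ct, 200));
      return 0;
    }
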
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 675dbb63e..7120c5fe7 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -19,12 +19,6 @@ typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
int ref_stride,
unsigned int max_sad);
-typedef void (*vp9_copy32xn_fn_t)(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- int n);
-
typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -35,7 +29,7 @@ typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int ref_stride,
- unsigned short *sad_array);
+ unsigned int *sad_array);
typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
int source_stride,
@@ -79,7 +73,6 @@ typedef struct vp9_variance_vtable {
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi1_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
- vp9_copy32xn_fn_t copymem;
} vp9_variance_fn_ptr_t;
#endif // VP9_ENCODER_VP9_VARIANCE_H_
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index d03e285c6..d07a65b45 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -142,8 +142,8 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
const int16_t *HFilter, *VFilter;
  uint16_t FData3[5 * 4]; // Temp data buffer used in filtering
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
// First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
@@ -166,8 +166,8 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
@@ -186,8 +186,8 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
@@ -206,8 +206,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
uint8_t temp2[68 * 64];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
1, 65, 64, HFilter);
@@ -227,8 +227,8 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
uint8_t temp2[36 * 32];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);
@@ -367,8 +367,8 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
@@ -387,8 +387,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
1, 17, 8, HFilter);
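
The sub-pixel variance functions above feed 2-tap coefficients from VP9_BILINEAR_FILTERS_2TAP into a separable two-pass filter. A rough scalar sketch of one first-pass row, assuming the taps sum to 1 << VP9_FILTER_SHIFT; the tap values in main() are illustrative half-pel weights, not taken from the real filter table.

    #include <stdint.h>
    #include <stdio.h>

    #define VP9_FILTER_SHIFT 7
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    /* One horizontal pass of the 2-tap bilinear filter: each output is a
     * weighted blend of a pixel and its right-hand neighbour. */
    static void bilinear_first_pass_row(const uint8_t *src, uint16_t *dst,
                                        int width, const int16_t filter[2]) {
      int i;
      for (i = 0; i < width; ++i)
        dst[i] = ROUND_POWER_OF_TWO(src[i] * filter[0] + src[i + 1] * filter[1],
                                    VP9_FILTER_SHIFT);
    }

    int main(void) {
      const uint8_t src[5] = { 0, 128, 128, 0, 0 };
      const int16_t half_pel[2] = { 64, 64 };  /* illustrative taps */
      uint16_t dst[4];
      bilinear_first_pass_row(src, dst, 4, half_pel);
      printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  /* 64 128 64 0 */
      return 0;
    }
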
diff --git a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
new file mode 100644
index 000000000..ff884d999
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+#include "vp9/common/vp9_idct.h" // for cospi constants
+
+#define pair_set_epi16(a, b) \
+ _mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
+
+void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int stride = pitch >> 1;
+ int pass;
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ // Load input
+ __m128i in0 = _mm_loadu_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_loadu_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_loadu_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_loadu_si128((const __m128i *)(input + 3 * stride));
+ __m128i in4 = _mm_loadu_si128((const __m128i *)(input + 4 * stride));
+ __m128i in5 = _mm_loadu_si128((const __m128i *)(input + 5 * stride));
+ __m128i in6 = _mm_loadu_si128((const __m128i *)(input + 6 * stride));
+ __m128i in7 = _mm_loadu_si128((const __m128i *)(input + 7 * stride));
+ // Pre-condition input (shift by two)
+ in0 = _mm_slli_epi16(in0, 2);
+ in1 = _mm_slli_epi16(in1, 2);
+ in2 = _mm_slli_epi16(in2, 2);
+ in3 = _mm_slli_epi16(in3, 2);
+ in4 = _mm_slli_epi16(in4, 2);
+ in5 = _mm_slli_epi16(in5, 2);
+ in6 = _mm_slli_epi16(in6, 2);
+ in7 = _mm_slli_epi16(in7, 2);
+
+ // We do two passes, first the columns, then the rows. The results of the
+ // first pass are transposed so that the same column code can be reused. The
+ // results of the second pass are also transposed so that the rows (processed
+ // as columns) are put back in row positions.
+ for (pass = 0; pass < 2; pass++) {
+ // To store results of each pass before the transpose.
+ __m128i res0, res1, res2, res3, res4, res5, res6, res7;
+    // Add/subtract
+ const __m128i q0 = _mm_add_epi16(in0, in7);
+ const __m128i q1 = _mm_add_epi16(in1, in6);
+ const __m128i q2 = _mm_add_epi16(in2, in5);
+ const __m128i q3 = _mm_add_epi16(in3, in4);
+ const __m128i q4 = _mm_sub_epi16(in3, in4);
+ const __m128i q5 = _mm_sub_epi16(in2, in5);
+ const __m128i q6 = _mm_sub_epi16(in1, in6);
+ const __m128i q7 = _mm_sub_epi16(in0, in7);
+ // Work on first four results
+ {
+      // Add/subtract
+ const __m128i r0 = _mm_add_epi16(q0, q3);
+ const __m128i r1 = _mm_add_epi16(q1, q2);
+ const __m128i r2 = _mm_sub_epi16(q1, q2);
+ const __m128i r3 = _mm_sub_epi16(q0, q3);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res0 = _mm_packs_epi32(w0, w1);
+ res4 = _mm_packs_epi32(w2, w3);
+ res2 = _mm_packs_epi32(w4, w5);
+ res6 = _mm_packs_epi32(w6, w7);
+ }
+ // Work on next four results
+ {
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+ const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+ const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
+ const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
+ const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
+ const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
+ const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
+ const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
+ const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
+ const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
+ const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
+ const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
+ const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
+ // Combine
+ const __m128i r0 = _mm_packs_epi32(s0, s1);
+ const __m128i r1 = _mm_packs_epi32(s2, s3);
+      // Add/subtract
+ const __m128i x0 = _mm_add_epi16(q4, r0);
+ const __m128i x1 = _mm_sub_epi16(q4, r0);
+ const __m128i x2 = _mm_sub_epi16(q7, r1);
+ const __m128i x3 = _mm_add_epi16(q7, r1);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res1 = _mm_packs_epi32(w0, w1);
+ res7 = _mm_packs_epi32(w2, w3);
+ res5 = _mm_packs_epi32(w4, w5);
+ res3 = _mm_packs_epi32(w6, w7);
+ }
+ // Transpose the 8x8.
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+      // 44 54 45 55 46 56 47 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+      // 04 14 24 34 05 15 25 35
+      // 44 54 64 74 45 55 65 75
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ }
+ }
+ // Post-condition output and store it
+ {
+ // Post-condition (division by two)
+    // division of a signed 16-bit value by two using shifts
+ // n / 2 = (n - (n >> 15)) >> 1
+ const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
+ const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
+ const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
+ const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
+ const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
+ const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
+ const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
+ const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
+ in0 = _mm_sub_epi16(in0, sign_in0);
+ in1 = _mm_sub_epi16(in1, sign_in1);
+ in2 = _mm_sub_epi16(in2, sign_in2);
+ in3 = _mm_sub_epi16(in3, sign_in3);
+ in4 = _mm_sub_epi16(in4, sign_in4);
+ in5 = _mm_sub_epi16(in5, sign_in5);
+ in6 = _mm_sub_epi16(in6, sign_in6);
+ in7 = _mm_sub_epi16(in7, sign_in7);
+ in0 = _mm_srai_epi16(in0, 1);
+ in1 = _mm_srai_epi16(in1, 1);
+ in2 = _mm_srai_epi16(in2, 1);
+ in3 = _mm_srai_epi16(in3, 1);
+ in4 = _mm_srai_epi16(in4, 1);
+ in5 = _mm_srai_epi16(in5, 1);
+ in6 = _mm_srai_epi16(in6, 1);
+ in7 = _mm_srai_epi16(in7, 1);
+ // store results
+ _mm_storeu_si128 ((__m128i *)(output + 0 * 8), in0);
+ _mm_storeu_si128 ((__m128i *)(output + 1 * 8), in1);
+ _mm_storeu_si128 ((__m128i *)(output + 2 * 8), in2);
+ _mm_storeu_si128 ((__m128i *)(output + 3 * 8), in3);
+ _mm_storeu_si128 ((__m128i *)(output + 4 * 8), in4);
+ _mm_storeu_si128 ((__m128i *)(output + 5 * 8), in5);
+ _mm_storeu_si128 ((__m128i *)(output + 6 * 8), in6);
+ _mm_storeu_si128 ((__m128i *)(output + 7 * 8), in7);
+ }
+}
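
Two scalar identities the intrinsics above rely on, written out for reference: the add-rounding-then-arithmetic-shift applied after each _mm_madd_epi16, and the shift-based divide-by-two from the post-conditioning comment. DCT_CONST_BITS is assumed to be 14 (its value in vp9_idct.h), and the sketch assumes arithmetic right shift on signed values, which is what _mm_srai_epi16/_mm_srai_epi32 provide.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DCT_CONST_BITS     14
    #define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))

    /* Scalar form of _mm_add_epi32 with the rounding constant followed by
     * _mm_srai_epi32. */
    static int32_t dct_const_round_shift(int32_t input) {
      return (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
    }

    /* Divide a signed 16-bit value by two, rounding toward zero, using only a
     * shift and a subtract: n / 2 = (n - (n >> 15)) >> 1. */
    static int16_t div2_toward_zero(int16_t n) {
      return (int16_t)((n - (n >> 15)) >> 1);
    }

    int main(void) {
      int n;
      for (n = -32768; n <= 32767; ++n)
        assert(div2_toward_zero((int16_t)n) == (int16_t)(n / 2));
      printf("%d\n", (int)dct_const_round_shift(3 << DCT_CONST_BITS));  /* 3 */
      return 0;
    }
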
diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm
index 5d9f7769d..90c793d4f 100644
--- a/vp9/encoder/x86/vp9_encodeopt.asm
+++ b/vp9/encoder/x86/vp9_encodeopt.asm
@@ -125,7 +125,7 @@ sym(vp9_block_error_mmx):
ret
-;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
+;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
global sym(vp9_mbblock_error_mmx_impl) PRIVATE
sym(vp9_mbblock_error_mmx_impl):
push rbp
@@ -142,10 +142,6 @@ sym(vp9_mbblock_error_mmx_impl):
mov rdi, arg(1) ;dcoef_ptr
pxor mm2, mm2
- movd mm1, dword ptr arg(2) ;dc
- por mm1, mm2
-
- pcmpeqw mm1, mm7
mov rcx, 16
.mberror_loop_mmx:
@@ -160,7 +156,6 @@ sym(vp9_mbblock_error_mmx_impl):
pmaddwd mm5, mm5
psubw mm3, mm4
- pand mm3, mm1
pmaddwd mm3, mm3
paddd mm2, mm5
@@ -202,28 +197,24 @@ sym(vp9_mbblock_error_mmx_impl):
ret
-;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
+;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
global sym(vp9_mbblock_error_xmm_impl) PRIVATE
sym(vp9_mbblock_error_xmm_impl):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
- SAVE_XMM 6
+ SAVE_XMM 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;coeff_ptr
- pxor xmm6, xmm6
+ pxor xmm5, xmm5
mov rdi, arg(1) ;dcoef_ptr
pxor xmm4, xmm4
- movd xmm5, dword ptr arg(2) ;dc
- por xmm5, xmm4
-
- pcmpeqw xmm5, xmm6
mov rcx, 16
.mberror_loop:
@@ -238,7 +229,6 @@ sym(vp9_mbblock_error_xmm_impl):
pmaddwd xmm2, xmm2
psubw xmm0, xmm1
- pand xmm0, xmm5
pmaddwd xmm0, xmm0
add rsi, 32
@@ -252,9 +242,9 @@ sym(vp9_mbblock_error_xmm_impl):
jnz .mberror_loop
movdqa xmm0, xmm4
- punpckldq xmm0, xmm6
+ punpckldq xmm0, xmm5
- punpckhdq xmm4, xmm6
+ punpckhdq xmm4, xmm5
paddd xmm0, xmm4
movdqa xmm1, xmm0
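
With the dc argument removed, the mbblock error kernels above simply accumulate squared coefficient differences over all sixteen luma 4x4 blocks, DC included. A scalar sketch of the quantity they return, assuming the blocks' coefficients are laid out contiguously (16 shorts per block, matching the asm's fixed step per iteration).

    #include <stdio.h>

    /* Sum of squared differences between original and dequantized coefficients
     * over 16 blocks of 16 coefficients each. */
    static int mbblock_error(const short *coeff, const short *dqcoeff) {
      int err = 0, i;
      for (i = 0; i < 16 * 16; ++i) {
        const int diff = coeff[i] - dqcoeff[i];
        err += diff * diff;
      }
      return err;
    }

    int main(void) {
      short coeff[16 * 16] = { 5, -3 }, dqcoeff[16 * 16] = { 4, 0 };
      printf("%d\n", mbblock_error(coeff, dqcoeff));  /* 1 + 9 = 10 */
      return 0;
    }
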
diff --git a/vp9/encoder/x86/vp9_sad4d_sse2.asm b/vp9/encoder/x86/vp9_sad4d_sse2.asm
new file mode 100644
index 000000000..3716d91ec
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm
@@ -0,0 +1,225 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_4x2x4 5-6 0
+ movd m0, [srcq +%2]
+%if %1 == 1
+ movd m6, [ref1q+%3]
+ movd m4, [ref2q+%3]
+ movd m7, [ref3q+%3]
+ movd m5, [ref4q+%3]
+ punpckldq m0, [srcq +%4]
+ punpckldq m6, [ref1q+%5]
+ punpckldq m4, [ref2q+%5]
+ punpckldq m7, [ref3q+%5]
+ punpckldq m5, [ref4q+%5]
+ psadbw m6, m0
+ psadbw m4, m0
+ psadbw m7, m0
+ psadbw m5, m0
+ punpckldq m6, m4
+ punpckldq m7, m5
+%else
+ movd m1, [ref1q+%3]
+ movd m2, [ref2q+%3]
+ movd m3, [ref3q+%3]
+ movd m4, [ref4q+%3]
+ punpckldq m0, [srcq +%4]
+ punpckldq m1, [ref1q+%5]
+ punpckldq m2, [ref2q+%5]
+ punpckldq m3, [ref3q+%5]
+ punpckldq m4, [ref4q+%5]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ psadbw m4, m0
+ punpckldq m1, m2
+ punpckldq m3, m4
+ paddd m6, m1
+ paddd m7, m3
+%endif
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*2]
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
+%endif
+%endmacro
+
+; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_8x2x4 5-6 0
+ movh m0, [srcq +%2]
+%if %1 == 1
+ movh m4, [ref1q+%3]
+ movh m5, [ref2q+%3]
+ movh m6, [ref3q+%3]
+ movh m7, [ref4q+%3]
+ movhps m0, [srcq +%4]
+ movhps m4, [ref1q+%5]
+ movhps m5, [ref2q+%5]
+ movhps m6, [ref3q+%5]
+ movhps m7, [ref4q+%5]
+ psadbw m4, m0
+ psadbw m5, m0
+ psadbw m6, m0
+ psadbw m7, m0
+%else
+ movh m1, [ref1q+%3]
+ movh m2, [ref2q+%3]
+ movh m3, [ref3q+%3]
+ movhps m0, [srcq +%4]
+ movhps m1, [ref1q+%5]
+ movhps m2, [ref2q+%5]
+ movhps m3, [ref3q+%5]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ paddd m4, m1
+ movh m1, [ref4q+%3]
+ movhps m1, [ref4q+%5]
+ paddd m5, m2
+ paddd m6, m3
+ psadbw m1, m0
+ paddd m7, m1
+%endif
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*2]
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
+%endif
+%endmacro
+
+; PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_16x2x4 5-6 0
+ ; 1st 16 px
+ mova m0, [srcq +%2]
+%if %1 == 1
+ movu m4, [ref1q+%3]
+ movu m5, [ref2q+%3]
+ movu m6, [ref3q+%3]
+ movu m7, [ref4q+%3]
+ psadbw m4, m0
+ psadbw m5, m0
+ psadbw m6, m0
+ psadbw m7, m0
+%else
+ movu m1, [ref1q+%3]
+ movu m2, [ref2q+%3]
+ movu m3, [ref3q+%3]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ paddd m4, m1
+ movu m1, [ref4q+%3]
+ paddd m5, m2
+ paddd m6, m3
+ psadbw m1, m0
+ paddd m7, m1
+%endif
+
+ ; 2nd 16 px
+ mova m0, [srcq +%4]
+ movu m1, [ref1q+%5]
+ movu m2, [ref2q+%5]
+ movu m3, [ref3q+%5]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ paddd m4, m1
+ movu m1, [ref4q+%5]
+ paddd m5, m2
+ paddd m6, m3
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*2]
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
+%endif
+ psadbw m1, m0
+ paddd m7, m1
+%endmacro
+
+; PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_32x2x4 5-6 0
+ PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16
+ PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6
+%endmacro
+
+; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_64x2x4 5-6 0
+ PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32
+ PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6
+%endmacro
+
+; void vp9_sadNxNx4d_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref[4], int ref_stride,
+; unsigned int res[4]);
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8 or 4x4
+%macro SADNXN4D 2
+%if UNIX64
+cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
+ res, ref2, ref3, ref4
+%else
+cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
+ ref2, ref3, ref4
+%endif
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov ref2q, [ref1q+gprsize*1]
+ mov ref3q, [ref1q+gprsize*2]
+ mov ref4q, [ref1q+gprsize*3]
+ mov ref1q, [ref1q+gprsize*0]
+
+ PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
+%rep (%2-4)/2
+ PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
+%endrep
+ PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
+
+%if mmsize == 16
+ pslldq m5, 4
+ pslldq m7, 4
+ por m4, m5
+ por m6, m7
+ mova m5, m4
+ mova m7, m6
+ punpcklqdq m4, m6
+ punpckhqdq m5, m7
+ movifnidn r4, r4mp
+ paddd m4, m5
+ movu [r4], m4
+ RET
+%else
+ movifnidn r4, r4mp
+ movq [r4+0], m6
+ movq [r4+8], m7
+ RET
+%endif
+%endmacro
+
+INIT_XMM sse2
+SADNXN4D 64, 64
+SADNXN4D 32, 32
+SADNXN4D 16, 16
+SADNXN4D 16, 8
+SADNXN4D 8, 16
+SADNXN4D 8, 8
+
+INIT_MMX sse
+SADNXN4D 4, 4
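
A scalar reference for what the SADNXN4D kernels above compute: one SAD per candidate reference, all four candidates compared against the same source block. Square sizes only, for brevity; the function name and sizes are illustrative.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* res[i] = sum of absolute differences between src and ref[i]. */
    static void sad_nxn_x4d(const uint8_t *src, int src_stride,
                            const uint8_t *const ref[4], int ref_stride,
                            int n, unsigned int res[4]) {
      int i, r, c;
      for (i = 0; i < 4; ++i) {
        unsigned int sad = 0;
        for (r = 0; r < n; ++r)
          for (c = 0; c < n; ++c)
            sad += abs(src[r * src_stride + c] - ref[i][r * ref_stride + c]);
        res[i] = sad;
      }
    }

    int main(void) {
      uint8_t src[8 * 8] = { 0 }, ref0[8 * 8] = { 0 }, ref1[8 * 8] = { 3 };
      const uint8_t *const refs[4] = { ref0, ref1, ref0, ref1 };
      unsigned int res[4];
      sad_nxn_x4d(src, 8, refs, 8, 8, res);
      printf("%u %u %u %u\n", res[0], res[1], res[2], res[3]);  /* 0 3 0 3 */
      return 0;
    }
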
diff --git a/vp9/encoder/x86/vp9_sad_sse2.asm b/vp9/encoder/x86/vp9_sad_sse2.asm
index 33271635c..ea482e071 100644
--- a/vp9/encoder/x86/vp9_sad_sse2.asm
+++ b/vp9/encoder/x86/vp9_sad_sse2.asm
@@ -8,403 +8,175 @@
; be found in the AUTHORS file in the root of the source tree.
;
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;unsigned int vp9_sad16x16_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad16x16_wmt) PRIVATE
-sym(vp9_sad16x16_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- SAVE_XMM 6
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
-
- lea rcx, [rcx+rax*8]
- pxor xmm6, xmm6
-
-.x16x16sad_wmt_loop:
-
- movq xmm0, QWORD PTR [rsi]
- movq xmm2, QWORD PTR [rsi+8]
-
- movq xmm1, QWORD PTR [rdi]
- movq xmm3, QWORD PTR [rdi+8]
-
- movq xmm4, QWORD PTR [rsi+rax]
- movq xmm5, QWORD PTR [rdi+rdx]
-
-
- punpcklbw xmm0, xmm2
- punpcklbw xmm1, xmm3
-
- psadbw xmm0, xmm1
- movq xmm2, QWORD PTR [rsi+rax+8]
-
- movq xmm3, QWORD PTR [rdi+rdx+8]
- lea rsi, [rsi+rax*2]
-
- lea rdi, [rdi+rdx*2]
- punpcklbw xmm4, xmm2
-
- punpcklbw xmm5, xmm3
- psadbw xmm4, xmm5
-
- paddw xmm6, xmm0
- paddw xmm6, xmm4
-
- cmp rsi, rcx
- jne .x16x16sad_wmt_loop
-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movq rax, xmm0
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;unsigned int vp9_sad8x16_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int max_err)
-global sym(vp9_sad8x16_wmt) PRIVATE
-sym(vp9_sad8x16_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
-
- lea rcx, [rcx+rbx*8]
- pxor mm7, mm7
-
-.x8x16sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- jg .x8x16sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, QWORD PTR [rsi+rbx]
- movq mm3, QWORD PTR [rdi+rdx]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- lea rsi, [rsi+rbx*2]
- lea rdi, [rdi+rdx*2]
-
- paddw mm7, mm0
- paddw mm7, mm2
-
- cmp rsi, rcx
- jne .x8x16sad_wmt_loop
-
- movq rax, mm7
-
-.x8x16sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp9_sad8x8_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad8x8_wmt) PRIVATE
-sym(vp9_sad8x8_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
- pxor mm7, mm7
-
-.x8x8sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- jg .x8x8sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- psadbw mm0, mm1
- lea rsi, [rsi+rbx]
-
- add rdi, rdx
- paddw mm7, mm0
-
- cmp rsi, rcx
- jne .x8x8sad_wmt_loop
-
- movq rax, mm7
-.x8x8sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;unsigned int vp9_sad4x4_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad4x4_wmt) PRIVATE
-sym(vp9_sad4x4_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- movd mm0, DWORD PTR [rsi]
- movd mm1, DWORD PTR [rdi]
-
- movd mm2, DWORD PTR [rsi+rax]
- movd mm3, DWORD PTR [rdi+rdx]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- psadbw mm0, mm1
- lea rsi, [rsi+rax*2]
-
- lea rdi, [rdi+rdx*2]
- movd mm4, DWORD PTR [rsi]
-
- movd mm5, DWORD PTR [rdi]
- movd mm6, DWORD PTR [rsi+rax]
-
- movd mm7, DWORD PTR [rdi+rdx]
- punpcklbw mm4, mm6
-
- punpcklbw mm5, mm7
- psadbw mm4, mm5
-
- paddw mm0, mm4
- movq rax, mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp9_sad16x8_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad16x8_wmt) PRIVATE
-sym(vp9_sad16x8_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
- pxor mm7, mm7
-
-.x16x8sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- jg .x16x8sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm2, QWORD PTR [rsi+8]
-
- movq mm1, QWORD PTR [rdi]
- movq mm3, QWORD PTR [rdi+8]
-
- movq mm4, QWORD PTR [rsi+rbx]
- movq mm5, QWORD PTR [rdi+rdx]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- movq mm1, QWORD PTR [rsi+rbx+8]
- movq mm3, QWORD PTR [rdi+rdx+8]
-
- psadbw mm4, mm5
- psadbw mm1, mm3
-
- lea rsi, [rsi+rbx*2]
- lea rdi, [rdi+rdx*2]
-
- paddw mm0, mm2
- paddw mm4, mm1
-
- paddw mm7, mm0
- paddw mm7, mm4
-
- cmp rsi, rcx
- jne .x16x8sad_wmt_loop
-
- movq rax, mm7
-
-.x16x8sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_copy32xn_sse2(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *dst_ptr,
-; int dst_stride,
-; int height);
-global sym(vp9_copy32xn_sse2) PRIVATE
-sym(vp9_copy32xn_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;dst_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;dst_stride
- movsxd rcx, dword ptr arg(4) ;height
-
-.block_copy_sse2_loopx4:
- movdqu xmm0, XMMWORD PTR [rsi]
- movdqu xmm1, XMMWORD PTR [rsi + 16]
- movdqu xmm2, XMMWORD PTR [rsi + rax]
- movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
-
- lea rsi, [rsi+rax*2]
-
- movdqu xmm4, XMMWORD PTR [rsi]
- movdqu xmm5, XMMWORD PTR [rsi + 16]
- movdqu xmm6, XMMWORD PTR [rsi + rax]
- movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
-
- lea rsi, [rsi+rax*2]
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi + 16], xmm1
- movdqa XMMWORD PTR [rdi + rdx], xmm2
- movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
-
- lea rdi, [rdi+rdx*2]
-
- movdqa XMMWORD PTR [rdi], xmm4
- movdqa XMMWORD PTR [rdi + 16], xmm5
- movdqa XMMWORD PTR [rdi + rdx], xmm6
- movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
-
- lea rdi, [rdi+rdx*2]
-
- sub rcx, 4
- cmp rcx, 4
- jge .block_copy_sse2_loopx4
-
- cmp rcx, 0
- je .copy_is_done
-
-.block_copy_sse2_loop:
- movdqu xmm0, XMMWORD PTR [rsi]
- movdqu xmm1, XMMWORD PTR [rsi + 16]
- lea rsi, [rsi+rax]
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi + 16], xmm1
- lea rdi, [rdi+rdx]
-
- sub rcx, 1
- jne .block_copy_sse2_loop
-
-.copy_is_done:
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+INIT_XMM sse2
+cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov n_rowsd, 64
+ pxor m0, m0
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+32]
+ movu m4, [refq+48]
+ psadbw m1, [srcq]
+ psadbw m2, [srcq+16]
+ psadbw m3, [srcq+32]
+ psadbw m4, [srcq+48]
+ paddd m1, m2
+ paddd m3, m4
+ add refq, ref_strideq
+ paddd m0, m1
+ add srcq, src_strideq
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+
+; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+INIT_XMM sse2
+cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov n_rowsd, 16
+ pxor m0, m0
+
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+ref_strideq]
+ movu m4, [refq+ref_strideq+16]
+ psadbw m1, [srcq]
+ psadbw m2, [srcq+16]
+ psadbw m3, [srcq+src_strideq]
+ psadbw m4, [srcq+src_strideq+16]
+ paddd m1, m2
+ paddd m3, m4
+ lea refq, [refq+ref_strideq*2]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*2]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+
+; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro SAD16XN 1
+cglobal sad16x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \
+ src_stride3, ref_stride3, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ lea src_stride3q, [src_strideq*3]
+ lea ref_stride3q, [ref_strideq*3]
+ mov n_rowsd, %1/4
+ pxor m0, m0
+
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+ref_strideq]
+ movu m3, [refq+ref_strideq*2]
+ movu m4, [refq+ref_stride3q]
+ psadbw m1, [srcq]
+ psadbw m2, [srcq+src_strideq]
+ psadbw m3, [srcq+src_strideq*2]
+ psadbw m4, [srcq+src_stride3q]
+ paddd m1, m2
+ paddd m3, m4
+ lea refq, [refq+ref_strideq*4]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*4]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
+
+INIT_XMM sse2
+SAD16XN 16 ; sad16x16_sse2
+SAD16XN 8 ; sad16x8_sse2
+
+; unsigned int vp9_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro SAD8XN 1
+cglobal sad8x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \
+ src_stride3, ref_stride3, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ lea src_stride3q, [src_strideq*3]
+ lea ref_stride3q, [ref_strideq*3]
+ mov n_rowsd, %1/4
+ pxor m0, m0
+
+.loop:
+ movh m1, [refq]
+ movhps m1, [refq+ref_strideq]
+ movh m2, [refq+ref_strideq*2]
+ movhps m2, [refq+ref_stride3q]
+ movh m3, [srcq]
+ movhps m3, [srcq+src_strideq]
+ movh m4, [srcq+src_strideq*2]
+ movhps m4, [srcq+src_stride3q]
+ psadbw m1, m3
+ psadbw m2, m4
+ lea refq, [refq+ref_strideq*4]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*4]
+ paddd m0, m2
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
+
+INIT_XMM sse2
+SAD8XN 16 ; sad8x16_sse2
+SAD8XN 8 ; sad8x8_sse2
+
+; unsigned int vp9_sad4x4_sse(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+INIT_MMX sse
+cglobal sad4x4, 4, 4, 8, src, src_stride, ref, ref_stride
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ movd m0, [refq]
+ movd m1, [refq+ref_strideq]
+ movd m2, [srcq]
+ movd m3, [srcq+src_strideq]
+ lea refq, [refq+ref_strideq*2]
+ lea srcq, [srcq+src_strideq*2]
+ movd m4, [refq]
+ movd m5, [refq+ref_strideq]
+ movd m6, [srcq]
+ movd m7, [srcq+src_strideq]
+ punpckldq m0, m1
+ punpckldq m2, m3
+ punpckldq m4, m5
+ punpckldq m6, m7
+ psadbw m0, m2
+ psadbw m4, m6
+ paddd m0, m4
+ movd eax, m0
+ RET
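
The single-reference kernels above reduce to the usual width-by-height sum of absolute differences; the movhlps/paddd/movd tail only folds the two 64-bit psadbw lanes into the final count. A scalar sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Plain SAD over a width x height block. */
    static unsigned int sad(const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride,
                            int width, int height) {
      unsigned int total = 0;
      int r, c;
      for (r = 0; r < height; ++r)
        for (c = 0; c < width; ++c)
          total += abs(src[r * src_stride + c] - ref[r * ref_stride + c]);
      return total;
    }

    int main(void) {
      uint8_t src[64 * 64] = { 10 }, ref[64 * 64] = { 0 };
      printf("%u\n", sad(src, 64, ref, 64, 64, 64));  /* 10 */
      return 0;
    }
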
diff --git a/vp9/encoder/x86/vp9_sad_sse3.asm b/vp9/encoder/x86/vp9_sad_sse3.asm
index 1c39a08f8..2b90a5d54 100644
--- a/vp9/encoder/x86/vp9_sad_sse3.asm
+++ b/vp9/encoder/x86/vp9_sad_sse3.asm
@@ -83,87 +83,6 @@
ret
%endmacro
-%macro STACK_FRAME_CREATE_X4 0
-%if ABI_IS_32BIT
- %define src_ptr rsi
- %define src_stride rax
- %define r0_ptr rcx
- %define r1_ptr rdx
- %define r2_ptr rbx
- %define r3_ptr rdi
- %define ref_stride rbp
- %define result_ptr arg(4)
- push rbp
- mov rbp, rsp
- push rsi
- push rdi
- push rbx
-
- push rbp
- mov rdi, arg(2) ; ref_ptr_base
-
- LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
- mov rsi, arg(0) ; src_ptr
-
- movsxd rbx, dword ptr arg(1) ; src_stride
- movsxd rbp, dword ptr arg(3) ; ref_stride
-
- xchg rbx, rax
-%else
- %if LIBVPX_YASM_WIN64
- SAVE_XMM 7, u
- %define src_ptr rcx
- %define src_stride rdx
- %define r0_ptr rsi
- %define r1_ptr r10
- %define r2_ptr r11
- %define r3_ptr r8
- %define ref_stride r9
- %define result_ptr [rsp+xmm_stack_space+16+4*8]
- push rsi
-
- LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
- %else
- %define src_ptr rdi
- %define src_stride rsi
- %define r0_ptr r9
- %define r1_ptr r10
- %define r2_ptr r11
- %define r3_ptr rdx
- %define ref_stride rcx
- %define result_ptr r8
-
- LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
-
- %endif
-%endif
-%endmacro
-
-%macro STACK_FRAME_DESTROY_X4 0
- %define src_ptr
- %define src_stride
- %define r0_ptr
- %define r1_ptr
- %define r2_ptr
- %define r3_ptr
- %define ref_stride
- %define result_ptr
-
-%if ABI_IS_32BIT
- pop rbx
- pop rdi
- pop rsi
- pop rbp
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- RESTORE_XMM
- %endif
-%endif
- ret
-%endmacro
-
%macro PROCESS_16X2X3 5
%if %1==0
movdqa xmm0, XMMWORD PTR [%2]
@@ -250,130 +169,6 @@
paddw mm7, mm3
%endmacro
-%macro LOAD_X4_ADDRESSES 5
- mov %2, [%1+REG_SZ_BYTES*0]
- mov %3, [%1+REG_SZ_BYTES*1]
-
- mov %4, [%1+REG_SZ_BYTES*2]
- mov %5, [%1+REG_SZ_BYTES*3]
-%endmacro
-
-%macro PROCESS_16X2X4 8
-%if %1==0
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm4, XMMWORD PTR [%3]
- lddqu xmm5, XMMWORD PTR [%4]
- lddqu xmm6, XMMWORD PTR [%5]
- lddqu xmm7, XMMWORD PTR [%6]
-
- psadbw xmm4, xmm0
- psadbw xmm5, xmm0
- psadbw xmm6, xmm0
- psadbw xmm7, xmm0
-%else
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm1, XMMWORD PTR [%3]
- lddqu xmm2, XMMWORD PTR [%4]
- lddqu xmm3, XMMWORD PTR [%5]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm4, xmm1
- lddqu xmm1, XMMWORD PTR [%6]
- paddw xmm5, xmm2
- paddw xmm6, xmm3
-
- psadbw xmm1, xmm0
- paddw xmm7, xmm1
-%endif
- movdqa xmm0, XMMWORD PTR [%2+%7]
- lddqu xmm1, XMMWORD PTR [%3+%8]
- lddqu xmm2, XMMWORD PTR [%4+%8]
- lddqu xmm3, XMMWORD PTR [%5+%8]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm4, xmm1
- lddqu xmm1, XMMWORD PTR [%6+%8]
- paddw xmm5, xmm2
- paddw xmm6, xmm3
-
-%if %1==0 || %1==1
- lea %2, [%2+%7*2]
- lea %3, [%3+%8*2]
-
- lea %4, [%4+%8*2]
- lea %5, [%5+%8*2]
-
- lea %6, [%6+%8*2]
-%endif
- psadbw xmm1, xmm0
- paddw xmm7, xmm1
-
-%endmacro
-
-%macro PROCESS_8X2X4 8
-%if %1==0
- movq mm0, QWORD PTR [%2]
- movq mm4, QWORD PTR [%3]
- movq mm5, QWORD PTR [%4]
- movq mm6, QWORD PTR [%5]
- movq mm7, QWORD PTR [%6]
-
- psadbw mm4, mm0
- psadbw mm5, mm0
- psadbw mm6, mm0
- psadbw mm7, mm0
-%else
- movq mm0, QWORD PTR [%2]
- movq mm1, QWORD PTR [%3]
- movq mm2, QWORD PTR [%4]
- movq mm3, QWORD PTR [%5]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm4, mm1
- movq mm1, QWORD PTR [%6]
- paddw mm5, mm2
- paddw mm6, mm3
-
- psadbw mm1, mm0
- paddw mm7, mm1
-%endif
- movq mm0, QWORD PTR [%2+%7]
- movq mm1, QWORD PTR [%3+%8]
- movq mm2, QWORD PTR [%4+%8]
- movq mm3, QWORD PTR [%5+%8]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm4, mm1
- movq mm1, QWORD PTR [%6+%8]
- paddw mm5, mm2
- paddw mm6, mm3
-
-%if %1==0 || %1==1
- lea %2, [%2+%7*2]
- lea %3, [%3+%8*2]
-
- lea %4, [%4+%8*2]
- lea %5, [%5+%8*2]
-
- lea %6, [%6+%8*2]
-%endif
- psadbw mm1, mm0
- paddw mm7, mm1
-
-%endmacro
-
;void int vp9_sad16x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
@@ -581,380 +376,3 @@ sym(vp9_sad4x4x3_sse3):
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
-
-;unsigned int vp9_sad16x16_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int max_err)
-;%define lddqu movdqu
-global sym(vp9_sad16x16_sse3) PRIVATE
-sym(vp9_sad16x16_sse3):
-
- STACK_FRAME_CREATE_X3
-
- mov end_ptr, 4
- pxor xmm7, xmm7
-
-.vp9_sad16x16_sse3_loop:
- movdqa xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [ref_ptr]
- movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
- movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- movdqa xmm4, XMMWORD PTR [src_ptr]
- movdqu xmm5, XMMWORD PTR [ref_ptr]
- movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
-
- psadbw xmm0, xmm1
-
- movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
-
- psadbw xmm2, xmm3
- psadbw xmm4, xmm5
- psadbw xmm6, xmm1
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- paddw xmm7, xmm0
- paddw xmm7, xmm2
- paddw xmm7, xmm4
- paddw xmm7, xmm6
-
- sub end_ptr, 1
- jne .vp9_sad16x16_sse3_loop
-
- movq xmm0, xmm7
- psrldq xmm7, 8
- paddw xmm0, xmm7
- movq rax, xmm0
-
- STACK_FRAME_DESTROY_X3
-
-;void vp9_copy32xn_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *dst_ptr,
-; int dst_stride,
-; int height);
-global sym(vp9_copy32xn_sse3) PRIVATE
-sym(vp9_copy32xn_sse3):
-
- STACK_FRAME_CREATE_X3
-
-.block_copy_sse3_loopx4:
- lea end_ptr, [src_ptr+src_stride*2]
-
- movdqu xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [src_ptr + 16]
- movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
- movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
- movdqu xmm4, XMMWORD PTR [end_ptr]
- movdqu xmm5, XMMWORD PTR [end_ptr + 16]
- movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
- movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
-
- lea src_ptr, [src_ptr+src_stride*4]
-
- lea end_ptr, [ref_ptr+ref_stride*2]
-
- movdqa XMMWORD PTR [ref_ptr], xmm0
- movdqa XMMWORD PTR [ref_ptr + 16], xmm1
- movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
- movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
- movdqa XMMWORD PTR [end_ptr], xmm4
- movdqa XMMWORD PTR [end_ptr + 16], xmm5
- movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
- movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
-
- lea ref_ptr, [ref_ptr+ref_stride*4]
-
- sub height, 4
- cmp height, 4
- jge .block_copy_sse3_loopx4
-
- ;Check to see if there is more rows need to be copied.
- cmp height, 0
- je .copy_is_done
-
-.block_copy_sse3_loop:
- movdqu xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [src_ptr + 16]
- lea src_ptr, [src_ptr+src_stride]
-
- movdqa XMMWORD PTR [ref_ptr], xmm0
- movdqa XMMWORD PTR [ref_ptr + 16], xmm1
- lea ref_ptr, [ref_ptr+ref_stride]
-
- sub height, 1
- jne .block_copy_sse3_loop
-
-.copy_is_done:
- STACK_FRAME_DESTROY_X3
-
-;void vp9_sad16x16x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr_base,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad16x16x4d_sse3) PRIVATE
-sym(vp9_sad16x16x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- movq xmm0, xmm4
- psrldq xmm4, 8
-
- paddw xmm0, xmm4
- movd [rcx], xmm0
-;-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+8], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+12], xmm0
-
- STACK_FRAME_DESTROY_X4
-
-;void vp9_sad16x8x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr_base,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad16x8x4d_sse3) PRIVATE
-sym(vp9_sad16x8x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- movq xmm0, xmm4
- psrldq xmm4, 8
-
- paddw xmm0, xmm4
- movd [rcx], xmm0
-;-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+8], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+12], xmm0
-
- STACK_FRAME_DESTROY_X4
-
-;void int vp9_sad8x16x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad8x16x4d_sse3) PRIVATE
-sym(vp9_sad8x16x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- punpckldq mm4, mm5
- punpckldq mm6, mm7
-
- movq [rcx], mm4
- movq [rcx+8], mm6
-
- STACK_FRAME_DESTROY_X4
-
-;void vp9_sad8x8x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad8x8x4d_sse3) PRIVATE
-sym(vp9_sad8x8x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- punpckldq mm4, mm5
- punpckldq mm6, mm7
-
- movq [rcx], mm4
- movq [rcx+8], mm6
-
- STACK_FRAME_DESTROY_X4
-
-;void vp9_sad4x4x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad4x4x4d_sse3) PRIVATE
-sym(vp9_sad4x4x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm1, DWORD PTR [r0_ptr]
-
- movd mm2, DWORD PTR [src_ptr+src_stride]
- movd mm3, DWORD PTR [r0_ptr+ref_stride]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- movd mm4, DWORD PTR [r1_ptr]
- movd mm5, DWORD PTR [r2_ptr]
-
- movd mm6, DWORD PTR [r3_ptr]
- movd mm2, DWORD PTR [r1_ptr+ref_stride]
-
- movd mm3, DWORD PTR [r2_ptr+ref_stride]
- movd mm7, DWORD PTR [r3_ptr+ref_stride]
-
- psadbw mm1, mm0
-
- punpcklbw mm4, mm2
- punpcklbw mm5, mm3
-
- punpcklbw mm6, mm7
- psadbw mm4, mm0
-
- psadbw mm5, mm0
- psadbw mm6, mm0
-
-
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea r0_ptr, [r0_ptr+ref_stride*2]
-
- lea r1_ptr, [r1_ptr+ref_stride*2]
- lea r2_ptr, [r2_ptr+ref_stride*2]
-
- lea r3_ptr, [r3_ptr+ref_stride*2]
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm2, DWORD PTR [r0_ptr]
-
- movd mm3, DWORD PTR [src_ptr+src_stride]
- movd mm7, DWORD PTR [r0_ptr+ref_stride]
-
- punpcklbw mm0, mm3
- punpcklbw mm2, mm7
-
- movd mm3, DWORD PTR [r1_ptr]
- movd mm7, DWORD PTR [r2_ptr]
-
- psadbw mm2, mm0
-%if ABI_IS_32BIT
- mov rax, rbp
-
- pop rbp
-%define ref_stride rax
-%endif
- mov rsi, result_ptr
-
- paddw mm1, mm2
- movd [rsi], mm1
-
- movd mm2, DWORD PTR [r1_ptr+ref_stride]
- movd mm1, DWORD PTR [r2_ptr+ref_stride]
-
- punpcklbw mm3, mm2
- punpcklbw mm7, mm1
-
- psadbw mm3, mm0
- psadbw mm7, mm0
-
- movd mm2, DWORD PTR [r3_ptr]
- movd mm1, DWORD PTR [r3_ptr+ref_stride]
-
- paddw mm3, mm4
- paddw mm7, mm5
-
- movd [rsi+4], mm3
- punpcklbw mm2, mm1
-
- movd [rsi+8], mm7
- psadbw mm2, mm0
-
- paddw mm2, mm6
- movd [rsi+12], mm2
-
-
- STACK_FRAME_DESTROY_X4
-
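The removed vp9_sadNxMx4d_sse3 routines above compute four SADs in a single pass, one per candidate reference block, and write the four sums to the results array. Below is a minimal scalar sketch of that behavior, not the removed assembly itself: the helper names are illustrative, and it assumes the four reference pointers arrive as an array, which may not match the deleted routines' exact ABI.

    /* Scalar sketch of an N x M, four-reference SAD (illustrative only). */
    #include <stdlib.h>

    static unsigned int sad_nxm_c(const unsigned char *src, int src_stride,
                                  const unsigned char *ref, int ref_stride,
                                  int width, int height) {
      unsigned int sad = 0;
      int r, c;
      for (r = 0; r < height; ++r) {
        for (c = 0; c < width; ++c)
          sad += abs(src[c] - ref[c]);   /* per-pixel absolute difference */
        src += src_stride;
        ref += ref_stride;
      }
      return sad;
    }

    static void sad_nxmx4d_c(const unsigned char *src, int src_stride,
                             const unsigned char *const ref[4], int ref_stride,
                             int width, int height, unsigned int results[4]) {
      int i;
      for (i = 0; i < 4; ++i)  /* one independent SAD per reference block */
        results[i] = sad_nxm_c(src, src_stride, ref[i], ref_stride,
                               width, height);
    }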
diff --git a/vp9/encoder/x86/vp9_sad_sse4.asm b/vp9/encoder/x86/vp9_sad_sse4.asm
index b42982a1f..faf1768a9 100644
--- a/vp9/encoder/x86/vp9_sad_sse4.asm
+++ b/vp9/encoder/x86/vp9_sad_sse4.asm
@@ -154,6 +154,16 @@
paddw xmm1, xmm5
%endmacro
+%macro WRITE_AS_INTS 0
+ mov rdi, arg(4) ;Results
+ pxor xmm0, xmm0
+ movdqa xmm2, xmm1
+ punpcklwd xmm1, xmm0
+ punpckhwd xmm2, xmm0
+
+ movdqa [rdi], xmm1
+ movdqa [rdi + 16], xmm2
+%endmacro
;void vp9_sad16x16x8_sse4(
; const unsigned char *src_ptr,
@@ -170,23 +180,22 @@ sym(vp9_sad16x16x8_sse4):
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_16X2X8 1
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
+ PROCESS_16X2X8 1
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -212,19 +221,18 @@ sym(vp9_sad16x8x8_sse4):
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_16X2X8 1
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
+ PROCESS_16X2X8 1
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -250,19 +258,18 @@ sym(vp9_sad8x8x8_sse4):
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_8X2X8 1
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
+ PROCESS_8X2X8 1
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -288,22 +295,22 @@ sym(vp9_sad8x16x8_sse4):
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ PROCESS_8X2X8 1
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
- PROCESS_8X2X8 1
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -329,17 +336,16 @@ sym(vp9_sad4x4x8_sse4):
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_4X2X8 1
- PROCESS_4X2X8 0
+ PROCESS_4X2X8 1
+ PROCESS_4X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
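The vp9_sad_sse4.asm change above replaces the packed 16-bit store of the eight SAD results with the new WRITE_AS_INTS macro, which zero-extends the eight words in xmm1 to 32-bit integers (punpcklwd/punpckhwd against a zero register) before writing 32 bytes to the results array. A plain-C sketch of that widening store, with hypothetical fixed-width types, looks like this:

    /* Sketch of the WRITE_AS_INTS epilogue: widen eight 16-bit SADs to
     * 32-bit results (illustrative, not the macro itself). */
    #include <stdint.h>

    static void write_as_ints(const uint16_t sad16[8], int32_t results[8]) {
      int i;
      for (i = 0; i < 8; ++i)
        results[i] = (int32_t)sad16[i];  /* zero-extend word to dword */
    }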
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 36fae6e8c..fc363b6b0 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -186,6 +186,7 @@ unsigned int vp9_variance16x16_wmt
*sse = sse0;
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
}
+
unsigned int vp9_mse16x16_wmt(
const unsigned char *src_ptr,
int source_stride,
@@ -305,20 +306,16 @@ unsigned int vp9_sub_pixel_variance8x8_wmt
return (xxsum - (((unsigned int)xsum * xsum) >> 6));
}
-unsigned int vp9_sub_pixel_variance16x16_wmt
-(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-) {
+static void sub_pixel_variance16x16_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse, int *avg) {
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
-
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
if (xoffset == HALFNDX && yoffset == 0) {
@@ -355,10 +352,136 @@ unsigned int vp9_sub_pixel_variance16x16_wmt
}
*sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
+ *avg = xsum0;
+}
+
+unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse_ptr) {
+ int avg;
+ unsigned int sse;
+
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse, &avg);
+ *sse_ptr = sse;
+
+ return (sse - (((unsigned int) avg * avg) >> 8));
+}
+
+unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse_ptr) {
+ int avg0, avg1, avg2, avg3;
+ unsigned int sse0, sse1, sse2, sse3;
+
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse0, &avg0);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse1, &avg1);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse3, &avg3);
+ sse0 += sse1 + sse2 + sse3;
+ avg0 += avg1 + avg2 + avg3;
+ *sse_ptr = sse0;
+
+ return (sse0 - (((unsigned int) avg0 * avg0) >> 10));
+}
+
+unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse_ptr) {
+ int avg0, avg1, avg2, avg3, avg4;
+ unsigned int sse0, sse1, sse2, sse3, sse4;
+
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse0, &avg0);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse3, &avg3);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ avg0 += avg1 + avg2 + avg3;
+ sse0 += sse1 + sse2 + sse3;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse3, &avg3);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse4, &avg4);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ avg0 += avg1 + avg2 + avg3 + avg4;
+ sse0 += sse1 + sse2 + sse3 + sse4;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse3, &avg3);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse4, &avg4);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ avg0 += avg1 + avg2 + avg3 + avg4;
+ sse0 += sse1 + sse2 + sse3 + sse4;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse3, &avg3);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse4, &avg4);
+ avg0 += avg1 + avg2 + avg3 + avg4;
+ sse0 += sse1 + sse2 + sse3 + sse4;
+ *sse_ptr = sse0;
+
+ return (sse0 - (((unsigned int) avg0 * avg0) >> 12));
}
-unsigned int vp9_sub_pixel_mse16x16_wmt(
+unsigned int vp9_sub_pixel_mse16x16_sse2(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
@@ -367,7 +490,8 @@ unsigned int vp9_sub_pixel_mse16x16_wmt(
int dst_pixels_per_line,
unsigned int *sse
) {
- vp9_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
+ vp9_sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
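In vp9_variance_sse2.c, the 16x16 sub-pixel kernel becomes a static helper that returns both the sum of squared errors and the sum of differences, so the new 32x32 and 64x64 variants can tile it over 16x16 blocks and fold the partial sums together. Each public function then subtracts the squared mean, i.e. sse - (sum * sum >> log2(pixel count)), which is why the shifts are 8, 10 and 12 for 16x16, 32x32 and 64x64. The combining step can be sketched as below (the function name is illustrative, and a 64-bit product is used here purely to avoid overflow in the sketch):

    /* variance = SSE - (sum^2 / N), with N = 1 << log2_pixels. */
    #include <stdint.h>

    static unsigned int combine_variance(unsigned int sse, int sum,
                                         int log2_pixels) {
      return sse - (unsigned int)(((int64_t)sum * sum) >> log2_pixels);
    }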
diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c
index 3beef53a2..2bf32c569 100644
--- a/vp9/encoder/x86/vp9_x86_csystemdependent.c
+++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c
@@ -23,11 +23,11 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {
vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch);
}
-int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp9_mbblock_error_mmx(MACROBLOCK *mb, int dc) {
+int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
+int vp9_mbblock_error_mmx(MACROBLOCK *mb) {
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
- return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc);
+ return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr);
}
int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
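These vp9_x86_csystemdependent.c hunks drop the now-unused dc argument from the macroblock error wrappers, matching the updated C prototypes. As a rough sketch of the kind of quantity these wrappers compute, the squared distortion between the original and dequantized coefficients can be written as below; the helper name and flat coefficient layout are illustrative assumptions, not the per-block layout used by MACROBLOCK.

    /* Total squared error between original and dequantized coefficients. */
    #include <stdint.h>

    static int block_error_c(const short *coeff, const short *dqcoeff, int n) {
      int64_t err = 0;
      int i;
      for (i = 0; i < n; ++i) {
        const int diff = coeff[i] - dqcoeff[i];
        err += (int64_t)diff * diff;  /* accumulate squared distortion */
      }
      return (int)err;
    }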
@@ -51,11 +51,11 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
#endif
#if HAVE_SSE2
-int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp9_mbblock_error_xmm(MACROBLOCK *mb, int dc) {
+int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
+int vp9_mbblock_error_xmm(MACROBLOCK *mb) {
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
- return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc);
+ return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr);
}
int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);