diff options
Diffstat (limited to 'vp9/encoder')
34 files changed, 2670 insertions, 1989 deletions
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c new file mode 100644 index 000000000..83892e872 --- /dev/null +++ b/vp9/encoder/vp9_aq_complexity.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_segmentation.h" + +static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = + {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + +void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + struct segmentation *const seg = &cm->seg; + + // Make SURE use of floating point in this function is safe. + vp9_clear_system_state(); + + if (cm->frame_type == KEY_FRAME || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { + int segment; + + // Clear down the segment map. + vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + + // Clear down the complexity map used for rd. + vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + + // Use some of the segments for in frame Q adjustment. + for (segment = 1; segment < 2; segment++) { + const int qindex_delta = + vp9_compute_qdelta_by_rate(cpi, + cm->base_qindex, + in_frame_q_adj_ratio[segment]); + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } + } +} + +// Select a segment for the current SB64 +void vp9_select_in_frame_q_segment(VP9_COMP *cpi, + int mi_row, int mi_col, + int output_enabled, int projected_rate) { + VP9_COMMON *const cm = &cpi->common; + + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + int complexity_metric = 64; + int x, y; + + unsigned char segment; + + if (!output_enabled) { + segment = 0; + } else { + // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). + // It is converted to bits * 256 units. + const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / + (bw * bh); + + if (projected_rate < (target_rate / 4)) { + segment = 1; + } else { + segment = 0; + } + + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } + } + + // Fill in the entires in the segment map corresponding to this SB64. + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; + cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = + (unsigned char)complexity_metric; + } + } +} diff --git a/vp9/encoder/vp9_aq_complexity.h b/vp9/encoder/vp9_aq_complexity.h new file mode 100644 index 000000000..af031a46c --- /dev/null +++ b/vp9/encoder/vp9_aq_complexity.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +// Select a segment for the current SB64. +void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col, + int output_enabled, int projected_rate); + + +// This function sets up a set of segments with delta Q values around +// the baseline frame quantizer. +void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/vp9/encoder/vp9_craq.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 40437c77f..1d272e1d9 100644 --- a/vp9/encoder/vp9_craq.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -11,7 +11,7 @@ #include <limits.h> #include <math.h> -#include "vp9/encoder/vp9_craq.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/common/vp9_seg_common.h" @@ -19,19 +19,69 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +struct CYCLIC_REFRESH { + // Percentage of super-blocks per frame that are targeted as candidates + // for cyclic refresh. + int max_sbs_perframe; + // Maximum q-delta as percentage of base q. + int max_qdelta_perc; + // Block size below which we don't apply cyclic refresh. + BLOCK_SIZE min_block_size; + // Superblock starting index for cycling through the frame. + int sb_index; + // Controls how long a block will need to wait to be refreshed again. + int time_for_refresh; + // Actual number of (8x8) blocks that were applied delta-q (segment 1). + int num_seg_blocks; + // Actual encoding bits for segment 1. + int actual_seg_bits; + // RD mult. parameters for segment 1. + int rdmult; + // Cyclic refresh map. + signed char *map; + // Projected rate and distortion for the current superblock. + int64_t projected_rate_sb; + int64_t projected_dist_sb; + // Thresholds applied to projected rate/distortion of the superblock. + int64_t thresh_rate_sb; + int64_t thresh_dist_sb; +}; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { + CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); + if (cr == NULL) + return NULL; + + cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); + if (cr->map == NULL) { + vpx_free(cr); + return NULL; + } + + return cr; +} + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { + vpx_free(cr->map); + vpx_free(cr); +} // Check if we should turn off cyclic refresh based on bitrate condition. -static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) { +static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, + const RATE_CONTROL *rc) { // Turn off cyclic refresh if bits available per frame is not sufficiently // larger than bit cost of segmentation. Segment map bit cost should scale // with number of seg blocks, so compare available bits to number of blocks. // Average bits available per frame = av_per_frame_bandwidth // Number of (8x8) blocks in frame = mi_rows * mi_cols; - float factor = 0.5; - int number_blocks = cpi->common.mi_rows * cpi->common.mi_cols; + const float factor = 0.5; + const int number_blocks = cm->mi_rows * cm->mi_cols; // The condition below corresponds to turning off at target bitrates: // ~24kbps for CIF, 72kbps for VGA (at 30fps). - if (cpi->rc.av_per_frame_bandwidth < factor * number_blocks) + // Also turn off at very small frame sizes, to avoid too large fraction of + // superblocks to be refreshed per frame. Threshold below is less than QCIF. + if (rc->av_per_frame_bandwidth < factor * number_blocks || + number_blocks / 64 < 5) return 0; else return 1; @@ -41,11 +91,9 @@ static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) { // (lower-qp coding). Decision can be based on various factors, such as // size of the coding block (i.e., below min_block size rejected), coding // mode, and rate/distortion. -static int candidate_refresh_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int bsize, - int use_rd) { - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; +static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, + const MB_MODE_INFO *mbmi, + BLOCK_SIZE bsize, int use_rd) { if (use_rd) { // If projected rate is below the thresh_rate (well below target, // so undershoot expected), accept it for lower-qp coding. @@ -56,18 +104,18 @@ static int candidate_refresh_aq(VP9_COMP *const cpi, // 2) mode is non-zero mv and projected distortion is above thresh_dist // 3) mode is an intra-mode (we may want to allow some of this under // another thresh_dist) - else if ((bsize < cr->min_block_size) || - (mi->mbmi.mv[0].as_int != 0 && - cr->projected_dist_sb > cr->thresh_dist_sb) || - !is_inter_block(&mi->mbmi)) + else if (bsize < cr->min_block_size || + (mbmi->mv[0].as_int != 0 && + cr->projected_dist_sb > cr->thresh_dist_sb) || + !is_inter_block(mbmi)) return 0; else return 1; } else { // Rate/distortion not used for update. - if ((bsize < cr->min_block_size) || - (mi->mbmi.mv[0].as_int != 0) || - !is_inter_block(&mi->mbmi)) + if (bsize < cr->min_block_size || + mbmi->mv[0].as_int != 0 || + !is_inter_block(mbmi)) return 0; else return 1; @@ -77,33 +125,31 @@ static int candidate_refresh_aq(VP9_COMP *const cpi, // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. -void vp9_update_segment_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int mi_row, - int mi_col, - int bsize, - int use_rd) { - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; - VP9_COMMON *const cm = &cpi->common; +void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; + const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); // Default is to not update the refresh map. int new_map_value = cr->map[block_index]; int x = 0; int y = 0; - int current_segment = mi->mbmi.segment_id; - int refresh_this_block = candidate_refresh_aq(cpi, mi, bsize, use_rd); + // Check if we should reset the segment_id for this block. - if (current_segment && !refresh_this_block) - mi->mbmi.segment_id = 0; + if (mbmi->segment_id > 0 && !refresh_this_block) + mbmi->segment_id = 0; // Update the cyclic refresh map, to be used for setting segmentation map // for the next frame. If the block will be refreshed this frame, mark it // as clean. The magnitude of the -ve influences how long before we consider // it for refresh again. - if (mi->mbmi.segment_id == 1) { + if (mbmi->segment_id == 1) { new_map_value = -cr->time_for_refresh; } else if (refresh_this_block) { // Else if it is accepted as candidate for refresh, and has not already @@ -121,54 +167,54 @@ void vp9_update_segment_aq(VP9_COMP *const cpi, for (x = 0; x < xmis; x++) { cr->map[block_index + y * cm->mi_cols + x] = new_map_value; cpi->segmentation_map[block_index + y * cm->mi_cols + x] = - mi->mbmi.segment_id; + mbmi->segment_id; } // Keep track of actual number (in units of 8x8) of blocks in segment 1 used // for encoding this frame. - if (mi->mbmi.segment_id) + if (mbmi->segment_id) cr->num_seg_blocks += xmis * ymis; } // Setup cyclic background refresh: set delta q and segmentation map. -void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) { +void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; + const RATE_CONTROL *const rc = &cpi->rc; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; - unsigned char *seg_map = cpi->segmentation_map; - int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cpi); + unsigned char *const seg_map = cpi->segmentation_map; + const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); // Don't apply refresh on key frame or enhancement layer frames. if (!apply_cyclic_refresh || - (cpi->common.frame_type == KEY_FRAME) || + (cm->frame_type == KEY_FRAME) || (cpi->svc.temporal_layer_id > 0)) { // Set segmentation map to 0 and disable. vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); - if (cpi->common.frame_type == KEY_FRAME) - cr->mb_index = 0; + if (cm->frame_type == KEY_FRAME) + cr->sb_index = 0; return; } else { int qindex_delta = 0; - int mbs_in_frame = cm->mi_rows * cm->mi_cols; - int i, x, y, block_count, bl_index, bl_index2; - int sum_map, new_value, mi_row, mi_col, xmis, ymis, qindex2; + int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; + int xmis, ymis, x, y, qindex2; // Rate target ratio to set q delta. - float rate_ratio_qdelta = 2.0; + const float rate_ratio_qdelta = 2.0; vp9_clear_system_state(); // Some of these parameters may be set via codec-control function later. - cr->max_mbs_perframe = 10; + cr->max_sbs_perframe = 10; cr->max_qdelta_perc = 50; cr->min_block_size = BLOCK_16X16; cr->time_for_refresh = 1; // Set rate threshold to some fraction of target (and scaled by 256). - cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 2; + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; // Distortion threshold, quadratic in Q, scale factor to be adjusted. cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * vp9_convert_qindex_to_q(cm->base_qindex)); if (cpi->sf.use_nonrd_pick_mode) { // May want to be more conservative with thresholds in non-rd mode for now // as rate/distortion are derived from model based on prediction residual. - cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 3; + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * vp9_convert_qindex_to_q(cm->base_qindex)); } @@ -200,68 +246,79 @@ void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) { rate_ratio_qdelta); // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from // previous encoded frame. - if ((-qindex_delta) > cr->max_qdelta_perc * cm->base_qindex / 100) { + if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100) qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100; - } // Compute rd-mult for segment 1. qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta); - // Number of target macroblocks to get the q delta (segment 1). - block_count = cr->max_mbs_perframe * mbs_in_frame / 100; - // Set the segmentation map: cycle through the macroblocks, starting at + + sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sbs_in_frame = sb_cols * sb_rows; + // Number of target superblocks to get the q delta (segment 1). + block_count = cr->max_sbs_perframe * sbs_in_frame / 100; + // Set the segmentation map: cycle through the superblocks, starting at // cr->mb_index, and stopping when either block_count blocks have been found // to be refreshed, or we have passed through whole frame. - // Note the setting of seg_map below is done in two steps (one over 8x8) - // and then another over SB, in order to keep the value constant over SB. - // TODO(marpan): Do this in one pass in SB order. - assert(cr->mb_index < mbs_in_frame); - i = cr->mb_index; + assert(cr->sb_index < sbs_in_frame); + i = cr->sb_index; do { - // If the macroblock is as a candidate for clean up then mark it - // for possible boost/refresh (segment 1). The segment id may get reset to - // 0 later if the macroblock gets coded anything other than ZEROMV. - if (cr->map[i] == 0) { - seg_map[i] = 1; - block_count--; - } else if (cr->map[i] < 0) { - cr->map[i]++; + int sum_map = 0; + // Get the mi_row/mi_col corresponding to superblock index i. + int sb_row_index = (i / sb_cols); + int sb_col_index = i - sb_row_index * sb_cols; + int mi_row = sb_row_index * MI_BLOCK_SIZE; + int mi_col = sb_col_index * MI_BLOCK_SIZE; + assert(mi_row >= 0 && mi_row < cm->mi_rows); + assert(mi_col >= 0 && mi_col < cm->mi_cols); + bl_index = mi_row * cm->mi_cols + mi_col; + // Loop through all 8x8 blocks in superblock and update map. + xmis = MIN(cm->mi_cols - mi_col, + num_8x8_blocks_wide_lookup[BLOCK_64X64]); + ymis = MIN(cm->mi_rows - mi_row, + num_8x8_blocks_high_lookup[BLOCK_64X64]); + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + const int bl_index2 = bl_index + y * cm->mi_cols + x; + // If the block is as a candidate for clean up then mark it + // for possible boost/refresh (segment 1). The segment id may get + // reset to 0 later if block gets coded anything other than ZEROMV. + if (cr->map[bl_index2] == 0) { + seg_map[bl_index2] = 1; + sum_map++; + } else if (cr->map[bl_index2] < 0) { + cr->map[bl_index2]++; + } + } + } + // Enforce constant segment over superblock. + // If segment is partial over superblock, reset to either all 1 or 0. + if (sum_map > 0 && sum_map < xmis * ymis) { + const int new_value = (sum_map >= xmis * ymis / 2); + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) + seg_map[bl_index + y * cm->mi_cols + x] = new_value; } i++; - if (i == mbs_in_frame) { + if (i == sbs_in_frame) { i = 0; } - } while (block_count && i != cr->mb_index); - cr->mb_index = i; - // Enforce constant segment map over superblock. - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - bl_index = mi_row * cm->mi_cols + mi_col; - xmis = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - ymis = num_8x8_blocks_high_lookup[BLOCK_64X64]; - xmis = MIN(cm->mi_cols - mi_col, xmis); - ymis = MIN(cm->mi_rows - mi_row, ymis); - sum_map = 0; - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - bl_index2 = bl_index + y * cm->mi_cols + x; - sum_map += seg_map[bl_index2]; - } - new_value = 0; - // If segment is partial over superblock, reset. - if (sum_map > 0 && sum_map < xmis * ymis) { - if (sum_map < xmis * ymis / 2) - new_value = 0; - else - new_value = 1; - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - bl_index2 = bl_index + y * cm->mi_cols + x; - seg_map[bl_index2] = new_value; - } - } - } + if (sum_map >= xmis * ymis /2) + block_count--; + } while (block_count && i != cr->sb_index); + cr->sb_index = i; } } + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb) { + cr->projected_rate_sb = rate_sb; + cr->projected_dist_sb = dist_sb; +} + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { + return cr->rdmult; +} diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h new file mode 100644 index 000000000..f556d658b --- /dev/null +++ b/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ + +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +struct CYCLIC_REFRESH; +typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. +void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd); + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb); + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_aq_variance.c index c71c171bb..c25eb95c7 100644 --- a/vp9/encoder/vp9_vaq.c +++ b/vp9/encoder/vp9_aq_variance.c @@ -10,7 +10,7 @@ #include <math.h> -#include "vp9/encoder/vp9_vaq.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" diff --git a/vp9/encoder/vp9_vaq.h b/vp9/encoder/vp9_aq_variance.h index c73114aeb..381fe50cf 100644 --- a/vp9/encoder/vp9_vaq.h +++ b/vp9/encoder/vp9_aq_variance.h @@ -9,8 +9,8 @@ */ -#ifndef VP9_ENCODER_VP9_VAQ_H_ -#define VP9_ENCODER_VP9_VAQ_H_ +#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_onyx_int.h" @@ -31,4 +31,4 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_VAQ_H_ +#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 1b4a6cc9b..a1db097bd 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -392,12 +392,10 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, pack_mb_tokens(w, tok, tok_end); } -static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, +static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, + int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) { - VP9_COMMON *const cm = &cpi->common; - const int ctx = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; @@ -415,10 +413,13 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, } } -static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes_sb(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int bsl = b_width_log2(bsize); const int bs = (1 << bsl) / 4; PARTITION_TYPE partition; @@ -429,7 +430,7 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, return; partition = partition_lookup[bsl][m->mbmi.sb_type]; - write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w); + write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); @@ -465,20 +466,21 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { int mi_row, mi_col; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { - vp9_zero(cpi->left_seg_context); + vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + BLOCK_64X64); } } @@ -930,7 +932,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) * + vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); tok[0][0] = cpi->tok; diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 168702e90..7729d84b3 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -115,7 +115,6 @@ struct macroblock { unsigned int source_variance; unsigned int pred_sse[MAX_REF_FRAMES]; int pred_mv_sad[MAX_REF_FRAMES]; - int mode_sad[MAX_REF_FRAMES][INTER_MODES + 1]; int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -158,7 +157,6 @@ struct macroblock { // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; - DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); int optimize; @@ -198,7 +196,8 @@ struct macroblock { // TODO(jingning): the variables used here are little complicated. need further // refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. -static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) { +static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, + BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_64X64: return &x->sb64_context; diff --git a/vp9/encoder/vp9_craq.h b/vp9/encoder/vp9_craq.h deleted file mode 100644 index 1f81f3e77..000000000 --- a/vp9/encoder/vp9_craq.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_CRAQ_H_ -#define VP9_ENCODER_VP9_CRAQ_H_ - -#include "vp9/encoder/vp9_onyx_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Check if we should turn off cyclic refresh based on bitrate condition. -static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi); - -// Check if this coding block, of size bsize, should be considered for refresh -// (lower-qp coding). -static int candidate_refresh_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int bsize, - int use_rd); - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. -void vp9_update_segment_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int mi_row, - int mi_col, - int bsize, - int use_rd); - -// Setup cyclic background refresh: set delta q and segmentation map. -void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_CRAQ_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a38592240..80abb1fa5 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -30,6 +30,9 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -38,8 +41,6 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_vaq.h" -#include "vp9/encoder/vp9_craq.h" #define GF_ZEROMV_ZBIN_BOOST 0 #define LF_ZEROMV_ZBIN_BOOST 0 @@ -164,13 +165,12 @@ static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, int mi_col) { const int idx_str = xd->mode_info_stride * mi_row + mi_col; xd->mi_8x8 = cm->mi_grid_visible + idx_str; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; xd->mi_8x8[0] = cm->mi + idx_str; } -static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col, +static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &cpi->common; + const VP9_COMMON *const cm = &cpi->common; const int mb_rows = cm->mb_rows; const int mb_cols = cm->mb_cols; const int mb_row = mi_row >> 1; @@ -194,6 +194,16 @@ static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col, return 0; } +static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->active_map_enabled && !x->e_mbd.lossless) { + return is_block_in_mb_map(cpi, mi_row, mi_col, bsize); + } else { + return 1; + } +} + static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; @@ -207,16 +217,11 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, const int idx_map = mb_row * cm->mb_cols + mb_col; const struct segmentation *const seg = &cm->seg; - set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col); + set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); // Activity map pointer x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; - - if (cpi->active_map_enabled && !x->e_mbd.lossless) { - x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize); - } else { - x->in_active_map = 1; - } + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); set_modeinfo_offsets(cm, xd, mi_row, mi_col); @@ -276,7 +281,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } } -static void duplicate_modeinfo_in_sb(VP9_COMMON * const cm, +static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, @@ -300,7 +305,7 @@ static void set_block_size(VP9_COMP * const cpi, MACROBLOCKD *const xd = &cpi->mb.e_mbd; set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col); xd->mi_8x8[0]->mbmi.sb_type = bsize; - duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); + duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); } } @@ -829,52 +834,6 @@ static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { adjust_act_zbin(cpi, x); } -// Select a segment for the current SB64 -static void select_in_frame_q_segment(VP9_COMP *cpi, - int mi_row, int mi_col, - int output_enabled, int projected_rate) { - VP9_COMMON *const cm = &cpi->common; - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - int complexity_metric = 64; - int x, y; - - unsigned char segment; - - if (!output_enabled) { - segment = 0; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). - // It is converted to bits * 256 units - const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); - - if (projected_rate < (target_rate / 4)) { - segment = 1; - } else { - segment = 0; - } - - if (target_rate > 0) { - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); - } - } - - // Fill in the entires in the segment map corresponding to this SB64 - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = - (unsigned char)complexity_metric; - } - } -} - static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { @@ -896,21 +855,17 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, assert(mi->mbmi.sb_type == bsize); - // For in frame adaptive Q copy over the chosen segment id into the - // mode innfo context for the chosen mode / partition. - if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ || - cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && + *mi_addr = *mi; + + // For in frame adaptive Q, check for reseting the segment_id and updating + // the cyclic refresh map. + if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled && output_enabled) { - // Check for reseting segment_id and update cyclic map. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) { - vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1); - vp9_init_plane_quantizers(cpi, x); - } - mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id; + vp9_cyclic_refresh_update_segment(cpi, &xd->mi_8x8[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); } - *mi_addr = *mi; - max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; @@ -985,12 +940,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, #endif if (!frame_is_intra_only(cm)) { if (is_inter_block(mbmi)) { - if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { - MV best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; - vp9_update_mv_count(cm, xd, best_mv); - } + vp9_update_mv_count(cm, xd); if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); @@ -1111,7 +1061,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, : cm->last_frame_seg_map; // If segment 1, use rdmult for that segment. if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) - x->rdmult = cpi->cyclic_refresh.rdmult; + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } // Find best coding mode & reconstruct the MB so it is available @@ -1211,21 +1161,21 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy( - cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), + xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(cpi->above_seg_context + mi_col, sa, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl, - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(xd->above_seg_context + mi_col, sa, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], @@ -1244,20 +1194,20 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, for (p = 0; p < MAX_MB_PLANE; ++p) { vpx_memcpy( a + num_4x4_blocks_wide * p, - cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), + xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( l + num_4x4_blocks_high * p, - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(sa, cpi->above_seg_context + mi_col, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK), - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(sa, xd->above_seg_context + mi_col, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), + sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -1289,6 +1239,8 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1298,8 +1250,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, return; if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = *get_sb_partitioning(x, bsize); } else { ctx = 0; @@ -1354,8 +1305,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } // Check to see if the given partition size is allowed for a specified number @@ -1432,6 +1382,7 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, for (block_col = 0; block_col < 8; ++block_col) { MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + if (prev_mi) { const ptrdiff_t offset = prev_mi - cm->prev_mi; mi_8x8[block_row * mis + block_col] = cm->mi + offset; @@ -1460,45 +1411,39 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { return 0; } -static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx, +static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, int bsize) { - int i; VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; const struct segmentation *const seg = &cm->seg; - // TODO(jingning) We might need PICK_MODE_CONTEXT to buffer coding modes - // associated with variable block sizes. Otherwise, remove this ctx - // from argument list. - (void)ctx; + *(xd->mi_8x8[0]) = ctx->mic; - // Check for reseting segment_id and update cyclic map. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) { - vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1); + // For in frame adaptive Q, check for reseting the segment_id and updating + // the cyclic refresh map. + if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi_8x8[0]->mbmi, + mi_row, mi_col, bsize, 1); vp9_init_plane_quantizers(cpi, x); } - if (is_inter_block(mbmi)) { - if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { - MV best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; - vp9_update_mv_count(cm, xd, best_mv); - } + vp9_update_mv_count(cm, xd); if (cm->interp_filter == SWITCHABLE) { const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; } } + + x->skip = ctx->skip; } static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - int output_enabled, BLOCK_SIZE bsize) { + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { @@ -1507,6 +1452,7 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, if (x->ab_index > 0) return; } + set_offsets(cpi, tile, mi_row, mi_col, bsize); update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize); @@ -1522,6 +1468,8 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1534,8 +1482,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; const int idx_str = xd->mode_info_stride * mi_row + mi_col; MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = mi_8x8[0]->mbmi.sb_type; } else { ctx = 0; @@ -1594,8 +1541,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1606,12 +1552,10 @@ static void rd_use_partition(VP9_COMP *cpi, int do_recon) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; const int mis = cm->mode_info_stride; const int bsl = b_width_log2(bsize); - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - const int ms = num_4x4_blocks_wide / 2; - const int mh = num_4x4_blocks_high / 2; + const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; const int bss = (1 << bsl) / 4; int i, pl; PARTITION_TYPE partition = PARTITION_NONE; @@ -1630,10 +1574,14 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + int do_partition_search = 1; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + assert(num_4x4_blocks_wide_lookup[bsize] == + num_4x4_blocks_high_lookup[bsize]); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1653,9 +1601,22 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } - if (cpi->sf.partition_search_type == SEARCH_PARTITION && + if (!x->in_active_map) { + do_partition_search = 0; + if (mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { + *(get_sb_partitioning(x, bsize)) = bsize; + bs_type = mi_8x8[0]->mbmi.sb_type = bsize; + subsize = bsize; + partition = PARTITION_NONE; + } + } + if (do_partition_search && + cpi->sf.partition_search_type == SEARCH_PARTITION && cpi->sf.adjust_partitioning_from_last_frame) { // Check if any of the sub blocks are further split. if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { @@ -1673,15 +1634,13 @@ static void rd_use_partition(VP9_COMP *cpi, // If partition is not none try none unless each of the 4 splits are split // even further.. if (partition != PARTITION_NONE && !splits_below && - mi_row + (ms >> 1) < cm->mi_rows && - mi_col + (ms >> 1) < cm->mi_cols) { + mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { *(get_sb_partitioning(x, bsize)) = bsize; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, get_block_context(x, bsize), INT64_MAX); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1706,14 +1665,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) { + bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; int64_t dt = 0; update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1731,14 +1690,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; int64_t dt = 0; update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1754,8 +1713,8 @@ static void rd_use_partition(VP9_COMP *cpi, last_part_rate = 0; last_part_dist = 0; for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; int64_t dt; @@ -1781,18 +1740,20 @@ static void rd_use_partition(VP9_COMP *cpi, assert(0); } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) { last_part_rate += x->partition_cost[pl][partition]; last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } - if (cpi->sf.adjust_partitioning_from_last_frame + if (do_partition_search + && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 - && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows) - && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) { + && (mi_row + mi_step < cm->mi_rows || + mi_row + (mi_step >> 1) == cm->mi_rows) + && (mi_col + mi_step < cm->mi_cols || + mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; chosen_dist = 0; @@ -1800,8 +1761,8 @@ static void rd_use_partition(VP9_COMP *cpi, // Split partition. for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2); - int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -1835,14 +1796,11 @@ static void rd_use_partition(VP9_COMP *cpi, encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row + y_idx, mi_col + x_idx, + pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += x->partition_cost[pl][PARTITION_NONE]; } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rate < INT_MAX) { chosen_rate += x->partition_cost[pl][PARTITION_SPLIT]; chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); @@ -1880,14 +1838,14 @@ static void rd_use_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, chosen_rate); - } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = chosen_rate; - cpi->cyclic_refresh.projected_dist_sb = chosen_dist; + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, + output_enabled, chosen_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + chosen_rate, chosen_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } @@ -1951,77 +1909,71 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { // Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, - int row, int col, + int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { - VP9_COMMON * const cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MODE_INFO ** mi_8x8 = xd->mi_8x8; - MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8; - + MODE_INFO **mi_8x8 = xd->mi_8x8; const int left_in_image = xd->left_available && mi_8x8[-1]; const int above_in_image = xd->up_available && mi_8x8[-xd->mode_info_stride]; - MODE_INFO ** above_sb64_mi_8x8; - MODE_INFO ** left_sb64_mi_8x8; + MODE_INFO **above_sb64_mi_8x8; + MODE_INFO **left_sb64_mi_8x8; - int row8x8_remaining = tile->mi_row_end - row; - int col8x8_remaining = tile->mi_col_end - col; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; - + BLOCK_SIZE min_size = BLOCK_4X4; + BLOCK_SIZE max_size = BLOCK_64X64; // Trap case where we do not have a prediction. - if (!left_in_image && !above_in_image && - ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) { - *min_block_size = BLOCK_4X4; - *max_block_size = BLOCK_64X64; - } else { + if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - *min_block_size = BLOCK_64X64; - *max_block_size = BLOCK_4X4; + min_size = BLOCK_64X64; + max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous // passed in values for min and max as a starting point. - // // Find the min and max partition used in previous frame at this location - if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) { - get_sb_partition_size_range(cpi, prev_mi_8x8, - min_block_size, max_block_size); + if (cm->frame_type != KEY_FRAME) { + MODE_INFO **const prev_mi = + &cm->prev_mi_grid_visible[mi_row * xd->mode_info_stride + mi_col]; + get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the left SB64 if (left_in_image) { left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, left_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); } - // Find the min and max partition sizes used in the above SB64. if (above_in_image) { above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, above_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); + } + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + min_size = min_partition_size[min_size]; + max_size = max_partition_size[max_size]; } } - // adjust observed min and max - if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; - } - - // Check border cases where max and min from neighbours may not be legal. - *max_block_size = find_partition_size(*max_block_size, - row8x8_remaining, col8x8_remaining, - &bh, &bw); - *min_block_size = MIN(*min_block_size, *max_block_size); + // Check border cases where max and min from neighbors may not be legal. + max_size = find_partition_size(max_size, + row8x8_remaining, col8x8_remaining, + &bh, &bw); + min_size = MIN(min_size, max_size); // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. if (cpi->sf.use_square_partition_only && - next_square_size[*max_block_size] < *min_block_size) { - *min_block_size = next_square_size[*max_block_size]; + next_square_size[max_size] < min_size) { + min_size = next_square_size[max_size]; } + *min_block_size = min_size; + *max_block_size = max_size; } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { @@ -2041,7 +1993,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; @@ -2054,8 +2007,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int do_split = bsize >= BLOCK_8X8; int do_rect = 1; // Override skipping rectangular partition operations for edge blocks - const int force_horz_split = (mi_row + ms >= cm->mi_rows); - const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); + const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; @@ -2081,6 +2034,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } // Determine partition types in search according to the speed features. @@ -2122,9 +2077,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ctx, best_rd); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += x->partition_cost[pl][PARTITION_NONE]; } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); @@ -2169,8 +2122,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (do_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - const int x_idx = (i & 1) * ms; - const int y_idx = (i >> 1) * ms; + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -2194,9 +2147,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd && i == 4) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2228,7 +2179,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -2240,7 +2191,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -2252,9 +2203,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_HORZ]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2281,7 +2230,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -2293,7 +2242,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -2305,9 +2254,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_VERT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2335,13 +2282,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate); - } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = best_rate; - cpi->cyclic_refresh.projected_dist_sb = best_dist; + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } if (bsize == BLOCK_64X64) { @@ -2356,11 +2304,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; @@ -2486,11 +2435,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cpi->above_context[0], 0, - sizeof(*cpi->above_context[0]) * + vpx_memset(xd->above_context[0], 0, + sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); - vpx_memset(cpi->above_seg_context, 0, - sizeof(*cpi->above_seg_context) * aligned_mi_cols); + vpx_memset(xd->above_seg_context, 0, + sizeof(*xd->above_seg_context) * aligned_mi_cols); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { @@ -2533,87 +2482,15 @@ static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { return 1; } -static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs, - TX_SIZE tx_size) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) - mi_8x8[y * mis + x]->mbmi.tx_size = tx_size; - } -} - -static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis, - TX_SIZE max_tx_size, int bw, int bh, - int mi_row, int mi_col, - MODE_INFO **mi_8x8) { - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { - return; - } else { - const MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi; - if (mbmi->tx_size > max_tx_size) { - const int ymbs = MIN(bh, cm->mi_rows - mi_row); - const int xmbs = MIN(bw, cm->mi_cols - mi_col); - - assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || - get_skip_flag(mi_8x8, mis, ymbs, xmbs)); - set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); - } - } -} - -static void reset_skip_txfm_size_sb(VP9_COMMON *cm, MODE_INFO **mi_8x8, - TX_SIZE max_tx_size, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const int mis = cm->mode_info_stride; - int bw, bh; - const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; - - if (bw == bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col, - mi_8x8); - } else if (bw == bs && bh < bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs, - mi_col, mi_8x8 + hbs * mis); - } else if (bw < bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, - mi_col + hbs, mi_8x8 + hbs); - } else { - const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; - int n; - - assert(bw < bs && bh < bs); - - for (n = 0; n < 4; n++) { - const int mi_dc = hbs * (n & 1); - const int mi_dr = hbs * (n >> 1); - - reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size, - mi_row + mi_dr, mi_col + mi_dc, subsize); - } - } -} - static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { int mi_row, mi_col; const int mis = cm->mode_info_stride; - MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible; + MODE_INFO **mi_ptr = cm->mi_grid_visible; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) { - mi_8x8 = mi_ptr; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) { - reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col, - BLOCK_64X64); + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max) + mi_ptr[mi_col]->mbmi.tx_size = txfm_max; } } } @@ -2693,6 +2570,7 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &x->e_mbd; set_offsets(cpi, tile, mi_row, mi_col, bsize); xd->mi_8x8[0]->mbmi.sb_type = bsize; + if (!frame_is_intra_only(cm)) { vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize); @@ -2700,7 +2578,338 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, MB_PREDICTION_MODE intramode = DC_PRED; set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode); } - duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize); + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); +} + +static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE subsize) { + MACROBLOCKD *xd = &x->e_mbd; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = partition_lookup[bsl][subsize]; + + assert(bsize >= BLOCK_8X8); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs); + *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize); + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col); + *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize); + } + break; + case PARTITION_SPLIT: + *get_sb_index(x, subsize) = 0; + fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 1; + fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 2; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 3; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + break; + default: + break; + } +} + +static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *rate, + int64_t *dist, int do_recon, int64_t best_rd) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); + int i; + BLOCK_SIZE subsize; + int this_rate, sum_rate = 0, best_rate = INT_MAX; + int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; + int64_t sum_rd = 0; + int do_split = bsize >= BLOCK_8X8; + int do_rect = 1; + // Override skipping rectangular partition operations for edge blocks + const int force_horz_split = (mi_row + ms >= cm->mi_rows); + const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int xss = x->e_mbd.plane[1].subsampling_x; + const int yss = x->e_mbd.plane[1].subsampling_y; + + int partition_none_allowed = !force_horz_split && !force_vert_split; + int partition_horz_allowed = !force_vert_split && yss <= xss && + bsize >= BLOCK_8X8; + int partition_vert_allowed = !force_horz_split && xss <= yss && + bsize >= BLOCK_8X8; + (void) *tp_orig; + + if (bsize < BLOCK_8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. + if (x->ab_index != 0) { + *rate = 0; + *dist = 0; + return; + } + } + + assert(num_8x8_blocks_wide_lookup[bsize] == + num_8x8_blocks_high_lookup[bsize]); + + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + // Determine partition types in search according to the speed features. + // The threshold set here has to be of square block size. + if (cpi->sf.auto_min_max_partition_size) { + partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size); + partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_horz_split); + partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_vert_split); + do_split &= bsize > cpi->sf.min_partition_size; + } + if (cpi->sf.use_square_partition_only) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; + } + + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; + + // PARTITION_NONE + if (partition_none_allowed) { + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, bsize); + ctx->mic.mbmi = xd->mi_8x8[0]->mbmi; + + if (this_rate != INT_MAX) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_NONE]; + sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); + if (sum_rd < best_rd) { + int64_t stop_thresh = 4096; + int64_t stop_thresh_rd; + + best_rate = this_rate; + best_dist = this_dist; + best_rd = sum_rd; + if (bsize >= BLOCK_8X8) + *(get_sb_partitioning(x, bsize)) = bsize; + + // Adjust threshold according to partition size. + stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); + // If obtained distortion is very small, choose current partition + // and stop splitting. + if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + do_split = 0; + do_rect = 0; + } + } + } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } + } + + // store estimated motion vector + store_pred_mv(x, ctx); + + // PARTITION_SPLIT + sum_rd = 0; + if (do_split) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + subsize = get_subsize(bsize, PARTITION_SPLIT); + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + const int x_idx = (i & 1) * ms; + const int y_idx = (i >> 1) * ms; + + if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) + continue; + + *get_sb_index(x, subsize) = i; + load_pred_mv(x, ctx); + + nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, &this_dist, 0, + best_rd - sum_rd); + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } else { + // skip rectangular partition test when larger block size + // gives better rd cost + if (cpi->sf.less_rectangular_check) + do_rect &= !partition_none_allowed; + } + } + + // PARTITION_HORZ + if (partition_horz_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_HORZ); + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; + + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, + &this_rate, &this_dist, subsize); + + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_HORZ]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rd = sum_rd; + best_rate = sum_rate; + best_dist = sum_dist; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + // PARTITION_VERT + if (partition_vert_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_VERT); + + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, + &this_rate, &this_dist, subsize); + + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_VERT]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + *rate = best_rate; + *dist = best_dist; + + if (best_rate == INT_MAX) + return; + + // update mode info array + fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, + *(get_sb_partitioning(x, bsize))); + + if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { + int output_enabled = (bsize == BLOCK_64X64); + + // Check the projected output rate for this SB against it's target + // and and if necessary apply a Q delta using segmentation to get + // closer to the target. + if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); + } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); + } + + if (bsize == BLOCK_64X64) { + assert(tp_orig < *tp); + assert(best_rate < INT_MAX); + assert(best_dist < INT64_MAX); + } else { + assert(tp_orig == *tp); + } } static void nonrd_use_partition(VP9_COMP *cpi, @@ -2712,12 +2921,13 @@ static void nonrd_use_partition(VP9_COMP *cpi, int *totrate, int64_t *totdist) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; const int mis = cm->mode_info_stride; PARTITION_TYPE partition; BLOCK_SIZE subsize; - int rate; - int64_t dist; + int rate = INT_MAX; + int64_t dist = INT64_MAX; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -2733,14 +2943,17 @@ static void nonrd_use_partition(VP9_COMP *cpi, switch (partition) { case PARTITION_NONE: nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; break; case PARTITION_VERT: *get_sb_index(x, subsize) = 0; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; if (mi_col + hbs < cm->mi_cols) { *get_sb_index(x, subsize) = 1; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, &rate, &dist, subsize); + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2751,10 +2964,12 @@ static void nonrd_use_partition(VP9_COMP *cpi, case PARTITION_HORZ: *get_sb_index(x, subsize) = 0; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; if (mi_row + hbs < cm->mi_rows) { *get_sb_index(x, subsize) = 1; nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, &rate, &dist, subsize); + (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2764,7 +2979,6 @@ static void nonrd_use_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - *get_sb_index(x, subsize) = 0; nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize, output_enabled, totrate, totdist); @@ -2801,10 +3015,9 @@ static void nonrd_use_partition(VP9_COMP *cpi, } if (bsize == BLOCK_64X64 && output_enabled) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = *totrate; - cpi->cyclic_refresh.projected_dist_sb = *totdist; - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + *totrate, *totdist); encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize); } } @@ -2812,33 +3025,55 @@ static void nonrd_use_partition(VP9_COMP *cpi, static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; + int dummy_rate = 0; + int64_t dummy_dist = 0; const int idx_str = cm->mode_info_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; + MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; + BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? cpi->sf.always_this_block_size : get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); cpi->mb.source_variance = UINT_MAX; + vp9_zero(cpi->mb.pred_mv); // Set the partition type of the 64X64 block - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) - choose_partitioning(cpi, tile, mi_row, mi_col); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, - &dummy_rate, &dummy_dist); + switch (cpi->sf.partition_search_type) { + case VAR_BASED_PARTITION: + choose_partitioning(cpi, tile, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case VAR_BASED_FIXED_PARTITION: + case FIXED_PARTITION: + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case REFERENCE_PARTITION: + if (cpi->sf.partition_check || sb_has_motion(cm, prev_mi_8x8)) { + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, INT64_MAX); + } else { + copy_partitioning(cm, mi_8x8, prev_mi_8x8); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rate, &dummy_dist); + } + break; + default: + assert(0); + } } } // end RTC play code @@ -3192,9 +3427,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ && - cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) && - !cpi->sf.use_nonrd_pick_mode; + cpi->oxcf.aq_mode != COMPLEXITY_AQ && + cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && + cpi->sf.allow_skip_recode; + x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index 703dde323..2a10bbfde 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -229,22 +229,21 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); } -static void inc_mvs(const int_mv mv[2], const MV ref[2], int is_compound, +static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], nmv_context_counts *counts) { int i; - for (i = 0; i < 1 + is_compound; ++i) { - const MV diff = { mv[i].as_mv.row - ref[i].row, - mv[i].as_mv.col - ref[i].col }; + + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { + const MV *ref = &mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; + const MV diff = {mvs[i].as_mv.row - ref->row, + mvs[i].as_mv.col - ref->col}; vp9_inc_mv(&diff, counts); } } -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, - const MV best_ref_mv[2]) { +void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { const MODE_INFO *mi = xd->mi_8x8[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; - const int is_compound = has_second_ref(mbmi); - nmv_context_counts *counts = &cm->counts.mv; if (mbmi->sb_type < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type]; @@ -255,11 +254,12 @@ void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; if (mi->bmi[i].as_mode == NEWMV) - inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, counts); + inc_mvs(mbmi, mi->bmi[i].as_mv, &cm->counts.mv); } } - } else if (mbmi->mode == NEWMV) { - inc_mvs(mbmi->mv, best_ref_mv, is_compound, counts); + } else { + if (mbmi->mode == NEWMV) + inc_mvs(mbmi, mbmi->mv, &cm->counts.mv); } } diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index f16b2c17c..50cb9611b 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -28,8 +28,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context* mvctx, int usehp); -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, - const MV best_ref_mv[2]); +void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 8167d43a8..6d36b7cf5 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -23,6 +23,7 @@ #include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes() #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" @@ -34,7 +35,6 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_variance.h" #define OUTPUT_FPF 0 @@ -181,11 +181,13 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->new_mv_count = 0.0; section->count = 0.0; section->duration = 1.0; + section->spatial_layer_id = 0; } static void accumulate_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) { section->frame += frame->frame; + section->spatial_layer_id = frame->spatial_layer_id; section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; @@ -255,12 +257,21 @@ static void avg_stats(FIRSTPASS_STATS *section) { // harder frames. static double calculate_modified_err(const VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame) { - const struct twopass_rc *const twopass = &cpi->twopass; - const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_err = stats->ssim_weighted_pred_err / stats->count; - double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); + const struct twopass_rc *twopass = &cpi->twopass; + const FIRSTPASS_STATS *stats; + double av_err; + double modified_error; + + if (cpi->svc.number_spatial_layers > 1 && + cpi->svc.number_temporal_layers == 1) { + twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + } + + stats = &twopass->total_stats; + av_err = stats->ssim_weighted_pred_err / stats->count; + modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / + DOUBLE_DIVIDE_CHECK(av_err), + cpi->oxcf.two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); @@ -342,7 +353,15 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + int i; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + output_stats(&cpi->svc.layer_context[i].twopass.total_stats, + cpi->output_pkt_list); + } + } else { + output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + } } static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { @@ -464,11 +483,11 @@ void vp9_first_pass(VP9_COMP *cpi) { int recon_yoffset, recon_uvoffset; YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - YV12_BUFFER_CONFIG *const gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); - const int recon_y_stride = lst_yv12->y_stride; - const int recon_uv_stride = lst_yv12->uv_stride; - const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); + int recon_y_stride = lst_yv12->y_stride; + int recon_uv_stride = lst_yv12->uv_stride; + int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); int64_t intra_error = 0; int64_t coded_error = 0; int64_t sr_coded_error = 0; @@ -484,13 +503,43 @@ void vp9_first_pass(VP9_COMP *cpi) { int new_mv_count = 0; int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; + const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; vp9_clear_system_state(); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + MV_REFERENCE_FRAME ref_frame = LAST_FRAME; + const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; + twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + + vp9_scale_references(cpi); + + // Use either last frame or alt frame for motion search. + if (cpi->ref_frame_flags & VP9_LAST_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); + ref_frame = LAST_FRAME; + } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME); + ref_frame = ALTREF_FRAME; + } + + if (scaled_ref_buf != NULL) { + // Update the stride since we are using scaled reference buffer + first_ref_buf = scaled_ref_buf; + recon_y_stride = first_ref_buf->y_stride; + recon_uv_stride = first_ref_buf->uv_stride; + uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height); + } + + // Disable golden frame for svc first pass for now. + gld_yv12 = NULL; + set_ref_ptrs(cm, xd, ref_frame, NONE); + } + vp9_setup_src_planes(x, cpi->Source, 0, 0); - vp9_setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL); + vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); vp9_setup_dst_planes(xd, new_yv12, 0, 0); xd->mi_8x8 = cm->mi_grid_visible; @@ -583,7 +632,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int tmp_err, motion_error; int_mv mv, tmp_mv; - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; motion_error = zz_motion_search(x); // Assume 0,0 motion with no mv overhead. mv.as_int = tmp_mv.as_int = 0; @@ -615,7 +664,7 @@ void vp9_first_pass(VP9_COMP *cpi) { } // Search in an older reference frame. - if (cm->current_video_frame > 1) { + if (cm->current_video_frame > 1 && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; @@ -633,9 +682,9 @@ void vp9_first_pass(VP9_COMP *cpi) { ++second_ref_count; // Reset to last frame as reference buffer. - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; - xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset; - xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset; + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; + xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; + xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; // In accumulating a score for the older reference frame take the // best of the motion predicted score and the intra coded error @@ -743,6 +792,7 @@ void vp9_first_pass(VP9_COMP *cpi) { FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame; + fps.spatial_layer_id = cpi->svc.spatial_layer_id; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); fps.sr_coded_error = (double)(sr_coded_error >> 8); @@ -792,20 +842,28 @@ void vp9_first_pass(VP9_COMP *cpi) { (twopass->this_frame_stats.pcnt_inter > 0.20) && ((twopass->this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); + if (gld_yv12 != NULL) { + vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } twopass->sr_update_lag = 1; } else { ++twopass->sr_update_lag; } - // Swap frame pointers so last frame refers to the frame we just compressed. - swap_yv12(lst_yv12, new_yv12); + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + vp9_update_reference_frames(cpi); + } else { + // Swap frame pointers so last frame refers to the frame we just compressed. + swap_yv12(lst_yv12, new_yv12); + } vp9_extend_frame_borders(lst_yv12); // Special case for the first frame. Copy into the GF buffer as a second // reference. - if (cm->current_video_frame == 0) + if (cm->current_video_frame == 0 && gld_yv12 != NULL) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } // Use this to see what the first pass reconstruction looks like. if (0) { @@ -884,8 +942,15 @@ extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame; const FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int is_spatial_svc = (cpi->svc.number_spatial_layers > 1) && + (cpi->svc.number_temporal_layers == 1); + double frame_rate; + + if (is_spatial_svc) { + twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + } zero_stats(&twopass->total_stats); zero_stats(&twopass->total_left_stats); @@ -896,30 +961,44 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->total_stats = *twopass->stats_in_end; twopass->total_left_stats = twopass->total_stats; + frame_rate = 10000000.0 * twopass->total_stats.count / + twopass->total_stats.duration; // Each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However, the sum duration is not. // It is calculated based on the actual durations of all frames from the // first pass. - vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count / - twopass->total_stats.duration); + + if (is_spatial_svc) { + vp9_update_spatial_layer_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(twopass->total_stats.duration * + cpi->svc.layer_context[cpi->svc.spatial_layer_id].target_bandwidth / + 10000000.0); + } else { + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = (int64_t)(twopass->total_stats.duration * + oxcf->target_bandwidth / 10000000.0); + } cpi->output_framerate = oxcf->framerate; - twopass->bits_left = (int64_t)(twopass->total_stats.duration * - oxcf->target_bandwidth / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain // are still boosted appropriately for KF/GF/ARF. - twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + if (!is_spatial_svc) { + // We don't know the number of MBs for each layer at this point. + // So we will do it later. + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } // This variable monitors how far behind the second ref update is lagging. twopass->sr_update_lag = 1; - // Scan the first pass file and calculate an average Intra / Inter error score - // ratio for the sequence. + // Scan the first pass file and calculate an average Intra / Inter error + // score ratio for the sequence. { double sum_iiratio = 0.0; start_pos = twopass->stats_in; @@ -1566,8 +1645,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the bits to be allocated to the group as a whole. if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) { - twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits * - (gf_group_err / cpi->twopass.kf_group_error_left)); + twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits * + (gf_group_err / twopass->kf_group_error_left)); } else { twopass->gf_group_bits = 0; } @@ -2138,14 +2217,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - const int frames_left = (int)(twopass->total_stats.count - - cm->current_video_frame); + int frames_left; FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; double this_frame_coded_error; int target; + LAYER_CONTEXT *lc = NULL; + int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); + + if (is_spatial_svc) { + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + frames_left = (int)(twopass->total_stats.count - + lc->current_video_frame_in_layer); + } else { + frames_left = (int)(twopass->total_stats.count - + cm->current_video_frame); + } if (!twopass->stats_in) return; @@ -2158,9 +2247,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); + if (is_spatial_svc && twopass->kf_intra_err_min == 0) { + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } + if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if (cm->current_video_frame == 0) { + } else if (cm->current_video_frame == 0 || + (is_spatial_svc && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2183,6 +2278,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Define next KF group and assign bits to it. this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); + // Don't place key frame in any enhancement layers in spatial svc + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id > 0) { + cm->frame_type = INTER_FRAME; + } } else { cm->frame_type = INTER_FRAME; } diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 278b22c5c..9268c38a8 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -35,6 +35,7 @@ typedef struct { double new_mv_count; double duration; double count; + int64_t spatial_layer_id; } FIRSTPASS_STATS; struct twopass_rc { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 2ae8a2aa7..e0299f07f 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -23,6 +23,11 @@ // #define NEW_DIAMOND_SEARCH +static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -370,9 +375,9 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, unsigned int sse; unsigned int whichdir; int thismse; - unsigned int halfiters = iters_per_step; - unsigned int quarteriters = iters_per_step; - unsigned int eighthiters = iters_per_step; + const unsigned int halfiters = iters_per_step; + const unsigned int quarteriters = iters_per_step; + const unsigned int eighthiters = iters_per_step; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; @@ -399,7 +404,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. - comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -736,7 +741,6 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { unsigned int bestsad; - MV this_mv; const MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *what = x->plane[0].src.buf; const int what_stride = x->plane[0].src.stride; @@ -744,8 +748,7 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, const uint8_t *base_offset = xd->plane[0].pre[0].buf; const uint8_t *this_offset = base_offset + (best_mv->row * in_what_stride) + best_mv->col; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; + const MV this_mv = {best_mv->row * 8, best_mv->col * 8}; return vfp->svaf(this_offset, in_what_stride, 0, 0, what, what_stride, &bestsad, second_pred) + (use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, @@ -908,7 +911,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, const int what_stride = x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; unsigned int bestsad = INT_MAX; int ref_row, ref_col; @@ -960,8 +962,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, for (i = 0; i < 4; ++i) { if (sad_array[i] < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -979,8 +980,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, bestsad); if (thissad < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1331,10 +1331,8 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV *center_mv, MV *best_mv) { int r, c; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); @@ -1342,25 +1340,22 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, - 0x7fffffff) + + int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { for (c = col_min; c < col_max; ++c) { - const MV this_mv = {r, c}; - const uint8_t *check_here = &in_what[r * in_what_stride + c]; - const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - best_sad) + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + const MV mv = {r, c}; + const int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); if (sad < best_sad) { best_sad = sad; - *best_mv = this_mv; + *best_mv = mv; } } } @@ -1472,7 +1467,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, MV this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned int thissad; int ref_row = ref_mv->row; int ref_col = ref_mv->col; @@ -1512,7 +1506,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; + unsigned int thissad = (unsigned int)sad_array8[i]; if (thissad < bestsad) { this_mv.col = c; @@ -1537,12 +1531,12 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + unsigned int thissad = sad_array[i]; if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1557,8 +1551,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, + check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.col = c; @@ -1585,41 +1579,34 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const uint8_t *const what = x->plane[0].src.buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + error_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1633,7 +1620,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, ref_mv->col += neighbors[best_site].col; } } - return bestsad; + return best_sad; } int vp9_refining_search_sadx4(const MACROBLOCK *x, @@ -1702,8 +1689,9 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *check_here = neighbors[j].row * in_what_stride + neighbors[j].col + best_address; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, + check_here, in_what_stride, + bestsad); if (thissad < bestsad) { thissad += mvsad_err_cost(&this_mv, &fcenter_mv, @@ -1740,48 +1728,36 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - unsigned int thissad; - MV this_mv; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - /* Get compound pred by averaging two pred blocks. */ - unsigned int bestsad = fn_ptr->sdaf(what, what_stride, - best_address, in_what_stride, - second_pred, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + second_pred, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; ++i) { int best_site = -1; - for (j = 0; j < 8; j++) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; + for (j = 0; j < 8; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - - thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, - second_pred, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1795,5 +1771,5 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, ref_mv->col += neighbors[best_site].col; } } - return bestsad; + return best_sad; } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index ca91a67d5..f112fa24f 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -27,8 +27,10 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_bitstream.h" -#include "vp9/encoder/vp9_craq.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_firstpass.h" @@ -38,17 +40,11 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_temporal_filter.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_resize.h" #include "vp9/encoder/vp9_svc_layercontext.h" -#define ALL_INTRA_MODES 0x3FF -#define INTRA_DC_ONLY 0x01 -#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) -#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) -#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) - void vp9_coef_tree_initialize(); #define DEFAULT_INTERP_FILTER SWITCHABLE @@ -62,12 +58,6 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. -// Masks for partially or completely disabling split mode -#define DISABLE_ALL_SPLIT 0x3F -#define DISABLE_ALL_INTER_SPLIT 0x1F -#define DISABLE_COMPOUND_SPLIT 0x18 -#define LAST_AND_INTRA_SPLIT_ONLY 0x1E - // Max rate target for 1080P and below encodes under normal circumstances // (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB #define MAX_MB_RATE 250 @@ -103,9 +93,6 @@ FILE *keyfile; void vp9_init_quantizer(VP9_COMP *cpi); -static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = - {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: @@ -144,17 +131,33 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { } } +static void setup_key_frame(VP9_COMP *cpi) { + vp9_setup_past_independence(&cpi->common); + + // All buffers are implicitly updated on key frames. + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; +} + +static void setup_inter_frame(VP9_COMMON *cm) { + if (cm->error_resilient_mode || cm->intra_only) + vp9_setup_past_independence(cm); + + assert(cm->frame_context_idx < FRAME_CONTEXTS); + cm->fc = cm->frame_contexts[cm->frame_context_idx]; +} + void vp9_initialize_enc() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); + vp9_init_quant_tables(); + vp9_coef_tree_initialize(); vp9_tokenize_initialize(); - vp9_init_quant_tables(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); - // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); init_done = 1; @@ -163,6 +166,7 @@ void vp9_initialize_enc() { static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + int i; // Delete sementation map vpx_free(cpi->segmentation_map); @@ -173,11 +177,13 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->coding_context.last_frame_seg_map_copy = NULL; vpx_free(cpi->complexity_map); - cpi->complexity_map = 0; - vpx_free(cpi->cyclic_refresh.map); - cpi->cyclic_refresh.map = 0; + cpi->complexity_map = NULL; + + vp9_cyclic_refresh_free(cpi->cyclic_refresh); + cpi->cyclic_refresh = NULL; + vpx_free(cpi->active_map); - cpi->active_map = 0; + cpi->active_map = NULL; vp9_free_frame_buffers(cm); @@ -195,11 +201,12 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->mb_norm_activity_map); cpi->mb_norm_activity_map = 0; - vpx_free(cpi->above_context[0]); - cpi->above_context[0] = NULL; - - vpx_free(cpi->above_seg_context); - cpi->above_seg_context = NULL; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; + vpx_free(lc->rc_twopass_stats_in.buf); + lc->rc_twopass_stats_in.buf = NULL; + lc->rc_twopass_stats_in.sz = 0; + } } // Computes a q delta (in "q index" terms) to get from a starting q value @@ -252,51 +259,12 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, return target_index - base_q_index; } -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. -static void setup_in_frame_q_adj(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - - // Make SURE use of floating point in this function is safe. - vp9_clear_system_state(); - - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; - - // Clear down the segment map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - - // Clear down the complexity map used for rd - vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); - - vp9_enable_segmentation(seg); - vp9_clearall_segfeatures(seg); - - // Select delta coding method - seg->abs_delta = SEGMENT_DELTADATA; - - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q - vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); - - // Use some of the segments for in frame Q adjustment - for (segment = 1; segment < 2; segment++) { - const int qindex_delta = - vp9_compute_qdelta_by_rate(cpi, - cm->base_qindex, - in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } -} static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + const RATE_CONTROL *const rc = &cpi->rc; struct segmentation *const seg = &cm->seg; - int high_q = (int)(cpi->rc.avg_q > 48.0); + int high_q = (int)(rc->avg_q > 48.0); int qi_delta; // Disable and clear down for KF @@ -334,9 +302,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = vp9_compute_qdelta( - cpi, cpi->rc.avg_q, (cpi->rc.avg_q * 0.875)); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); + qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 0.875); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -349,16 +316,15 @@ static void configure_static_seg_features(VP9_COMP *cpi) { // All other frames if segmentation has been enabled // First normal frame in a valid gf or alt ref group - if (cpi->rc.frames_since_golden == 0) { + if (rc->frames_since_golden == 0) { // Set up segment features for normal frames in an arf group - if (cpi->rc.source_alt_ref_active) { + if (rc->source_alt_ref_active) { seg->update_map = 0; seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = vp9_compute_qdelta(cpi, cpi->rc.avg_q, - (cpi->rc.avg_q * 1.125)); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); + qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 1.125); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -383,7 +349,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_clearall_segfeatures(seg); } - } else if (cpi->rc.is_src_frame_alt_ref) { + } else if (rc->is_src_frame_alt_ref) { // Special case where we are coding over the top of a previous // alt ref frame. // Segment coding disabled for compred testing @@ -456,554 +422,137 @@ static int is_slowest_mode(int mode) { } static void set_rd_speed_thresholds(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; int i; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; - - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG] = 0; - sf->thresh_mult[THR_NEARESTA] = 0; - - sf->thresh_mult[THR_DC] += 1000; - - sf->thresh_mult[THR_NEWMV] += 1000; - sf->thresh_mult[THR_NEWA] += 1000; - sf->thresh_mult[THR_NEWG] += 1000; - - sf->thresh_mult[THR_NEARMV] += 1000; - sf->thresh_mult[THR_NEARA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTLA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - sf->thresh_mult[THR_TM] += 1000; - - sf->thresh_mult[THR_COMP_NEARLA] += 1500; - sf->thresh_mult[THR_COMP_NEWLA] += 2000; - sf->thresh_mult[THR_NEARG] += 1000; - sf->thresh_mult[THR_COMP_NEARGA] += 1500; - sf->thresh_mult[THR_COMP_NEWGA] += 2000; - - sf->thresh_mult[THR_ZEROMV] += 2000; - sf->thresh_mult[THR_ZEROG] += 2000; - sf->thresh_mult[THR_ZEROA] += 2000; - sf->thresh_mult[THR_COMP_ZEROLA] += 2500; - sf->thresh_mult[THR_COMP_ZEROGA] += 2500; - - sf->thresh_mult[THR_H_PRED] += 2000; - sf->thresh_mult[THR_V_PRED] += 2000; - sf->thresh_mult[THR_D45_PRED ] += 2500; - sf->thresh_mult[THR_D135_PRED] += 2500; - sf->thresh_mult[THR_D117_PRED] += 2500; - sf->thresh_mult[THR_D153_PRED] += 2500; - sf->thresh_mult[THR_D207_PRED] += 2500; - sf->thresh_mult[THR_D63_PRED] += 2500; + cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + + cpi->rd_thresh_mult[THR_NEARESTMV] = 0; + cpi->rd_thresh_mult[THR_NEARESTG] = 0; + cpi->rd_thresh_mult[THR_NEARESTA] = 0; + + cpi->rd_thresh_mult[THR_DC] += 1000; + + cpi->rd_thresh_mult[THR_NEWMV] += 1000; + cpi->rd_thresh_mult[THR_NEWA] += 1000; + cpi->rd_thresh_mult[THR_NEWG] += 1000; + + cpi->rd_thresh_mult[THR_NEARMV] += 1000; + cpi->rd_thresh_mult[THR_NEARA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000; + + cpi->rd_thresh_mult[THR_TM] += 1000; + + cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000; + cpi->rd_thresh_mult[THR_NEARG] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000; + + cpi->rd_thresh_mult[THR_ZEROMV] += 2000; + cpi->rd_thresh_mult[THR_ZEROG] += 2000; + cpi->rd_thresh_mult[THR_ZEROA] += 2000; + cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500; + cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500; + + cpi->rd_thresh_mult[THR_H_PRED] += 2000; + cpi->rd_thresh_mult[THR_V_PRED] += 2000; + cpi->rd_thresh_mult[THR_D45_PRED ] += 2500; + cpi->rd_thresh_mult[THR_D135_PRED] += 2500; + cpi->rd_thresh_mult[THR_D117_PRED] += 2500; + cpi->rd_thresh_mult[THR_D153_PRED] += 2500; + cpi->rd_thresh_mult[THR_D207_PRED] += 2500; + cpi->rd_thresh_mult[THR_D63_PRED] += 2500; /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - sf->thresh_mult[THR_NEWG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX; } } static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; + const SPEED_FEATURES *const sf = &cpi->sf; int i; for (i = 0; i < MAX_REFS; ++i) - sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; - sf->thresh_mult_sub8x8[THR_LAST] += 2500; - sf->thresh_mult_sub8x8[THR_GOLD] += 2500; - sf->thresh_mult_sub8x8[THR_ALTR] += 2500; - sf->thresh_mult_sub8x8[THR_INTRA] += 2500; - sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500; // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) { + for (i = 0; i < MAX_REFS; i++) if (sf->disable_split_mask & (1 << i)) - sf->thresh_mult_sub8x8[i] = INT_MAX; - } + cpi->rd_thresh_mult_sub8x8[i] = INT_MAX; // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; -} - -static void set_good_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - int i; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 32; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed >= 5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->partition_search_type = FIXED_PARTITION; - sf->tx_size_search_method = frame_is_intra_only(cm) ? - USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } -} - -static void set_rt_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - sf->static_segmentation = 0; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - sf->encode_breakout_thresh = 1; - sf->use_fast_coef_costing = 1; - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 8; - } - if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 200; - } - if (speed >= 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->disable_filter_search_var_thresh = 100; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - sf->encode_breakout_thresh = 400; - } - if (speed >= 4) { - sf->optimize_coefficients = 0; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->use_fast_lpf_pick = 2; - sf->encode_breakout_thresh = 700; - } - if (speed >= 5) { - int i; - sf->last_partitioning_redo_frequency = 4; - sf->adaptive_rd_thresh = 5; - sf->use_fast_coef_costing = 0; - sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type != cm->frame_type || (0 == - (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); - sf->subpel_force_stop = 1; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; - sf->frame_parameter_update = 0; - sf->encode_breakout_thresh = 1000; - sf->search_method = FAST_HEX; - sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); - sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->max_intra_bsize = BLOCK_32X32; - } - if (speed >= 6) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->search_method = HEX; - } - if (speed >= 7) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_DIAMOND; - } - if (speed >= 8) { - int i; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0) - } + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; - VP9_COMMON *cm = &cpi->common; - int speed = cpi->speed; - int i; - - // Convert negative speed to positive - if (speed < 0) - speed = -speed; - +static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS + int i; for (i = 0; i < MAX_MODES; ++i) cpi->mode_chosen_counts[i] = 0; #endif - // best quality defaults - sf->frame_parameter_update = 1; - sf->search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->subpel_search_method = SUBPEL_TREE; - sf->subpel_iters_per_step = 2; - sf->subpel_force_stop = 0; - sf->optimize_coefficients = !cpi->oxcf.lossless; - sf->reduce_first_step_size = 0; - sf->auto_mv_step_size = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; - sf->tx_size_search_method = USE_FULL_RD; - sf->use_lp32x32fdct = 0; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->reference_masking = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->less_rectangular_check = 0; - sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; - sf->min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->last_partitioning_redo_frequency = 4; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->disable_split_var_thresh = 0; - sf->disable_filter_search_var_thresh = 0; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; - } - sf->use_rd_breakout = 0; - sf->skip_encode_sb = 0; - sf->use_uv_intra_rd_estimate = 0; - sf->use_fast_lpf_pick = 0; - sf->use_fast_coef_updates = 0; - sf->use_fast_coef_costing = 0; - sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->use_nonrd_pick_mode = 0; - sf->encode_breakout_thresh = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; - sf->max_intra_bsize = BLOCK_64X64; - // This setting only takes effect when partition_search_type is set - // to FIXED_PARTITION. - sf->always_this_block_size = BLOCK_16X16; - - switch (cpi->oxcf.mode) { - case MODE_BESTQUALITY: - case MODE_SECONDPASS_BEST: // This is the best quality mode. - cpi->diamond_search_sad = vp9_full_range_search; - break; - case MODE_FIRSTPASS: - case MODE_GOODQUALITY: - case MODE_SECONDPASS: - set_good_speed_feature(cm, sf, speed); - break; - case MODE_REALTIME: - set_rt_speed_feature(cm, sf, speed); - break; - }; /* switch */ + vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting set_rd_speed_thresholds(cpi); set_rd_speed_thresholds_sub8x8(cpi); - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (cpi->pass == 1) { - sf->optimize_coefficients = 0; - } - - // No recode for 1 pass. - if (cpi->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = 0; - } - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { cpi->mb.fwd_txm4x4 = vp9_fwht4x4; } - - if (cpi->sf.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; - cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; - } - - cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; - - if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) - sf->adaptive_pred_interp_filter = 0; } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -1063,24 +612,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cm, cpi->mb_norm_activity_map, vpx_calloc(sizeof(unsigned int), cm->mb_rows * cm->mb_cols)); - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - vpx_free(cpi->above_context[0]); - CHECK_MEM_ERROR(cm, cpi->above_context[0], - vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * - MAX_MB_PLANE, - sizeof(*cpi->above_context[0]))); - - vpx_free(cpi->above_seg_context); - CHECK_MEM_ERROR(cm, cpi->above_seg_context, - vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), - sizeof(*cpi->above_seg_context))); } static void update_frame_size(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; vp9_update_frame_size(cm); @@ -1111,12 +648,13 @@ static void update_frame_size(VP9_COMP *cpi) { { int i; - for (i = 1; i < MAX_MB_PLANE; ++i) { - cpi->above_context[i] = cpi->above_context[0] + - i * sizeof(*cpi->above_context[0]) * 2 * - mi_cols_aligned_to_sb(cm->mi_cols); - } + + for (i = 0; i < MAX_MB_PLANE; ++i) + xd->above_context[i] = cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); } + + xd->above_seg_context = cpi->common.above_seg_context; } // Table that converts 0-63 Q range values passed in outside to the Qindex @@ -1209,6 +747,37 @@ static void set_tile_limits(VP9_COMP *cpi) { cm->log2_tile_rows = cpi->oxcf.tile_rows; } +static void init_rate_control(const VP9_CONFIG *oxcf, int pass, + RATE_CONTROL *rc) { + if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; + } else { + rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + } + + rc->last_q[0] = oxcf->best_allowed_q; + rc->last_q[1] = oxcf->best_allowed_q; + rc->last_q[2] = oxcf->best_allowed_q; + + rc->buffer_level = oxcf->starting_buffer_level; + rc->bits_off_target = oxcf->starting_buffer_level; + + rc->rolling_target_bits = rc->av_per_frame_bandwidth; + rc->rolling_actual_bits = rc->av_per_frame_bandwidth; + rc->long_rolling_target_bits = rc->av_per_frame_bandwidth; + rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; + + rc->total_actual_bits = 0; + rc->total_target_vs_actual = 0; +} + static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; int i; @@ -1228,43 +797,16 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { // Temporal scalability. cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && + cpi->oxcf.mode == MODE_SECONDPASS_BEST)) { vp9_init_layer_context(cpi); } // change includes all joint functionality vp9_change_config(cpi, oxcf); - // Initialize active best and worst q and average q values. - if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - cpi->rc.avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q; - cpi->rc.avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q; - cpi->rc.avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q; - } else { - cpi->rc.avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - cpi->rc.avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - cpi->rc.avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - } - cpi->rc.last_q[0] = cpi->oxcf.best_allowed_q; - cpi->rc.last_q[1] = cpi->oxcf.best_allowed_q; - cpi->rc.last_q[2] = cpi->oxcf.best_allowed_q; - - // Initialise the starting buffer levels - cpi->rc.buffer_level = cpi->oxcf.starting_buffer_level; - cpi->rc.bits_off_target = cpi->oxcf.starting_buffer_level; - - cpi->rc.rolling_target_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.rolling_actual_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.long_rolling_target_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.long_rolling_actual_bits = cpi->rc.av_per_frame_bandwidth; - - cpi->rc.total_actual_bits = 0; - cpi->rc.total_target_vs_actual = 0; - cpi->static_mb_pct = 0; cpi->lst_fb_idx = 0; @@ -1278,15 +820,11 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } -void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { +void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; - if (!cpi || !oxcf) - return; - - if (cm->version != oxcf->version) { + if (cm->version != oxcf->version) cm->version = oxcf->version; - } cpi->oxcf = *oxcf; @@ -1327,10 +865,16 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; cpi->oxcf.lossless = oxcf->lossless; - cpi->mb.e_mbd.itxm_add = cpi->oxcf.lossless ? vp9_iwht4x4_add - : vp9_idct4x4_add; + if (cpi->oxcf.lossless) { + // In lossless mode, make sure right quantizer range and correct transform + // is set. + cpi->oxcf.worst_allowed_q = 0; + cpi->oxcf.best_allowed_q = 0; + cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + } else { + cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + } cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; cpi->refresh_golden_frame = 0; @@ -1414,8 +958,9 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { } update_frame_size(cpi); - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); } @@ -1606,8 +1151,9 @@ static void free_pick_mode_context(MACROBLOCK *x) { VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; - VP9_COMP *cpi = vpx_memalign(32, sizeof(VP9_COMP)); - VP9_COMMON *cm = cpi != NULL ? &cpi->common : NULL; + VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); + VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; + RATE_CONTROL *const rc = cpi != NULL ? &cpi->rc : NULL; if (!cm) return NULL; @@ -1630,6 +1176,7 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->use_svc = 0; init_config(cpi, oxcf); + init_rate_control(&cpi->oxcf, cpi->pass, &cpi->rc); init_pick_mode_context(cpi); cm->current_video_frame = 0; @@ -1637,7 +1184,7 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { // Set reference frame sign bias for ALTREF frame to 1 (for now) cm->ref_frame_sign_bias[ALTREF_FRAME] = 1; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->gold_is_last = 0; cpi->alt_is_last = 0; @@ -1652,8 +1199,8 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); // Create a map used for cyclic background refresh. - CHECK_MEM_ERROR(cm, cpi->cyclic_refresh.map, - vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); + CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, + vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); // And a place holder structure is the coding context // for use if we want to save and restore it @@ -1675,12 +1222,12 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; - cpi->rc.frames_since_key = 8; // Sensible default for first frame. - cpi->rc.this_key_frame_forced = 0; - cpi->rc.next_key_frame_forced = 0; + rc->frames_since_key = 8; // Sensible default for first frame. + rc->this_key_frame_forced = 0; + rc->next_key_frame_forced = 0; - cpi->rc.source_alt_ref_pending = 0; - cpi->rc.source_alt_ref_active = 0; + rc->source_alt_ref_pending = 0; + rc->source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; #if CONFIG_MULTIPLE_ARF @@ -1736,17 +1283,17 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->first_time_stamp_ever = INT64_MAX; - cpi->rc.frames_till_gf_update_due = 0; + rc->frames_till_gf_update_due = 0; - cpi->rc.ni_av_qi = cpi->oxcf.worst_allowed_q; - cpi->rc.ni_tot_qi = 0; - cpi->rc.ni_frames = 0; - cpi->rc.tot_q = 0.0; - cpi->rc.avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); + rc->ni_av_qi = cpi->oxcf.worst_allowed_q; + rc->ni_tot_qi = 0; + rc->ni_frames = 0; + rc->tot_q = 0.0; + rc->avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); - cpi->rc.rate_correction_factor = 1.0; - cpi->rc.key_frame_rate_correction_factor = 1.0; - cpi->rc.gf_rate_correction_factor = 1.0; + rc->rate_correction_factor = 1.0; + rc->key_frame_rate_correction_factor = 1.0; + rc->gf_rate_correction_factor = 1.0; cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; @@ -1783,13 +1330,53 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { const size_t packet_sz = sizeof(FIRSTPASS_STATS); const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; - vp9_init_second_pass(cpi); + if (cpi->svc.number_spatial_layers > 1 + && cpi->svc.number_temporal_layers == 1) { + FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; + FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; + int i; + + for (i = 0; i < oxcf->ss_number_layers; ++i) { + FIRSTPASS_STATS *const last_packet_for_layer = + &stats[packets - oxcf->ss_number_layers + i]; + const int layer_id = (int)last_packet_for_layer->spatial_layer_id; + const int packets_in_layer = (int)last_packet_for_layer->count + 1; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; + + vpx_free(lc->rc_twopass_stats_in.buf); + + lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; + CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, + vpx_malloc(lc->rc_twopass_stats_in.sz)); + lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; + lc->twopass.stats_in = lc->twopass.stats_in_start; + lc->twopass.stats_in_end = lc->twopass.stats_in_start + + packets_in_layer - 1; + stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; + } + } + + for (i = 0; i < packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers + && stats_copy[layer_id] != NULL) { + *stats_copy[layer_id] = stats[i]; + ++stats_copy[layer_id]; + } + } + + vp9_init_second_pass_spatial_svc(cpi); + } else { + cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; + cpi->twopass.stats_in = cpi->twopass.stats_in_start; + cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + + vp9_init_second_pass(cpi); + } } - vp9_set_speed_features(cpi); + set_speed_features(cpi); // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { @@ -2155,25 +1742,11 @@ int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) { return 0; } -int vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) { - if (ref_frame_flags > 7) - return -1; - - cpi->ext_refresh_golden_frame = 0; - cpi->ext_refresh_alt_ref_frame = 0; - cpi->ext_refresh_last_frame = 0; - - if (ref_frame_flags & VP9_LAST_FLAG) - cpi->ext_refresh_last_frame = 1; - - if (ref_frame_flags & VP9_GOLD_FLAG) - cpi->ext_refresh_golden_frame = 1; - - if (ref_frame_flags & VP9_ALT_FLAG) - cpi->ext_refresh_alt_ref_frame = 1; - +void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) { + cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0; + cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0; + cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0; cpi->ext_refresh_frame_flags_pending = 1; - return 0; } static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi, @@ -2460,7 +2033,7 @@ static int recode_loop_test(const VP9_COMP *cpi, return force_recode; } -static void update_reference_frames(VP9_COMP * const cpi) { +void vp9_update_reference_frames(VP9_COMP *cpi) { VP9_COMMON * const cm = &cpi->common; // At this point the new frame has been encoded. @@ -2531,7 +2104,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2544,7 +2117,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vp9_extend_frame_inner_borders(cm->frame_to_show); } -static void scale_references(VP9_COMP *cpi) { +void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; @@ -2670,21 +2243,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + + setup_inter_frame(cm); } // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_setup_cyclic_refresh_aq(cpi); + vp9_cyclic_refresh_setup(cpi); } // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -2728,12 +2301,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + + setup_inter_frame(cm); } } @@ -2742,7 +2315,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } // transform / motion compensation build reconstruction frame @@ -2981,7 +2554,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { cpi->Source = cpi->un_scaled_source; } - scale_references(cpi); + vp9_scale_references(cpi); vp9_clear_system_state(); @@ -3018,7 +2591,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); @@ -3100,6 +2673,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_write_yuv_frame(cpi->Source); #endif + set_speed_features(cpi); + // Decide q and q bounds. q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index); @@ -3109,8 +2684,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH); } - vp9_set_speed_features(cpi); - if (cpi->sf.recode_loop == DISALLOW_RECODE) { encode_without_recode_loop(cpi, size, dest, q); } else { @@ -3159,7 +2732,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, update_reference_segmentation_map(cpi); release_scaled_references(cpi); - update_reference_frames(cpi); + vp9_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); @@ -3241,6 +2814,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; + if (cpi->use_svc) { + LAYER_CONTEXT *lc; + if (cpi->svc.number_temporal_layers > 1) { + lc = &cpi->svc.layer_context[cpi->svc.temporal_layer_id]; + } else { + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + } + ++lc->current_video_frame_in_layer; + } } // restore prev_mi @@ -3396,6 +2978,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (!cpi) return -1; + if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2) { + vp9_restore_layer_context(cpi); + } + vpx_usec_timer_start(&cmptimer); cpi->source = NULL; @@ -3528,7 +3114,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - vp9_update_layer_framerate(cpi); + vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -3585,12 +3171,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vp9_vaq_init(); } - if (cpi->use_svc) { - SvcEncode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 1) { + if (cpi->pass == 1 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2) { + } else if (cpi->pass == 2 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); + } else if (cpi->use_svc) { + SvcEncode(cpi, size, dest, frame_flags); } else { // One pass encode Pass0Encode(cpi, size, dest, frame_flags); @@ -3609,8 +3197,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_save_layer_context(cpi); } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 9925e5f19..6dbe4d474 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -23,6 +23,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" @@ -30,6 +31,7 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_variance.h" @@ -114,75 +116,6 @@ typedef enum { } THR_MODES_SUB8X8; typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5, - FAST_DIAMOND = 6 -} SEARCH_METHODS; - -typedef enum { - USE_FULL_RD = 0, - USE_LARGESTINTRA, - USE_LARGESTINTRA_MODELINTER, - USE_LARGESTALL -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1, - STRICT_NEIGHBORING_MIN_MAX = 2 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the frame, etc. - FLAG_EARLY_TERMINATE = 1 << 0, - - // Skips comp inter modes if the best so far is an intra mode. - FLAG_SKIP_COMP_BESTINTRA = 1 << 1, - - // Skips comp inter modes if the best single intermode so far does - // not have the same reference as one of the two references being - // tested. - FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, - - // Skips oblique intra modes if the best so far is an inter mode. - FLAG_SKIP_INTRA_BESTINTER = 1 << 3, - - // Skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions. - FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, - - // Skips intra modes other than DC_PRED if the source variance is small - FLAG_SKIP_INTRA_LOWVAR = 1 << 5, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - SUBPEL_TREE = 0, - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -typedef enum { - LAST_FRAME_PARTITION_OFF = 0, - LAST_FRAME_PARTITION_LOW_MOTION = 1, - LAST_FRAME_PARTITION_ALL = 2 -} LAST_FRAME_PARTITION_METHOD; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. - ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { // encode_breakout is disabled. ENCODE_BREAKOUT_DISABLED = 0, // encode_breakout is enabled. @@ -192,219 +125,6 @@ typedef enum { } ENCODE_BREAKOUT_TYPE; typedef enum { - // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, - - // Always use a fixed size partition - FIXED_PARTITION = 1, - - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION -} PARTITION_SEARCH_TYPE; - -typedef struct { - // Frame level coding parameter update - int frame_parameter_update; - - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; - - RECODE_LOOP_TYPE recode_loop; - - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. - SUBPEL_SEARCH_METHODS subpel_search_method; - - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; - - // Control when to stop subpel search - int subpel_force_stop; - - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - // This parameter controls the number of steps we'll do in a diamond - // search. - int max_step_search_steps; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. - int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - int comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Enables skipping the reconstruction step (idct, recon) in the - // intermediate steps assuming the last frame didn't have too many intra - // blocks and the q is less than a threshold. - int skip_encode_sb; - int skip_encode_frame; - - // This variable allows us to reuse the last frames partition choices - // (64x64 v 32x32 etc) for this frame. It can be set to only use the last - // frame as a starting point in low motion scenes or always use it. If set - // we use last partitioning_redo frequency to determine how often to redo - // the partitioning from scratch. Adjust_partitioning_from_last_frame - // enables us to adjust up or down one partitioning from the last frames - // partitioning. - LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; - - // Determine which method we use to determine transform size. We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Low precision 32x32 fdct keeps everything in 16 bits and thus is less - // precise but significantly faster than the non lp version. - int use_lp32x32fdct; - - // TODO(JBB): remove this as its no longer used. - - // After looking at the first set of modes (set by index here), skip - // checking modes for reference frames that don't match the reference frame - // of the best so far. - int mode_skip_start; - - // TODO(JBB): Remove this. - int reference_masking; - - PARTITION_SEARCH_TYPE partition_search_type; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. - AUTO_MIN_MAX_MODE auto_min_max_partition_size; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. - BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // How frequently we re do the partitioning from scratch. Only used if - // use_lastframe_partitioning is set. - int last_partitioning_redo_frequency; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. - int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_interp_filter; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which the split mode is disabled - unsigned int disable_split_var_thresh; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. - int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This variable enables an early break out of mode testing if the model for - // rd built from the prediction signal indicates a value that's much - // higher than the best rd we've seen so far. - int use_rd_breakout; - - // This enables us to use an estimate for intra rd based on dc mode rather - // than choosing an actual uv mode in the stage of encoding before the actual - // final encode. - int use_uv_intra_rd_estimate; - - // This feature controls how the loop filter level is determined: - // 0: Try the full image with different values. - // 1: Try a small portion of the image with different values. - // 2: Estimate the level based on quantizer and frame type - int use_fast_lpf_pick; - - // This feature limits the number of coefficients updates we actually do - // by only looking at counts from 1/2 the bands. - int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced - - // This flag controls the use of non-RD mode decision. - int use_nonrd_pick_mode; - - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; - - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. - int disable_inter_mode_mask[BLOCK_SIZES]; - - // This feature controls whether we do the expensive context update and - // calculation in the rd coefficient costing loop. - int use_fast_coef_costing; - - // This variable controls the maximum block size where intra blocks can be - // used in inter frames. - // TODO(aconverse): Fold this into one of the other many mode skips - BLOCK_SIZE max_intra_bsize; -} SPEED_FEATURES; - -typedef enum { NORMAL = 0, FOURFIVE = 1, THREEFIVE = 2, @@ -412,44 +132,12 @@ typedef enum { } VPX_SCALING; typedef enum { - VP9_LAST_FLAG = 1 << 0, - VP9_GOLD_FLAG = 1 << 1, - VP9_ALT_FLAG = 1 << 2, -} VP9_REFFRAME; - -typedef enum { USAGE_LOCAL_FILE_PLAYBACK = 0, USAGE_STREAM_FROM_SERVER = 1, USAGE_CONSTRAINED_QUALITY = 2, USAGE_CONSTANT_QUALITY = 3, } END_USAGE; -typedef struct { - // Target percentage of blocks per frame that are cyclicly refreshed. - int max_mbs_perframe; - // Maximum q-delta as percentage of base q. - int max_qdelta_perc; - // Block size below which we don't apply cyclic refresh. - BLOCK_SIZE min_block_size; - // Macroblock starting index (unit of 8x8) for cycling through the frame. - int mb_index; - // Controls how long a block will need to wait to be refreshed again. - int time_for_refresh; - // Actual number of blocks that were applied delta-q (segment 1). - int num_seg_blocks; - // Actual encoding bits for segment 1. - int actual_seg_bits; - // RD mult. parameters for segment 1. - int rdmult; - // Cyclic refresh map. - signed char *map; - // Projected rate and distortion for the current superblock. - int64_t projected_rate_sb; - int64_t projected_dist_sb; - // Thresholds applied to projected rate/distortion of the superblock. - int64_t thresh_rate_sb; - int64_t thresh_dist_sb; -} CYCLIC_REFRESH; typedef enum { // Good Quality Fast Encoding. The encoder balances quality with the // amount of time it takes to encode the output. (speed setting @@ -665,6 +353,13 @@ typedef struct VP9_COMP { // Ambient reconstruction err target for force key frames int ambient_err; + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. + int rd_thresh_mult[MAX_MODES]; + int rd_thresh_mult_sub8x8[MAX_REFS]; + int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; @@ -741,7 +436,7 @@ typedef struct VP9_COMP { unsigned char *active_map; unsigned int active_map_enabled; - CYCLIC_REFRESH cyclic_refresh; + CYCLIC_REFRESH *cyclic_refresh; fractional_mv_step_fp *find_fractional_mv_step; fractional_mv_step_comp_fp *find_fractional_mv_step_comp; @@ -799,10 +494,6 @@ typedef struct VP9_COMP { unsigned int activity_avg; unsigned int *mb_activity_map; int *mb_norm_activity_map; - int output_partition; - - // Force next frame to intra when kf_auto says so. - int force_next_frame_intra; int droppable; @@ -834,13 +525,6 @@ typedef struct VP9_COMP { // Debug / test stats int64_t mode_test_hits[BLOCK_SIZES]; #endif - - // Y,U,V,(A) - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; - - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; } VP9_COMP; void vp9_initialize_enc(); @@ -848,7 +532,7 @@ void vp9_initialize_enc(); struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); void vp9_remove_compressor(VP9_COMP *cpi); -void vp9_change_config(VP9_COMP *cpi, VP9_CONFIG *oxcf); +void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); // receive a frames worth of data. caller can assume that a copy of this // frame is made and not just a copy of the pointer.. @@ -865,7 +549,7 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); -int vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); +void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); @@ -897,8 +581,8 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc); int vp9_get_quantizer(struct VP9_COMP *cpi); -static int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { +static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -908,14 +592,22 @@ static int get_ref_frame_idx(const VP9_COMP *cpi, } } -static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, - ref_frame)]].buf; +static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( + VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { + VP9_COMMON * const cm = &cpi->common; + return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] + .buf; } -void vp9_set_speed_features(VP9_COMP *cpi); +static INLINE int get_token_alloc(int mb_rows, int mb_cols) { + // TODO(JBB): make this work for alpha channel and double check we can't + // exceed this token count if we have a 32x32 transform crossing a boundary + // at a multiple of 16. + // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full + // resolution. We assume up to 1 token per pixel, and then allow + // a head room of 4. + return mb_rows * mb_cols * (16 * 16 * 3 + 4); +} int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *reference); @@ -927,16 +619,17 @@ int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, double rate_target_ratio); -static int get_token_alloc(int mb_rows, int mb_cols) { - return mb_rows * mb_cols * (48 * 16 + 4); -} +void vp9_scale_references(VP9_COMP *cpi); + +void vp9_update_reference_frames(VP9_COMP *cpi); extern const int q_trans[]; int64_t vp9_rescale(int64_t val, int64_t num, int denom); -static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { +static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, + MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index b5f490199..92ad1e745 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -10,16 +10,18 @@ #include <assert.h> #include <limits.h> + +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_quant_common.h" + #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_quant_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpx_scale.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "./vpx_scale_rtcd.h" static int get_max_filter_level(VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 @@ -28,11 +30,11 @@ static int get_max_filter_level(VP9_COMP *cpi) { static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, - MACROBLOCKD *const xd, VP9_COMMON *const cm, int filt_level, int partial_frame) { + VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame @@ -43,7 +45,6 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial_frame) { - MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; @@ -64,7 +65,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Make a copy of the unfiltered / processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial_frame); + best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame); filt_best = filt_mid; ss_err[filt_mid] = best_err; @@ -86,7 +87,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score if (ss_err[filt_low] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_low, partial_frame); ss_err[filt_low] = filt_err; } else { filt_err = ss_err[filt_low]; @@ -105,7 +106,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { if (ss_err[filt_high] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_high, partial_frame); ss_err[filt_high] = filt_err; } else { filt_err = ss_err[filt_high]; @@ -119,7 +120,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Half the step distance if the best filter value was the same as last time if (filt_best == filt_mid) { - filter_step = filter_step / 2; + filter_step /= 2; filt_direction = 0; } else { filt_direction = (filt_best < filt_mid) ? -1 : 1; @@ -131,25 +132,24 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int method) { + LPF_PICK_METHOD method) { VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness; - if (method == 2) { + if (method == LPF_PICK_FROM_Q) { const int min_filter_level = 0; const int max_filter_level = get_max_filter_level(cpi); const int q = vp9_ac_quant(cm->base_qindex, 0); // These values were determined by linear fitting the result of the - // searched level - // filt_guess = q * 0.316206 + 3.87252 - int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18; + // searched level, filt_guess = q * 0.316206 + 3.87252 + int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == 1); + search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h index 203ef871a..7d08ddb5f 100644 --- a/vp9/encoder/vp9_picklpf.h +++ b/vp9/encoder/vp9_picklpf.h @@ -16,11 +16,13 @@ extern "C" { #endif +#include "vp9/encoder/vp9_onyx_int.h" + struct yv12_buffer_config; struct VP9_COMP; void vp9_pick_filter_level(const struct yv12_buffer_config *sd, - struct VP9_COMP *cpi, int method); + struct VP9_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 0a396fff0..c500a0163 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -26,14 +26,13 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, +static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; - int bestsme = INT_MAX; int step_param; int sadpb = x->sadperbit16; MV mvp_full; @@ -46,9 +45,6 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - int buf_offset; - int stride = xd->plane[0].pre[0].stride; - const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); if (scaled_ref_frame) { @@ -77,11 +73,14 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } - return INT_MAX; + return; } } - - mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + assert(x->mv_best_ref_index[ref] <= 2); + if (x->mv_best_ref_index[ref] < 2) + mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + else + mvp_full = x->pred_mv[ref].as_mv; mvp_full.col >>= 3; mvp_full.row >>= 3; @@ -129,17 +128,6 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } - - // TODO(jingning) This step can be merged into full pixel search step in the - // re-designed log-diamond search - buf_offset = tmp_mv->as_mv.row * stride + tmp_mv->as_mv.col; - - // Find sad for current vector. - bestsme = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf + buf_offset, - stride, 0x7fffffff); - - return bestsme; } static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -184,6 +172,8 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + x->pred_mv[ref].as_mv = *tmp_mv; } static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -196,7 +186,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, int rate; int64_t dist; - struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); @@ -233,12 +222,22 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd = INT64_MAX; int64_t this_rd = INT64_MAX; - const int64_t inter_mode_thresh = 300; - const int64_t intra_mode_cost = 50; - int rate = INT_MAX; int64_t dist = INT64_MAX; + VP9_COMMON *cm = &cpi->common; + int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); + + const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, + intra_cost_penalty, 0); + const int64_t intra_mode_cost = 50; + + unsigned char segment_id = mbmi->segment_id; + const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame. + int mode_idx[MB_MODE_COUNT] = {0}; + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; @@ -256,7 +255,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ? EIGHTTAP : cpi->common.interp_filter; mbmi->skip = 0; - mbmi->segment_id = 0; + mbmi->segment_id = segment_id; xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { @@ -282,6 +281,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; + // Set conversion index for LAST_FRAME. + if (ref_frame == LAST_FRAME) { + mode_idx[NEARESTMV] = THR_NEARESTMV; // LAST_FRAME, NEARESTMV + mode_idx[NEARMV] = THR_NEARMV; // LAST_FRAME, NEARMV + mode_idx[ZEROMV] = THR_ZEROMV; // LAST_FRAME, ZEROMV + mode_idx[NEWMV] = THR_NEWMV; // LAST_FRAME, NEWMV + } + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; @@ -289,13 +296,17 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (1 << INTER_OFFSET(this_mode))) continue; + if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * + rd_thresh_freq_fact[this_mode] >> 5) || + rd_threshes[mode_idx[this_mode]] == INT_MAX) + continue; + if (this_mode == NEWMV) { if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; - x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = - full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame]); + full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame]); if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) continue; @@ -345,6 +356,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); rate += x->mbmode_cost[this_mode]; + rate += intra_cost_penalty; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd + intra_mode_cost < best_rd) { diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index edc48bb16..3c8baad79 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -55,10 +55,9 @@ static int kf_low = 400; // formulaic approach to facilitate easier adjustment of the Q tables. // The formulae were derived from computing a 3rd order polynomial best // fit to the original data (after plotting real maxq vs minq (not q index)) -static int calculate_minq_index(double maxq, - double x3, double x2, double x1, double c) { +static int get_minq_index(double maxq, double x3, double x2, double x1) { int i; - const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c, + const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); // Special case handling to deal with the step from q2.0 @@ -66,57 +65,26 @@ static int calculate_minq_index(double maxq, if (minqtarget <= 2.0) return 0; - for (i = 0; i < QINDEX_RANGE; i++) { + for (i = 0; i < QINDEX_RANGE; i++) if (minqtarget <= vp9_convert_qindex_to_q(i)) return i; - } return QINDEX_RANGE - 1; } -void vp9_rc_init_minq_luts(void) { +void vp9_rc_init_minq_luts() { int i; for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - - kf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.000001, - -0.0004, - 0.15, - 0.0); - kf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.000002, - -0.0012, - 0.50, - 0.0); - - gf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.32, - 0.0); - gf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.50, - 0.0); - afq_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.33, - 0.0); - afq_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.55, - 0.0); - inter_minq[i] = calculate_minq_index(maxq, - 0.00000271, - -0.00113, - 0.75, - 0.0); + kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); + kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); + gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); + gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); + afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); + afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -183,25 +151,6 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { cm->fc = cc->fc; } -void vp9_setup_key_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - vp9_setup_past_independence(cm); - - /* All buffers are implicitly updated on key frames. */ - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; -} - -void vp9_setup_inter_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - if (cm->error_resilient_mode || cm->intra_only) - vp9_setup_past_independence(cm); - - assert(cm->frame_context_idx < FRAME_CONTEXTS); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; -} - static int estimate_bits_at_q(int frame_kind, int q, int mbs, double correction_factor) { const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); @@ -984,7 +933,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate. - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -1036,6 +985,9 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, q /= 3; if (q == 0) q++; + if (cpi->sf.partition_check == 1) + q -= 10; + if (q < *bottom_index) *bottom_index = q; else if (q > *top_index) diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index ed6266fe2..87421af5f 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -87,13 +87,9 @@ struct VP9_COMP; void vp9_save_coding_context(struct VP9_COMP *cpi); void vp9_restore_coding_context(struct VP9_COMP *cpi); -void vp9_setup_key_frame(struct VP9_COMP *cpi); -void vp9_setup_inter_frame(struct VP9_COMP *cpi); - double vp9_convert_qindex_to_q(int qindex); -// initialize luts for minq -void vp9_rc_init_minq_luts(void); +void vp9_rc_init_minq_luts(); // Generally at the high level, the following flow is expected // to be enforced for rate control: diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c7e730aad..ba7d4db12 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -244,7 +244,6 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { static void set_block_thresholds(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; - const SPEED_FEATURES *const sf = &cpi->sf; int i, bsize, segment_id; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { @@ -261,13 +260,13 @@ static void set_block_thresholds(VP9_COMP *cpi) { for (i = 0; i < MAX_MODES; ++i) cpi->rd_threshes[segment_id][bsize][i] = - sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4 + cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - sf->thresh_mult_sub8x8[i] < thresh_max - ? sf->thresh_mult_sub8x8[i] * t / 4 + cpi->rd_thresh_mult_sub8x8[i] < thresh_max + ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } @@ -566,7 +565,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; - uint8_t *p_tok = x->token_cache; + uint8_t token_cache[32 * 32]; int pt = combine_entropy_contexts(*A, *L); int c, cost; // Check for consistency of tx_size with mode info @@ -584,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int v = qcoeff[0]; int prev_t = vp9_dct_value_tokens_ptr[v].token; cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; - p_tok[0] = vp9_pt_energy_class[prev_t]; + token_cache[0] = vp9_pt_energy_class[prev_t]; ++token_costs; // ac tokens @@ -597,9 +596,9 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (use_fast_coef_costing) { cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v]; } else { - pt = get_coef_context(nb, p_tok, c); + pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; - p_tok[rc] = vp9_pt_energy_class[t]; + token_cache[rc] = vp9_pt_energy_class[t]; } prev_t = t; if (!--band_left) { @@ -613,7 +612,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (use_fast_coef_costing) { cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; } else { - pt = get_coef_context(nb, p_tok, c); + pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } } @@ -1450,9 +1449,9 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode; } -static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, +static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { - MACROBLOCK *const x = &cpi->mb; + const MACROBLOCK *const x = &cpi->mb; const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. @@ -1669,6 +1668,45 @@ static INLINE int mv_has_subpel(const MV *mv) { return (mv->row & 0x0F) || (mv->col & 0x0F); } +// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion. +// TODO(aconverse): Find out if this is still productive then clean up or remove +static int check_best_zero_mv( + const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int disable_inter_mode_mask, int this_mode, int ref_frame, + int second_ref_frame) { + if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && + frame_mv[this_mode][ref_frame].as_int == 0 && + (second_ref_frame == NONE || + frame_mv[this_mode][second_ref_frame].as_int == 0)) { + int rfc = mode_context[ref_frame]; + int c1 = cost_mv_ref(cpi, NEARMV, rfc); + int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); + int c3 = cost_mv_ref(cpi, ZEROMV, rfc); + + if (this_mode == NEARMV) { + if (c1 > c3) return 0; + } else if (this_mode == NEARESTMV) { + if (c2 > c3) return 0; + } else { + assert(this_mode == ZEROMV); + if (second_ref_frame == NONE) { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + return 0; + } else { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && + frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && + frame_mv[NEARMV][second_ref_frame].as_int == 0)) + return 0; + } + } + } + return 1; +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, @@ -1737,43 +1775,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (disable_inter_mode_mask & (1 << mode_idx)) continue; - // if we're near/nearest and mv == 0,0, compare to zeromv - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && - (this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && - (!has_second_rf || - frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { - int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!has_second_rf) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) - continue; - } - } - } + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, + this_mode, mbmi->ref_frame[0], + mbmi->ref_frame[1])) + continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, @@ -2166,10 +2172,9 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, max_mv = MAX(max_mv, MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); // only need to check zero mv once - if (!this_mv.as_int && zero_seen) { - x->mode_sad[ref_frame][i] = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)]; + if (!this_mv.as_int && zero_seen) continue; - } + zero_seen = zero_seen || !this_mv.as_int; row_offset = this_mv.as_mv.row >> 3; @@ -2180,9 +2185,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride, 0x7fffffff); - x->mode_sad[ref_frame][i] = this_sad; - if (this_mv.as_int == 0) - x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = this_sad; // Note if it is the best so far. if (this_sad < best_sad) { @@ -2191,12 +2193,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, } } - if (!zero_seen) - x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = - cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_buffer, ref_y_stride, - 0x7fffffff); - // Note the index of the mv that worked best in the reference list. x->mv_best_ref_index[ref_frame] = best_index; x->max_mv_context[ref_frame] = max_mv; @@ -3133,7 +3129,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; const struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; @@ -3199,7 +3194,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; @@ -3381,46 +3376,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - // TODO(aconverse): Find out if this is still productive then clean up or - // remove - // if we're near/nearest and mv == 0,0, compare to zeromv if (x->in_active_map && - !(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && - (this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && - (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mbmi->mode_context[ref_frame]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!comp_pred) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) - continue; - } - } - } + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, this_mode, ref_frame, + second_ref_frame)) + continue; } mbmi->mode = this_mode; @@ -3800,7 +3761,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; const struct segmentation *seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3860,7 +3820,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 58c5df47e..9d8da0da4 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -44,7 +44,7 @@ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *second_pred, \ unsigned int max_sad) { \ uint8_t comp_pred[m * n]; \ - comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ + vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ return sad(src_ptr, src_stride, comp_pred, m, m, n); \ } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c new file mode 100644 index 000000000..f09035077 --- /dev/null +++ b/vp9/encoder/vp9_speed_features.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_speed_features.h" + +#define ALL_INTRA_MODES 0x3FF +#define INTRA_DC_ONLY 0x01 +#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) +#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) +#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) + +// Masks for partially or completely disabling split mode +#define DISABLE_ALL_SPLIT 0x3F +#define DISABLE_ALL_INTER_SPLIT 0x1F +#define DISABLE_COMPOUND_SPLIT 0x18 +#define LAST_AND_INTRA_SPLIT_ONLY 0x1E + +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static INLINE int frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); +} + +static void set_good_speed_feature(VP9_COMP *cpi, + VP9_COMMON *cm, + SPEED_FEATURES *sf, + int speed) { + int i; + sf->adaptive_rd_thresh = 1; + sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); + sf->allow_skip_recode = 1; + + if (speed >= 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_boosted(cpi) + ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->subpel_iters_per_step = 1; + sf->mode_skip_start = 10; + sf->adaptive_pred_interp_filter = 1; + + sf->recode_loop = ALLOW_RECODE_KFARFGF; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + // Additions or changes from speed 1 for speed >= 2. + if (speed >= 2) { + sf->tx_size_search_method = frame_is_boosted(cpi) + ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->adaptive_pred_interp_filter = 2; + + sf->reference_masking = 1; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + } + // Additions or changes for speed 3 and above + if (speed >= 3) { + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->recode_loop = ALLOW_RECODE_KFMAXBW; + + sf->adaptive_rd_thresh = 3; + sf->mode_skip_start = 6; + sf->use_fast_coef_updates = 2; + sf->use_fast_coef_costing = 1; + } + // Additions or changes for speed 3 and above + if (speed >= 4) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + + sf->adaptive_rd_thresh = 4; + + // Add a couple more skip flags + sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH | + FLAG_EARLY_TERMINATE; + + sf->disable_filter_search_var_thresh = 200; + + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_lp32x32fdct = 1; + } + if (speed >= 5) { + sf->partition_search_type = FIXED_PARTITION; + sf->optimize_coefficients = 0; + sf->search_method = HEX; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; + } +} + +static void set_rt_speed_feature(VP9_COMMON *cm, + SPEED_FEATURES *sf, + int speed) { + sf->static_segmentation = 0; + sf->adaptive_rd_thresh = 1; + sf->encode_breakout_thresh = 1; + sf->use_fast_coef_costing = 1; + + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = + frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 8; + } + if (speed >= 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = + frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH + | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA + | FLAG_SKIP_INTRA_LOWVAR; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 2; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 200; + } + if (speed >= 3) { + sf->use_square_partition_only = 1; + sf->disable_filter_search_var_thresh = 100; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = 2; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + sf->allow_skip_recode = 0; + sf->optimize_coefficients = 0; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->lpf_pick = LPF_PICK_FROM_Q; + sf->encode_breakout_thresh = 700; + } + if (speed >= 4) { + int i; + sf->last_partitioning_redo_frequency = 4; + sf->adaptive_rd_thresh = 5; + sf->use_fast_coef_costing = 0; + sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; + sf->adjust_partitioning_from_last_frame = + cm->last_frame_type != cm->frame_type || (0 == + (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); + sf->subpel_force_stop = 1; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->frame_parameter_update = 0; + sf->encode_breakout_thresh = 1000; + sf->search_method = FAST_HEX; + sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); + sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->max_intra_bsize = BLOCK_32X32; + sf->allow_skip_recode = 1; + } + if (speed >= 5) { + sf->max_partition_size = BLOCK_32X32; + sf->min_partition_size = BLOCK_8X8; + sf->partition_check = + (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); + sf->partition_search_type = REFERENCE_PARTITION; + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + sf->allow_skip_recode = 0; + } + if (speed >= 6) { + sf->partition_search_type = VAR_BASED_FIXED_PARTITION; + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + } + if (speed >= 7) { + int i; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0) + } +} + +void vp9_set_speed_features(VP9_COMP *cpi) { + SPEED_FEATURES *const sf = &cpi->sf; + VP9_COMMON *const cm = &cpi->common; + const int speed = cpi->speed < 0 ? -cpi->speed : cpi->speed; + int i; + + // best quality defaults + sf->frame_parameter_update = 1; + sf->search_method = NSTEP; + sf->recode_loop = ALLOW_RECODE; + sf->subpel_search_method = SUBPEL_TREE; + sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; + sf->optimize_coefficients = !cpi->oxcf.lossless; + sf->reduce_first_step_size = 0; + sf->auto_mv_step_size = 0; + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->adaptive_rd_thresh = 0; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; + sf->tx_size_search_method = USE_FULL_RD; + sf->use_lp32x32fdct = 0; + sf->adaptive_motion_search = 0; + sf->adaptive_pred_interp_filter = 0; + sf->reference_masking = 0; + sf->partition_search_type = SEARCH_PARTITION; + sf->less_rectangular_check = 0; + sf->use_square_partition_only = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; + sf->max_partition_size = BLOCK_64X64; + sf->min_partition_size = BLOCK_4X4; + sf->adjust_partitioning_from_last_frame = 0; + sf->last_partitioning_redo_frequency = 4; + sf->disable_split_mask = 0; + sf->mode_search_skip_flags = 0; + sf->disable_split_var_thresh = 0; + sf->disable_filter_search_var_thresh = 0; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + } + sf->use_rd_breakout = 0; + sf->skip_encode_sb = 0; + sf->use_uv_intra_rd_estimate = 0; + sf->allow_skip_recode = 0; + sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; + sf->use_fast_coef_updates = 0; + sf->use_fast_coef_costing = 0; + sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->use_nonrd_pick_mode = 0; + sf->encode_breakout_thresh = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = 0; + sf->max_intra_bsize = BLOCK_64X64; + // This setting only takes effect when partition_search_type is set + // to FIXED_PARTITION. + sf->always_this_block_size = BLOCK_16X16; + + switch (cpi->oxcf.mode) { + case MODE_BESTQUALITY: + case MODE_SECONDPASS_BEST: // This is the best quality mode. + cpi->diamond_search_sad = vp9_full_range_search; + break; + case MODE_FIRSTPASS: + case MODE_GOODQUALITY: + case MODE_SECONDPASS: + set_good_speed_feature(cpi, cm, sf, speed); + break; + case MODE_REALTIME: + set_rt_speed_feature(cm, sf, speed); + break; + } + + // Slow quant, dct and trellis not worthwhile for first pass + // so make sure they are always turned off. + if (cpi->pass == 1) { + sf->optimize_coefficients = 0; + } + + // No recode for 1 pass. + if (cpi->pass == 0) { + sf->recode_loop = DISALLOW_RECODE; + sf->optimize_coefficients = 0; + } + + if (cpi->sf.subpel_search_method == SUBPEL_TREE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; + } + + cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; + + if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME && + sf->encode_breakout_thresh > cpi->encode_breakout) + cpi->encode_breakout = sf->encode_breakout_thresh; + + if (sf->disable_split_mask == DISABLE_ALL_SPLIT) + sf->adaptive_pred_interp_filter = 0; +} diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h new file mode 100644 index 000000000..5091e2b06 --- /dev/null +++ b/vp9/encoder/vp9_speed_features.h @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ + +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + DIAMOND = 0, + NSTEP = 1, + HEX = 2, + BIGDIA = 3, + SQUARE = 4, + FAST_HEX = 5, + FAST_DIAMOND = 6 +} SEARCH_METHODS; + +typedef enum { + // No recode. + DISALLOW_RECODE = 0, + // Allow recode for KF and exceeding maximum frame bandwidth. + ALLOW_RECODE_KFMAXBW = 1, + // Allow recode only for KF/ARF/GF frames. + ALLOW_RECODE_KFARFGF = 2, + // Allow recode for all frames based on bitrate constraints. + ALLOW_RECODE = 3, +} RECODE_LOOP_TYPE; + +typedef enum { + SUBPEL_TREE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + +typedef enum { + LAST_FRAME_PARTITION_OFF = 0, + LAST_FRAME_PARTITION_LOW_MOTION = 1, + LAST_FRAME_PARTITION_ALL = 2 +} LAST_FRAME_PARTITION_METHOD; + +typedef enum { + USE_FULL_RD = 0, + USE_LARGESTINTRA, + USE_LARGESTINTRA_MODELINTER, + USE_LARGESTALL +} TX_SIZE_SEARCH_METHOD; + +typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { + // Try the full image with different values. + LPF_PICK_FROM_FULL_IMAGE, + // Try a small portion of the image with different values. + LPF_PICK_FROM_SUBIMAGE, + // Estimate the level based on quantizer and frame type + LPF_PICK_FROM_Q, +} LPF_PICK_METHOD; + +typedef enum { + // Terminate search early based on distortion so far compared to + // qp step, distortion in the neighborhood of the frame, etc. + FLAG_EARLY_TERMINATE = 1 << 0, + + // Skips comp inter modes if the best so far is an intra mode. + FLAG_SKIP_COMP_BESTINTRA = 1 << 1, + + // Skips comp inter modes if the best single intermode so far does + // not have the same reference as one of the two references being + // tested. + FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, + + // Skips oblique intra modes if the best so far is an inter mode. + FLAG_SKIP_INTRA_BESTINTER = 1 << 3, + + // Skips oblique intra modes at angles 27, 63, 117, 153 if the best + // intra so far is not one of the neighboring directions. + FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, + + // Skips intra modes other than DC_PRED if the source variance is small + FLAG_SKIP_INTRA_LOWVAR = 1 << 5, +} MODE_SEARCH_SKIP_LOGIC; + +typedef enum { + // Search partitions using RD/NONRD criterion + SEARCH_PARTITION = 0, + + // Always use a fixed size partition + FIXED_PARTITION = 1, + + // Use a fixed size partition in every 64X64 SB, where the size is + // determined based on source variance + VAR_BASED_FIXED_PARTITION = 2, + + REFERENCE_PARTITION = 3, + + // Use an arbitrary partitioning scheme based on source variance within + // a 64X64 SB + VAR_BASED_PARTITION +} PARTITION_SEARCH_TYPE; + +typedef struct { + // Frame level coding parameter update + int frame_parameter_update; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). + SEARCH_METHODS search_method; + + RECODE_LOOP_TYPE recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. + SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. + int subpel_iters_per_step; + + // Control when to stop subpel search + int subpel_force_stop; + + // This parameter controls the number of steps we'll do in a diamond + // search. + int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. + int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. + int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). + int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes make the + // backgrounds very to cheap to encode, and the segmentation we have + // adds overhead. + int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 we + // we just use the best motion vector found for each frame by itself. + int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. + int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. + int skip_encode_sb; + int skip_encode_frame; + // Speed feature to allow or disallow skipping of recode at block + // level within a frame. + int allow_skip_recode; + + // This variable allows us to reuse the last frames partition choices + // (64x64 v 32x32 etc) for this frame. It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last partitioning_redo frequency to determine how often to redo + // the partitioning from scratch. Adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frames + // partitioning. + LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. + TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. + int use_lp32x32fdct; + + // TODO(JBB): remove this as its no longer used. + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. + int mode_skip_start; + + // TODO(JBB): Remove this. + int reference_masking; + + PARTITION_SEARCH_TYPE partition_search_type; + + // Used if partition_search_type = FIXED_SIZE_PARTITION + BLOCK_SIZE always_this_block_size; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. + int less_rectangular_check; + + // Disable testing non square partitions. (eg 16x32) + int use_square_partition_only; + + // Sets min and max partition sizes for this 64x64 region based on the + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. + BLOCK_SIZE min_partition_size; + BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. + int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. + int last_partitioning_redo_frequency; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. + int disable_split_mask; + + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. + int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. + int adaptive_pred_interp_filter; + + // Search through variable block partition types in non-RD mode decision + // encoding process for RTC. + int partition_check; + + // Implements various heuristics to skip searching modes + // The heuristics selected are based on flags + // defined in the MODE_SEARCH_SKIP_HEURISTICS enum + unsigned int mode_search_skip_flags; + + // A source variance threshold below which the split mode is disabled + unsigned int disable_split_var_thresh; + + // A source variance threshold below which filter search is disabled + // Choose a very large value (UINT_MAX) to use 8-tap always + unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. + int intra_y_mode_mask[TX_SIZES]; + int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. + int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. + int use_uv_intra_rd_estimate; + + // This feature controls how the loop filter level is determined. + LPF_PICK_METHOD lpf_pick; + + // This feature limits the number of coefficients updates we actually do + // by only looking at counts from 1/2 the bands. + int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced + + // This flag controls the use of non-RD mode decision. + int use_nonrd_pick_mode; + + // This variable sets the encode_breakout threshold. Currently, it is only + // enabled in real time mode. + int encode_breakout_thresh; + + // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV + // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. + int disable_inter_mode_mask[BLOCK_SIZES]; + + // This feature controls whether we do the expensive context update and + // calculation in the rd coefficient costing loop. + int use_fast_coef_costing; + + // This variable controls the maximum block size where intra blocks can be + // used in inter frames. + // TODO(aconverse): Fold this into one of the other many mode skips + BLOCK_SIZE max_intra_bsize; +} SPEED_FEATURES; + +struct VP9_COMP; + +void vp9_set_speed_features(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ + diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index eba7bc682..2b5639dcf 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -15,15 +15,23 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { const VP9_CONFIG *const oxcf = &cpi->oxcf; - int temporal_layer = 0; + int layer; + int layer_end; + cpi->svc.spatial_layer_id = 0; cpi->svc.temporal_layer_id = 0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + + if (cpi->svc.number_temporal_layers > 1) { + layer_end = cpi->svc.number_temporal_layers; + } else { + layer_end = cpi->svc.number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; + lc->current_video_frame_in_layer = 0; lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q]; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; @@ -35,11 +43,19 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->decimation_factor = 0; lrc->rate_correction_factor = 1.0; lrc->key_frame_rate_correction_factor = 1.0; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * - 1000; - lrc->buffer_level = - vp9_rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); + + if (cpi->svc.number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lrc->last_q[0] = q_trans[oxcf->best_allowed_q]; + lrc->last_q[1] = q_trans[oxcf->best_allowed_q]; + lrc->last_q[2] = q_trans[oxcf->best_allowed_q]; + } + + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); lrc->bits_off_target = lrc->buffer_level; } } @@ -49,14 +65,26 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, const int target_bandwidth) { const VP9_CONFIG *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; - int temporal_layer = 0; + int layer; + int layer_end; float bitrate_alloc = 1.0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + + if (cpi->svc.number_temporal_layers > 1) { + layer_end = cpi->svc.number_temporal_layers; + } else { + layer_end = cpi->svc.number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; - bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; + + if (cpi->svc.number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + } + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. lc->starting_buffer_level = (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); @@ -67,7 +95,11 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); // Update framerate-related quantities. - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + if (cpi->svc.number_temporal_layers > 1) { + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = oxcf->framerate; + } lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = rc->max_frame_bandwidth; // Update qp-related quantities. @@ -76,34 +108,69 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } } -void vp9_update_layer_framerate(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; +static LAYER_CONTEXT *get_layer_context(SVC *svc) { + return svc->number_temporal_layers > 1 ? + &svc->layer_context[svc->temporal_layer_id] : + &svc->layer_context[svc->spatial_layer_id]; +} + +void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { + const int layer = cpi->svc.temporal_layer_id; const VP9_CONFIG *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); RATE_CONTROL *const lrc = &lc->rc; - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). - if (temporal_layer == 0) { + if (layer == 0) { lc->avg_frame_size = lrc->av_per_frame_bandwidth; } else { - double prev_layer_framerate = oxcf->framerate / - oxcf->ts_rate_decimator[temporal_layer - 1]; - int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; + const double prev_layer_framerate = + oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; + const int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[layer - 1] * 1000; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); } } +void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + RATE_CONTROL *const lrc = &lc->rc; + + lc->framerate = framerate; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->min_frame_bandwidth = (int)(lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + lrc->max_frame_bandwidth = (int)(((int64_t)lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + lrc->max_gf_interval = 16; + + lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) + lrc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) + lrc->max_gf_interval = lrc->static_scene_max_gf_interval; +} + void vp9_restore_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - int frame_since_key = cpi->rc.frames_since_key; - int frame_to_key = cpi->rc.frames_to_key; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + const int old_frame_since_key = cpi->rc.frames_since_key; + const int old_frame_to_key = cpi->rc.frames_to_key; + cpi->rc = lc->rc; + cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; @@ -111,17 +178,35 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->output_framerate = lc->framerate; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). - cpi->rc.frames_since_key = frame_since_key; - cpi->rc.frames_to_key = frame_to_key; + if (cpi->svc.number_temporal_layers > 1) { + cpi->rc.frames_since_key = old_frame_since_key; + cpi->rc.frames_to_key = old_frame_to_key; + } } void vp9_save_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + lc->rc = cpi->rc; - lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->twopass = cpi->twopass; + lc->target_bandwidth = (int)oxcf->target_bandwidth; + lc->starting_buffer_level = oxcf->starting_buffer_level; + lc->optimal_buffer_level = oxcf->optimal_buffer_level; + lc->maximum_buffer_size = oxcf->maximum_buffer_size; lc->framerate = cpi->output_framerate; } + +void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { + int i; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + struct twopass_rc *const twopass = &cpi->svc.layer_context[i].twopass; + + cpi->svc.spatial_layer_id = i; + vp9_init_second_pass(cpi); + + twopass->total_stats.spatial_layer_id = i; + twopass->total_left_stats.spatial_layer_id = i; + } + cpi->svc.spatial_layer_id = 0; +} diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index c7a4c065c..e859a2fd5 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -27,6 +27,9 @@ typedef struct { int64_t maximum_buffer_size; double framerate; int avg_frame_size; + struct twopass_rc twopass; + struct vpx_fixed_buf rc_twopass_stats_in; + unsigned int current_video_frame_in_layer; } LAYER_CONTEXT; typedef struct { @@ -34,9 +37,10 @@ typedef struct { int temporal_layer_id; int number_spatial_layers; int number_temporal_layers; - // Layer context used for rate control in CBR mode, only defined for - // temporal layers for now. - LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; + // Layer context used for rate control in one pass temporal CBR mode or + // two pass spatial mode. Defined for temporal or spatial layers for now. + // Does not support temporal combined with spatial RC. + LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; } SVC; struct VP9_COMP; @@ -49,8 +53,12 @@ void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, const int target_bandwidth); // Prior to encoding the frame, update framerate-related quantities -// for the current layer. -void vp9_update_layer_framerate(struct VP9_COMP *const cpi); +// for the current temporal layer. +void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); + +// Update framerate-related quantities for the current spatial layer. +void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, + double framerate); // Prior to encoding the frame, set the layer context, for the current layer // to be encoded, to the cpi struct. @@ -59,6 +67,9 @@ void vp9_restore_layer_context(struct VP9_COMP *const cpi); // Save the layer context after encoding the frame. void vp9_save_layer_context(struct VP9_COMP *const cpi); +// Initialize second pass rc for spatial svc. +void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index bb5f1c23b..b6211e5fa 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -108,7 +108,7 @@ void vp9_coef_tree_initialize() { vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); } -static void fill_value_tokens() { +void vp9_tokenize_initialize() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; const vp9_extra_bit *const e = vp9_extra_bits; @@ -162,7 +162,6 @@ struct tokenize_b_args { VP9_COMP *cpi; MACROBLOCKD *xd; TOKENEXTRA **tp; - uint8_t *token_cache; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -213,7 +212,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - uint8_t *token_cache = args->token_cache; + uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; @@ -315,7 +314,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache}; + struct tokenize_b_args arg = {cpi, xd, t}; if (mbmi->skip) { if (!dry_run) cm->counts.skip[ctx][1] += skip_inc; @@ -333,7 +332,3 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } - -void vp9_tokenize_initialize() { - fill_value_tokens(); -} diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 8bc385089..996f730ef 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -216,7 +216,7 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -273,7 +273,7 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -330,7 +330,7 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -387,7 +387,7 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -614,7 +614,7 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } @@ -658,7 +658,7 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -703,7 +703,7 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 1, 17, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -747,7 +747,7 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -791,7 +791,7 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -955,7 +955,7 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -999,7 +999,7 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1043,7 +1043,7 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1089,6 +1089,23 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } + + +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + int tmp; + tmp = pred[j] + ref[j]; + comp_pred[j] = (tmp + 1) >> 1; + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 3bc2091f8..62e20dc00 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -100,21 +100,9 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_write_bit_buffer.c b/vp9/encoder/vp9_write_bit_buffer.c new file mode 100644 index 000000000..962d0ca56 --- /dev/null +++ b/vp9/encoder/vp9_write_bit_buffer.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/encoder/vp9_write_bit_buffer.h" + +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { + return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); +} + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { + const int off = (int)wb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (q == CHAR_BIT -1) { + wb->bit_buffer[p] = bit << q; + } else { + wb->bit_buffer[p] &= ~(1 << q); + wb->bit_buffer[p] |= bit << q; + } + wb->bit_offset = off + 1; +} + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) { + int bit; + for (bit = bits - 1; bit >= 0; bit--) + vp9_wb_write_bit(wb, (data >> bit) & 1); +} diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h index 1795e05e4..073608d7f 100644 --- a/vp9/encoder/vp9_write_bit_buffer.h +++ b/vp9/encoder/vp9_write_bit_buffer.h @@ -24,29 +24,11 @@ struct vp9_write_bit_buffer { size_t bit_offset; }; -static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { - return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); -} - -static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { - const int off = (int)wb->bit_offset; - const int p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - off % CHAR_BIT; - if (q == CHAR_BIT -1) { - wb->bit_buffer[p] = bit << q; - } else { - wb->bit_buffer[p] &= ~(1 << q); - wb->bit_buffer[p] |= bit << q; - } - wb->bit_offset = off + 1; -} - -static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, - int data, int bits) { - int bit; - for (bit = bits - 1; bit >= 0; bit--) - vp9_wb_write_bit(wb, (data >> bit) & 1); -} +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb); + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit); + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits); #ifdef __cplusplus diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c new file mode 100644 index 000000000..f31b176e5 --- /dev/null +++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <immintrin.h> // AVX2 +#include "vpx/vpx_integer.h" + +void vp9_sad32x32x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 32 ; i++) { + // load src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. + // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} + +void vp9_sad64x64x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; + __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; + __m256i ref3_reg, ref3next_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 64 ; i++) { + // load 64 bytes from src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); + ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); + ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); + ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); + + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. + // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} |