summary refs log tree commit diff
path: root/vp8
diff options
context:
space:
mode:
Diffstat (limited to 'vp8')
-rw-r--r-- vp8/common/onyx.h 5
-rw-r--r-- vp8/common/postproc.c 3
-rw-r--r-- vp8/decoder/decodframe.c 1
-rw-r--r-- vp8/decoder/threading.c 5
-rw-r--r-- vp8/encoder/encodeframe.c 18
-rw-r--r-- vp8/encoder/encodemb.c 1
-rw-r--r-- vp8/encoder/firstpass.c 204
-rw-r--r-- vp8/encoder/generic/csystemdependent.c 3
-rw-r--r-- vp8/encoder/mcomp.c 4
-rw-r--r-- vp8/encoder/mcomp.h 1
-rw-r--r-- vp8/encoder/onyx_if.c 322
-rw-r--r-- vp8/encoder/onyx_int.h 13
-rw-r--r-- vp8/encoder/pickinter.c 6
-rw-r--r-- vp8/encoder/quantize.c 1
-rw-r--r-- vp8/encoder/ratectrl.c 15
-rw-r--r-- vp8/encoder/rdopt.c 229
-rw-r--r-- vp8/encoder/temporal_filter.c 64
-rw-r--r-- vp8/encoder/temporal_filter.h 6
-rw-r--r-- vp8/encoder/x86/temporal_filter_apply_sse2.asm 207
-rw-r--r-- vp8/encoder/x86/temporal_filter_x86.h 27
-rw-r--r-- vp8/encoder/x86/x86_csystemdependent.c 7
-rw-r--r-- vp8/vp8_cx_iface.c 15
-rw-r--r-- vp8/vp8cx.mk 2
23 files changed, 803 insertions, 356 deletions
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 3724b11e0..426b8fc2b 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -46,7 +46,8 @@ extern "C"
typedef enum
{
USAGE_STREAM_FROM_SERVER = 0x0,
- USAGE_LOCAL_FILE_PLAYBACK = 0x1
+ USAGE_LOCAL_FILE_PLAYBACK = 0x1,
+ USAGE_CONSTRAINED_QUALITY = 0x2
} END_USAGE;
@@ -150,6 +151,7 @@ extern "C"
int fixed_q;
int worst_allowed_q;
int best_allowed_q;
+ int cq_level;
// allow internal resizing ( currently disabled in the build !!!!!)
int allow_spatial_resampling;
@@ -187,7 +189,6 @@ extern "C"
int arnr_strength ;
int arnr_type ;
-
struct vpx_fixed_buf two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 15b1c2c89..d30068ef5 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -680,7 +680,6 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
{
- char message[512];
int q = oci->filter_level * 10 / 6;
int flags = ppflags->post_proc_flag;
int deblock_level = ppflags->deblocking_level;
@@ -744,6 +743,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
#if CONFIG_POSTPROC_VISUALIZER
if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
{
+ char message[512];
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
(oci->frame_type == KEY_FRAME),
oci->refresh_golden_frame,
@@ -823,6 +823,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
{
+ char message[512];
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 4702faeed..9305a0556 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -485,7 +485,6 @@ static void setup_token_decoder(VP8D_COMP *pbi,
static void stop_token_decoder(VP8D_COMP *pbi)
{
- int i;
VP8_COMMON *pc = &pbi->common;
if (pc->multi_token_partition != ONE_PARTITION)
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index fea4e1cc1..dac990a26 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -451,7 +451,6 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
#if CONFIG_MULTITHREAD
int core_count = 0;
int ithread;
- int i;
pbi->b_multithreaded_rd = 0;
pbi->allocated_decoding_thread_count = 0;
@@ -721,7 +720,6 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
/*int mb_row;
int mb_col;
int baseline_filter_level[MAX_MB_SEGMENTS];*/
- int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
int i;
@@ -769,7 +767,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
int ibc = 0;
int num_part = 1 << pbi->common.multi_token_partition;
- int i, j;
+ int i;
volatile int *last_row_current_mb_col = NULL;
int nsync = pbi->sync_range;
@@ -809,7 +807,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
{
- int i;
xd->current_bc = &pbi->mbc[mb_row%num_part];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index cb7cc65d7..4c95f28d6 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -408,7 +408,6 @@ unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
int sum;
unsigned int a;
unsigned int b;
- unsigned int d;
/* TODO: This could also be done over smaller areas (8x8), but that would
* require extensive changes elsewhere, as lambda is assumed to be fixed
* over an entire MB in most of the code.
@@ -629,7 +628,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
- int i;
TOKENEXTRA *tp = cpi->tok;
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
@@ -712,9 +710,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
- //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
vp8cx_initialize_me_consts(cpi, cm->base_qindex);
- //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
// Copy data over into macro block data sturctures.
@@ -734,20 +730,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->activity_sum = 0;
-#if 0
- // Experimental rd code
- // 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics
- // such as cpi->rate_correction_factor that indicate relative complexity.
- /*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) )
- {
- //x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb;
- x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor);
- }
- else
- x->rdmult = cpi->RDMULT; */
- //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
-#endif
-
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 464d4a236..efcea745b 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -273,7 +273,6 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
int x;
int sz;
int next;
- int path;
int rdmult;
int rddiv;
int final_eob;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index da4d740cb..32a39c5f2 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -58,6 +58,7 @@ extern const int vp8_gf_boost_qadjustment[QINDEX_RANGE];
#define KF_MB_INTRA_MIN 300
#define GF_MB_INTRA_MIN 200
+
#define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001)
#define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0
@@ -67,6 +68,18 @@ static int vscale_lookup[7] = {0, 1, 1, 2, 2, 3, 3};
static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3};
+const int cq_level[QINDEX_RANGE] =
+{
+ 0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9,
+ 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20,
+ 20,21,22,22,23,24,24,25,26,27,27,28,29,30,30,31,
+ 32,33,33,34,35,36,36,37,38,39,39,40,41,42,42,43,
+ 44,45,46,46,47,48,49,50,50,51,52,53,54,55,55,56,
+ 57,58,59,60,60,61,62,63,64,65,66,67,67,68,69,70,
+ 71,72,73,74,75,75,76,77,78,79,80,81,82,83,84,85,
+ 86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
+};
+
void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps);
@@ -249,7 +262,6 @@ extern size_t vp8_firstpass_stats_sz(unsigned int mb_count)
* macroblock.
*/
size_t stats_sz;
- FIRSTPASS_STATS stats;
stats_sz = sizeof(FIRSTPASS_STATS) + mb_count;
stats_sz = (stats_sz + 7) & ~7;
@@ -376,8 +388,6 @@ unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi)
}
void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos)
{
- int Offset;
-
cpi->fp_motion_map_stats = target_pos;
}
@@ -907,7 +917,7 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
double pow_lowq = 0.40;
if (section_target_bandwitdh <= 0)
- return MAXQ;
+ return cpi->maxq_max_limit; // Highest value allowed
target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs);
@@ -943,10 +953,12 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
// Correction factor used for Q values >= 20
corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq);
- corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high;
+ corr_high = (corr_high < 0.05)
+ ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high;
- // Try and pick a Q that should be high enough to encode the content at the given rate.
- for (Q = 0; Q < MAXQ; Q++)
+ // Try and pick a max Q that will be high enough to encode the
+ // content at the given rate.
+ for (Q = cpi->maxq_min_limit; Q < cpi->maxq_max_limit; Q++)
{
int bits_per_mb_at_this_q;
@@ -965,6 +977,28 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
break;
}
+ // Restriction on active max q for constrained quality mode.
+ if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (Q < cpi->cq_target_quality) )
+ //(Q < cpi->oxcf.cq_level;) )
+ {
+ Q = cpi->cq_target_quality;
+ //Q = cpi->oxcf.cq_level;
+ }
+
+ // Adjust maxq_min_limit and maxq_max_limit limits based on
+ // average q observed in clip for non kf/gf/arf frames
+ // Give average a chance to settle though.
+ if ( (cpi->ni_frames >
+ ((unsigned int)cpi->total_stats->count >> 8)) &&
+ (cpi->ni_frames > 150) )
+ {
+ cpi->maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality)
+ ? (cpi->ni_av_qi + 32) : cpi->worst_quality;
+ cpi->maxq_min_limit = ((cpi->ni_av_qi - 32) > cpi->best_quality)
+ ? (cpi->ni_av_qi - 32) : cpi->best_quality;
+ }
+
return Q;
}
static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width)
@@ -1113,6 +1147,79 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
+
+// For cq mode estimate a cq level that matches the observed
+// complexity and data rate.
+static int estimate_cq(VP8_COMP *cpi, double section_err,
+ int section_target_bandwitdh, int Height, int Width)
+{
+ int Q;
+ int num_mbs = ((Height * Width) / (16 * 16));
+ int target_norm_bits_per_mb;
+
+ double err_per_mb = section_err / num_mbs;
+ double correction_factor;
+ double corr_high;
+ double speed_correction = 1.0;
+ double pow_highq = 0.90;
+ double pow_lowq = 0.40;
+ double clip_iiratio;
+ double clip_iifactor;
+
+ target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))
+ ? (512 * section_target_bandwitdh) / num_mbs
+ : 512 * (section_target_bandwitdh / num_mbs);
+
+ // Corrections for higher compression speed settings
+ // (reduced compression expected)
+ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
+ {
+ if (cpi->oxcf.cpu_used <= 5)
+ speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04);
+ else
+ speed_correction = 1.25;
+ }
+ // II ratio correction factor for clip as a whole
+ clip_iiratio = cpi->total_stats->intra_error /
+ DOUBLE_DIVIDE_CHECK(cpi->total_stats->coded_error);
+ clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
+ if (clip_iifactor < 0.80)
+ clip_iifactor = 0.80;
+
+ // Correction factor used for Q values >= 20
+ corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq);
+ corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high;
+
+ // Try and pick a Q that can encode the content at the given rate.
+ for (Q = 0; Q < MAXQ; Q++)
+ {
+ int bits_per_mb_at_this_q;
+
+ if (Q < 50)
+ {
+ correction_factor =
+ pow( err_per_mb / BASE_ERRPERMB, (pow_lowq + Q * 0.01));
+
+ correction_factor = (correction_factor < 0.05) ? 0.05
+ : (correction_factor > 5.0) ? 5.0
+ : correction_factor;
+ }
+ else
+ correction_factor = corr_high;
+
+ bits_per_mb_at_this_q =
+ (int)( .5 + correction_factor *
+ speed_correction *
+ clip_iifactor *
+ (double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0);
+
+ if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
+ break;
+ }
+
+ return cq_level[Q];
+}
+
extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);
void vp8_init_second_pass(VP8_COMP *cpi)
@@ -1268,7 +1375,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// what level of boost is appropriate for the GF or ARF that will be coded with the group
i = 0;
- while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
+ while (((i < cpi->static_scene_max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
{
double r;
double this_frame_mvr_ratio;
@@ -1378,18 +1485,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
boost_score += (decay_accumulator * r);
// Break out conditions.
- if ( /* i>4 || */
+ if ( /* i>4 || */
+ // Break at cpi->max_gf_interval unless almost totally static
+ (i >= cpi->max_gf_interval && (loop_decay_rate < 0.99)) ||
(
- (i > MIN_GF_INTERVAL) && // Dont break out with a very short interval
- ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) && // Dont break out very close to a key frame
+ // Don't break out with a very short interval
+ (i > MIN_GF_INTERVAL) &&
+ // Don't break out very close to a key frame
+ ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) &&
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
((mv_ratio_accumulator > 100.0) ||
(abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) ||
- ((boost_score - old_boost_score) < 2.0)
- )
- )
- )
+ ((boost_score - old_boost_score) < 2.0))
+ ) )
{
boost_score = old_boost_score;
break;
@@ -1766,7 +1875,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
vp8_avg_stats(&sectionstats);
- cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+ cpi->section_intra_rating =
+ sectionstats.intra_error /
+ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
//if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
@@ -1993,21 +2104,48 @@ void vp8_second_pass(VP8_COMP *cpi)
if (cpi->common.current_video_frame == 0)
{
- // guess at 2nd pass q
cpi->est_max_qcorrection_factor = 1.0;
- tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left), cpi->common.Height, cpi->common.Width);
- if (tmp_q < cpi->worst_quality)
- {
- cpi->active_worst_quality = tmp_q;
- cpi->ni_av_qi = tmp_q;
- }
- else
+ // Experimental code to try and set a cq_level in constrained
+ // quality mode.
+ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY )
{
- cpi->active_worst_quality = cpi->worst_quality;
- cpi->ni_av_qi = cpi->worst_quality;
+ int est_cq;
+
+ est_cq =
+ estimate_cq( cpi,
+ (cpi->total_coded_error_left / frames_left),
+ (int)(cpi->bits_left / frames_left),
+ cpi->common.Height, cpi->common.Width);
+
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
+ if ( est_cq > cpi->cq_target_quality )
+ cpi->cq_target_quality = est_cq;
}
+
+ // guess at maxq needed in 2nd pass
+ cpi->maxq_max_limit = cpi->worst_quality;
+ cpi->maxq_min_limit = cpi->best_quality;
+ tmp_q = estimate_max_q( cpi,
+ (cpi->total_coded_error_left / frames_left),
+ (int)(cpi->bits_left / frames_left),
+ cpi->common.Height,
+ cpi->common.Width);
+
+ // Limit the maxq value returned subsequently.
+ // This increases the risk of overspend or underspend if the initial
+ // estimate for the clip is bad, but helps prevent excessive
+ // variation in Q, especially near the end of a clip
+ // where for example a small overspend may cause Q to crash
+ cpi->maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality)
+ ? (tmp_q + 32) : cpi->worst_quality;
+ cpi->maxq_min_limit = ((tmp_q - 32) > cpi->best_quality)
+ ? (tmp_q - 32) : cpi->best_quality;
+
+ cpi->active_worst_quality = tmp_q;
+ cpi->ni_av_qi = tmp_q;
}
+
// The last few frames of a clip almost always have to few or too many
// bits and for the sake of over exact rate control we dont want to make
// radical adjustments to the allowed quantizer range just to use up a
@@ -2029,13 +2167,6 @@ void vp8_second_pass(VP8_COMP *cpi)
cpi->active_worst_quality --;
cpi->active_worst_quality = ((cpi->active_worst_quality * 3) + tmp_q + 2) / 4;
-
- // Clamp to user set limits
- if (cpi->active_worst_quality > cpi->worst_quality)
- cpi->active_worst_quality = cpi->worst_quality;
- else if (cpi->active_worst_quality < cpi->best_quality)
- cpi->active_worst_quality = cpi->best_quality;
-
}
cpi->frames_to_key --;
@@ -2157,6 +2288,9 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->common.frame_type = KEY_FRAME;
+ // is this a forced key frame by interval
+ cpi->this_key_frame_forced = cpi->next_key_frame_forced;
+
// Clear the alt ref active flag as this can never be active on a key frame
cpi->source_alt_ref_active = FALSE;
@@ -2219,7 +2353,11 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
kf_group_err /= 2.0;
kf_group_intra_err /= 2.0;
kf_group_coded_err /= 2.0;
+
+ cpi->next_key_frame_forced = TRUE;
}
+ else
+ cpi->next_key_frame_forced = FALSE;
// Special case for the last frame of the file
if (cpi->stats_in >= cpi->stats_in_end)
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index be00d0218..4738a5b28 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -91,8 +91,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sad;
+#endif
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 9b91739cc..d9923fbe9 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -408,6 +408,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
break;
case 3:
+ default:
this_mv.col += 4;
this_mv.row += 4;
diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
@@ -1387,8 +1388,6 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
else
return INT_MAX;
}
-#endif
-
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
{
@@ -1541,6 +1540,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
else
return INT_MAX;
}
+#endif /* !(CONFIG_REALTIME_ONLY) */
#ifdef ENTROPY_STATS
void print_mode_context(void)
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 122debcae..7600f87fc 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -24,7 +24,6 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8) // Max full pel mv specified in 1/8 pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
-#define MAX_POSSIBLE_MV (1 << 11) // Maximum MV in 1/8 pel units
extern void print_mode_context(void);
extern int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight);
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 56f7ef6f8..77fbb29b1 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -155,25 +155,25 @@ extern const int vp8cx_base_skip_false_prob[128];
// Tables relating active max Q to active min Q
static const int kf_low_motion_minq[QINDEX_RANGE] =
{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4,
- 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10,10,
- 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
- 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
- 27,27,28,28,29,29,30,30,31,32,33,34,35,36,37,38,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
+ 3,3,3,3,3,3,4,4,4,5,5,5,5,5,6,6,
+ 6,6,7,7,8,8,8,8,9,9,10,10,10,10,11,11,
+ 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16,
+ 16,16,17,17,18,18,18,18,19,20,20,21,21,22,23,23
};
static const int kf_high_motion_minq[QINDEX_RANGE] =
{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
- 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
- 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10,10,
- 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
- 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
- 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
- 35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3,
+ 3,3,3,3,4,4,4,4,5,5,5,5,5,5,6,6,
+ 6,6,7,7,8,8,8,8,9,9,10,10,10,10,11,11,
+ 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16,
+ 16,16,17,17,18,18,18,18,19,19,20,20,20,20,21,21,
+ 21,21,22,22,23,23,24,25,25,26,26,27,28,28,29,30
};
static const int gf_low_motion_minq[QINDEX_RANGE] =
{
@@ -195,7 +195,7 @@ static const int gf_mid_motion_minq[QINDEX_RANGE] =
22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29,
30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37,
38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48,
- 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
};
static const int gf_high_motion_minq[QINDEX_RANGE] =
{
@@ -206,7 +206,7 @@ static const int gf_high_motion_minq[QINDEX_RANGE] =
25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32,
33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40,
41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54,
- 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80,
+ 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80
};
static const int inter_minq[QINDEX_RANGE] =
{
@@ -314,7 +314,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
vpx_free(cpi->tok);
cpi->tok = 0;
- // Structure used to minitor GF useage
+ // Structure used to monitor GF usage
if (cpi->gf_active_flags != 0)
vpx_free(cpi->gf_active_flags);
@@ -325,6 +325,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
cpi->mb.pip = 0;
+#if !(CONFIG_REALTIME_ONLY)
if(cpi->total_stats)
vpx_free(cpi->total_stats);
@@ -334,6 +335,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = 0;
+#endif
}
static void enable_segmentation(VP8_PTR ptr)
@@ -576,7 +578,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
cpi->mbs_tested_so_far = 0;
- // best quality
+ // best quality defaults
sf->RD = 1;
sf->search_method = NSTEP;
sf->improved_quant = 1;
@@ -592,6 +594,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->iterative_sub_pixel = 1;
sf->optimize_coefficients = 1;
sf->use_fastquant_for_pick = 0;
+ sf->no_skip_block4x4_search = 1;
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -794,6 +797,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->first_step = 1;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->no_skip_block4x4_search = 0;
}
if (Speed > 1)
@@ -1268,6 +1272,15 @@ void vp8_set_speed_features(VP8_COMP *cpi)
};
+ // Slow quant, dct and trellis not worthwhile for first pass
+ // so make sure they are always turned off.
+ if ( cpi->pass == 1 )
+ {
+ sf->improved_quant = 0;
+ sf->optimize_coefficients = 0;
+ sf->improved_dct = 0;
+ }
+
if (cpi->sf.search_method == NSTEP)
{
vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
@@ -1437,6 +1450,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+#if !(CONFIG_REALTIME_ONLY)
if(cpi->total_stats)
vpx_free(cpi->total_stats);
@@ -1450,6 +1464,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
if(!cpi->total_stats || !cpi->this_frame_stats)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate firstpass stats");
+#endif
}
@@ -1486,21 +1501,28 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
- cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2;
- //cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1;
- //cpi->max_gf_interval = 24;
+ // Set Maximum gf/arf interval
+ cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
- if (cpi->max_gf_interval < 12)
+ if(cpi->max_gf_interval < 12)
cpi->max_gf_interval = 12;
+ // Extended interval for genuinely static scenes
+ cpi->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
- // Special conditions when altr ref frame enabled in lagged compress mode
+ // Special conditions when altr ref frame enabled in lagged compress mode
if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames)
{
if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;
+
+ if (cpi->static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1)
+ cpi->static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1;
}
+
+ if ( cpi->max_gf_interval > cpi->static_scene_max_gf_interval )
+ cpi->max_gf_interval = cpi->static_scene_max_gf_interval;
}
@@ -1540,6 +1562,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->auto_worst_q = 0;
cpi->oxcf.best_allowed_q = MINQ;
cpi->oxcf.worst_allowed_q = MAXQ;
+ cpi->oxcf.cq_level = MINQ;
cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER;
cpi->oxcf.starting_buffer_level = 4000;
@@ -1640,6 +1663,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
+ cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
if (oxcf->fixed_q >= 0)
{
@@ -1729,6 +1753,8 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
cpi->best_quality = cpi->oxcf.best_allowed_q;
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
+
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
@@ -1925,6 +1951,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
+ cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
if (oxcf->fixed_q >= 0)
{
@@ -2017,6 +2044,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
+
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
@@ -2258,6 +2287,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
cpi->key_frame_frequency = cpi->oxcf.key_freq;
+ cpi->this_key_frame_forced = FALSE;
+ cpi->next_key_frame_forced = FALSE;
cpi->source_alt_ref_pending = FALSE;
cpi->source_alt_ref_active = FALSE;
@@ -3081,9 +3112,6 @@ static int pick_frame_size(VP8_COMP *cpi)
}
}
- // Note target_size in bits * 256 per MB
- cpi->target_bits_per_mb = (cpi->this_frame_target * 256) / cpi->common.MBs;
-
return 1;
}
static void set_quantizer(VP8_COMP *cpi, int Q)
@@ -3133,8 +3161,8 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
// Update data structure that monitors level of reference to last GF
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
- // this frame refreshes means next frames don't unless specified by user
+ // this frame refreshes means next frames don't unless specified by user
cpi->common.frames_since_golden = 0;
// Clear the alternate reference update pending flag.
@@ -3510,8 +3538,25 @@ static BOOL recode_loop_test( VP8_COMP *cpi,
{
force_recode = TRUE;
}
- // Specific rate control mode related tests
- // TBD
+ // Special Constrained quality tests
+ else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
+ {
+ // Undershoot and below auto cq level
+ if ( (q > cpi->cq_target_quality) &&
+ (cpi->projected_frame_size <
+ ((cpi->this_frame_target * 7) >> 3)))
+ {
+ force_recode = TRUE;
+ }
+ // Severe undershoot and between auto and user cq level
+ else if ( (q > cpi->oxcf.cq_level) &&
+ (cpi->projected_frame_size < cpi->min_frame_bandwidth) &&
+ (cpi->active_best_quality > cpi->oxcf.cq_level))
+ {
+ force_recode = TRUE;
+ cpi->active_best_quality = cpi->oxcf.cq_level;
+ }
+ }
}
return force_recode;
@@ -3778,51 +3823,85 @@ static void encode_frame_to_data_rate
}
// Set an active best quality and if necessary active worst quality
- if (cpi->pass == 2 || (cm->current_video_frame > 150))
+ // There is some odd behaviour for one pass here that needs attention.
+ if ( (cpi->pass == 2) || (cpi->ni_frames > 150))
{
- int Q;
- int i;
- int bpm_target;
- //int tmp;
-
vp8_clear_system_state();
Q = cpi->active_worst_quality;
- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
+ if ( cm->frame_type == KEY_FRAME )
{
- if (cm->frame_type != KEY_FRAME)
+ if ( cpi->pass == 2 )
{
- if (cpi->avg_frame_qindex < cpi->active_worst_quality)
- Q = cpi->avg_frame_qindex;
+ if (cpi->gfu_boost > 600)
+ cpi->active_best_quality = kf_low_motion_minq[Q];
+ else
+ cpi->active_best_quality = kf_high_motion_minq[Q];
- if ( cpi->gfu_boost > 1000 )
+ // Special case for key frames forced because we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping
+ if ( cpi->this_key_frame_forced )
+ {
+ if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8)
+ cpi->active_best_quality = cpi->avg_frame_qindex * 7/8;
+ else if ( cpi->active_best_quality < cpi->avg_frame_qindex >> 2 )
+ cpi->active_best_quality = cpi->avg_frame_qindex >> 2;
+ }
+ }
+ // One pass more conservative
+ else
+ cpi->active_best_quality = kf_high_motion_minq[Q];
+ }
+
+ else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
+ {
+ // Use the lower of cpi->active_worst_quality and recent
+ // average Q as basis for GF/ARF Q limit unless last frame was
+ // a key frame.
+ if ( (cpi->frames_since_key > 1) &&
+ (cpi->avg_frame_qindex < cpi->active_worst_quality) )
+ {
+ Q = cpi->avg_frame_qindex;
+
+ if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (Q < cpi->oxcf.cq_level) )
+ {
+ Q = cpi->oxcf.cq_level;
+ }
+ }
+
+ if ( cpi->pass == 2 )
+ {
+ if ( cpi->gfu_boost > 1000 )
cpi->active_best_quality = gf_low_motion_minq[Q];
else if ( cpi->gfu_boost < 400 )
cpi->active_best_quality = gf_high_motion_minq[Q];
else
cpi->active_best_quality = gf_mid_motion_minq[Q];
-
- /*cpi->active_best_quality = gf_arf_minq[Q];
- tmp = (cpi->gfu_boost > 1000) ? 600 : cpi->gfu_boost - 400;
- //tmp = (cpi->gfu_boost > 1000) ? 600 :
- //(cpi->gfu_boost < 400) ? 0 : cpi->gfu_boost - 400;
- tmp = 128 - (tmp >> 4);
- cpi->active_best_quality = (cpi->active_best_quality * tmp)>>7;*/
-
- }
- // KEY FRAMES
- else
- {
- if (cpi->gfu_boost > 600)
- cpi->active_best_quality = kf_low_motion_minq[Q];
- else
- cpi->active_best_quality = kf_high_motion_minq[Q];
- }
+ }
+ // One pass more conservative
+ else
+ cpi->active_best_quality = gf_high_motion_minq[Q];
}
else
{
cpi->active_best_quality = inter_minq[Q];
+
+ // For the constant/constrained quality mode we don't want
+ // the quality to rise above the cq level.
+ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (cpi->active_best_quality < cpi->cq_target_quality) )
+ {
+ // If we are strongly undershooting the target rate in the last
+ // frames then use the user passed in cq value not the auto
+ // cq value.
+ if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth )
+ cpi->active_best_quality = cpi->oxcf.cq_level;
+ else
+ cpi->active_best_quality = cpi->cq_target_quality;
+ }
}
// If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames
@@ -3840,7 +3919,6 @@ static void encode_frame_to_data_rate
cpi->active_best_quality -= min_qadjustment;
}
-
}
}
@@ -4101,9 +4179,44 @@ static void encode_frame_to_data_rate
active_worst_qchanged = FALSE;
#if !(CONFIG_REALTIME_ONLY)
+ // Special case handling for forced key frames
+ if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced )
+ {
+ int last_q = Q;
+ int kf_err = vp8_calc_ss_err(cpi->Source,
+ &cm->yv12_fb[cm->new_fb_idx],
+ IF_RTCD(&cpi->rtcd.variance));
+
+ // The key frame is not good enough
+ if ( kf_err > ((cpi->ambient_err * 3) >> 2) )
+ {
+ // Lower q_high
+ q_high = (Q > q_low) ? (Q - 1) : q_low;
+
+ // Adjust Q
+ Q = (q_high + q_low) >> 1;
+ }
+ // The key frame is much better than the previous frame
+ else if ( kf_err < (cpi->ambient_err >> 1) )
+ {
+ // Raise q_low
+ q_low = (Q < q_high) ? (Q + 1) : q_high;
+
+ // Adjust Q
+ Q = (q_high + q_low + 1) >> 1;
+ }
+
+ // Clamp Q to upper and lower limits:
+ if (Q > q_high)
+ Q = q_high;
+ else if (Q < q_low)
+ Q = q_low;
+
+ Loop = ((Q != last_q)) ? TRUE : FALSE;
+ }
// Is the projected frame size out of range and are we allowed to attempt to recode.
- if ( recode_loop_test( cpi,
+ else if ( recode_loop_test( cpi,
frame_over_shoot_limit, frame_under_shoot_limit,
Q, top_index, bottom_index ) )
{
@@ -4119,7 +4232,7 @@ static void encode_frame_to_data_rate
//if ( cpi->zbin_over_quant == 0 )
q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low
+ if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low
zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
//if ( undershoot_seen || (Q == MAXQ) )
@@ -4188,6 +4301,16 @@ static void encode_frame_to_data_rate
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
+ // Special case reset for qlow for constrained quality.
+ // This should only trigger where there is very substantial
+ // undershoot on a frame and the auto cq level is above
+ // the user passed in value.
+ if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (Q < q_low) )
+ {
+ q_low = Q;
+ }
+
while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10))
{
vp8_update_rate_correction_factors(cpi, 0);
@@ -4252,9 +4375,15 @@ static void encode_frame_to_data_rate
}
#endif
- // Update the GF useage maps.
- // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
- vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
+ // Special case code to reduce pulsing when key frames are forced at a
+ // fixed interval. Note the reconstruction error if it is the frame before
+ // the forced key frame
+ if ( cpi->next_key_frame_forced && (cpi->frames_to_key == 0) )
+ {
+ cpi->ambient_err = vp8_calc_ss_err(cpi->Source,
+ &cm->yv12_fb[cm->new_fb_idx],
+ IF_RTCD(&cpi->rtcd.variance));
+ }
// This frame's MVs are saved and will be used in next frame's MV prediction.
if(cm->show_frame) //do not save for altref frame
@@ -4281,7 +4410,6 @@ static void encode_frame_to_data_rate
}
}
-
// Update the GF useage maps.
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
@@ -4312,8 +4440,6 @@ static void encode_frame_to_data_rate
else
cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
-
-
//#pragma omp parallel sections
{
@@ -4421,9 +4547,7 @@ static void encode_frame_to_data_rate
}
// Keep a record of ambient average Q.
- if (cm->frame_type == KEY_FRAME)
- cpi->avg_frame_qindex = cm->base_qindex;
- else
+ if (cm->frame_type != KEY_FRAME)
cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2;
// Keep a record from which we can calculate the average Q excluding GF updates and key frames
@@ -4431,30 +4555,38 @@ static void encode_frame_to_data_rate
{
cpi->ni_frames++;
- // Calculate the average Q for normal inter frames (not key or GFU frames)
- // This is used as a basis for setting active worst quality.
- if (cpi->ni_frames > 150)
+ // Calculate the average Q for normal inter frames (not key or GFU
+ // frames).
+ if ( cpi->pass == 2 )
{
cpi->ni_tot_qi += Q;
cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames);
}
- // Early in the clip ... average the current frame Q value with the default
- // entered by the user as a dampening measure
else
{
- cpi->ni_tot_qi += Q;
- cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2;
- }
-
- // If the average Q is higher than what was used in the last frame
- // (after going through the recode loop to keep the frame size within range)
- // then use the last frame value - 1.
- // The -1 is designed to stop Q and hence the data rate, from progressively
- // falling away during difficult sections, but at the same time reduce the number of
- // itterations around the recode loop.
- if (Q > cpi->ni_av_qi)
- cpi->ni_av_qi = Q - 1;
+ // Damp value for first few frames
+ if (cpi->ni_frames > 150 )
+ {
+ cpi->ni_tot_qi += Q;
+ cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames);
+ }
+ // For one pass, early in the clip ... average the current frame Q
+ // value with the worstq entered by the user as a dampening measure
+ else
+ {
+ cpi->ni_tot_qi += Q;
+ cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2;
+ }
+ // If the average Q is higher than what was used in the last frame
+ // (after going through the recode loop to keep the frame size within range)
+ // then use the last frame value - 1.
+ // The -1 is designed to stop Q and hence the data rate, from progressively
+ // falling away during difficult sections, but at the same time reduce the number of
+ // iterations around the recode loop.
+ if (Q > cpi->ni_av_qi)
+ cpi->ni_av_qi = Q - 1;
+ }
}
#if 0
@@ -4548,7 +4680,7 @@ static void encode_frame_to_data_rate
if (cpi->total_coded_error_left != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
- "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
+ "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
"%10.3f %8ld\n",
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size,
@@ -4557,7 +4689,8 @@ static void encode_frame_to_data_rate
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
(int)cpi->total_actual_bits, cm->base_qindex,
cpi->active_best_quality, cpi->active_worst_quality,
- cpi->avg_frame_qindex, cpi->zbin_over_quant,
+ cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant,
+ //cpi->avg_frame_qindex, cpi->zbin_over_quant,
cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->est_max_qcorrection_factor, (int)cpi->bits_left,
@@ -4566,7 +4699,7 @@ static void encode_frame_to_data_rate
cpi->tot_recode_hits);
else
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
- "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
+ "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
"%8ld\n",
cpi->common.current_video_frame,
cpi->this_frame_target, cpi->projected_frame_size,
@@ -4575,7 +4708,8 @@ static void encode_frame_to_data_rate
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
(int)cpi->total_actual_bits, cm->base_qindex,
cpi->active_best_quality, cpi->active_worst_quality,
- cpi->avg_frame_qindex, cpi->zbin_over_quant,
+ cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant,
+ //cpi->avg_frame_qindex, cpi->zbin_over_quant,
cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->est_max_qcorrection_factor, (int)cpi->bits_left,
@@ -4807,7 +4941,9 @@ extern void vp8_pop_neon(INT64 *store);
#endif
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
{
+#if HAVE_ARMV7
INT64 store_reg[8];
+#endif
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer timer;
@@ -4910,7 +5046,9 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
}
int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
{
+#if HAVE_ARMV7
INT64 store_reg[8];
+#endif
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer tsctimer;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 2f9cc4776..8a97e983b 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -183,6 +183,7 @@ typedef struct
int optimize_coefficients;
int use_fastquant_for_pick;
+ int no_skip_block4x4_search;
} SPEED_FEATURES;
@@ -316,7 +317,11 @@ typedef struct
unsigned int frames_since_key;
unsigned int key_frame_frequency;
- unsigned int next_key;
+ unsigned int this_key_frame_forced;
+ unsigned int next_key_frame_forced;
+
+ // Ambient reconstruction error target for forced key frames
+ int ambient_err;
unsigned int mode_check_freq[MAX_MODES];
unsigned int mode_test_hit_counts[MAX_MODES];
@@ -365,7 +370,6 @@ typedef struct
int this_frame_target;
int projected_frame_size;
int last_q[2]; // Separate values for Intra/Inter
- int target_bits_per_mb;
double rate_correction_factor;
double key_frame_rate_correction_factor;
@@ -398,6 +402,7 @@ typedef struct
int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames
int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame.
int max_gf_interval;
+ int static_scene_max_gf_interval;
int baseline_gf_interval;
int gf_decay_rate;
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
@@ -447,6 +452,10 @@ typedef struct
int best_quality;
int active_best_quality;
+ int cq_target_quality;
+ int maxq_max_limit;
+ int maxq_min_limit;
+
int drop_frames_allowed; // Are we permitted to drop frames?
int drop_frame; // Drop this frame?
int drop_count; // How many frames have we dropped?
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 8dfca351c..2b0f57508 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -608,8 +608,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
}
- //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
- if (cpi->is_src_frame_alt_ref)
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative
+ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
{
if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
continue;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index a67299487..be9f26c7f 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -70,7 +70,6 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
int i, rc, eob;
- int zbin;
int x, y, z, sz;
short *coeff_ptr = b->coeff;
short *round_ptr = b->round;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 8455b7bdb..b69a1965e 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1550,12 +1550,21 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
*frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
}
}
- // VBR
+ // VBR and CQ mode
// Note that tighter restrictions here can help quality but hurt encode speed
else
{
- *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
- *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
+ // Looser undershoot limit for constrained quality (overshoot limit unchanged)
+ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
+ {
+ *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
+ *frame_under_shoot_limit = cpi->this_frame_target * 2 / 8;
+ }
+ else
+ {
+ *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
+ *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
+ }
}
}
}
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index e6c7c9ab3..b2a3e117f 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -36,7 +36,6 @@
#include "dct.h"
#include "systemdependent.h"
-#define DIAMONDSEARCH 1
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
@@ -46,19 +45,6 @@
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
-
-#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-/*int RDFUNC( int RM,int DM, int R, int D, int target_r )
-{
- int rd_value;
-
- rd_value = ( ((128+(R)*(RM)) >> 8) + (DM)*(D) );
-
- return rd_value;
-}*/
-
-#define UVRDFUNC(RM,DM,R,D,target_r) RDFUNC(RM,DM,R,D,target_r)
-
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
@@ -223,8 +209,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
{
int q;
int i;
- int *thresh;
- int threshmult;
double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
double rdconst = 3.00;
@@ -271,22 +255,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
if (q < 8)
q = 8;
- if (cpi->ref_frame_flags == VP8_ALT_FLAG)
- {
- thresh = &cpi->rd_threshes[THR_NEWA];
- threshmult = cpi->sf.thresh_mult[THR_NEWA];
- }
- else if (cpi->ref_frame_flags == VP8_GOLD_FLAG)
- {
- thresh = &cpi->rd_threshes[THR_NEWG];
- threshmult = cpi->sf.thresh_mult[THR_NEWG];
- }
- else
- {
- thresh = &cpi->rd_threshes[THR_NEWMV];
- threshmult = cpi->sf.thresh_mult[THR_NEWMV];
- }
-
if (cpi->RDMULT > 1000)
{
cpi->RDDIV = 1;
@@ -775,7 +743,7 @@ static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distort
*rate = rd_cost_mbuv(x);
*distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
- return UVRDFUNC(x->rdmult, x->rddiv, *rate, *distortion, cpi->target_bits_per_mb);
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
@@ -800,7 +768,7 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x);
- this_rd = UVRDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_rd)
{
@@ -1097,7 +1065,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
// Segmentation method overheads
rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
- this_segment_rd += RDFUNC(x->rdmult, x->rddiv, rate, 0, cpi->target_bits_per_mb);
+ this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
for (i = 0; i < label_count; i++)
@@ -1252,7 +1220,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
rate += labelyrate;
- this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_label_rd)
{
@@ -1357,10 +1325,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
if (bsi.segment_rd < best_rd)
{
- int col_min = (best_ref_mv->col - MAX_POSSIBLE_MV) >>3;
- int col_max = (best_ref_mv->col + MAX_POSSIBLE_MV) >>3;
- int row_min = (best_ref_mv->row - MAX_POSSIBLE_MV) >>3;
- int row_max = (best_ref_mv->row + MAX_POSSIBLE_MV) >>3;
+ int col_min = (best_ref_mv->col - MAX_FULL_PEL_VAL) >>3;
+ int col_max = (best_ref_mv->col + MAX_FULL_PEL_VAL) >>3;
+ int row_min = (best_ref_mv->row - MAX_FULL_PEL_VAL) >>3;
+ int row_max = (best_ref_mv->row + MAX_FULL_PEL_VAL) >>3;
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -1407,7 +1375,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
}
/* If 8x8 is better than 16x8/8x16, then do 4x4 search */
- if (bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
+ /* Do not skip the 4x4 search if speed=0 (good quality) */
+ if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
{
bsi.mvp = &bsi.sv_mvp[0];
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
@@ -1751,7 +1720,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
//int intermodecost[MAX_MODES];
MB_PREDICTION_MODE uv_intra_mode;
- int uvintra_eob = 0;
+
int force_no_skip = 0;
MV mvp;
@@ -1760,36 +1729,65 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int saddone=0;
int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7)
- *returnintra = INT_MAX;
+ MV frame_nearest_mv[4];
+ MV frame_near_mv[4];
+ MV frame_best_ref_mv[4];
+ int frame_mdcounts[4][4];
+ int frame_lf_or_gf[4];
+ unsigned char *y_buffer[4];
+ unsigned char *u_buffer[4];
+ unsigned char *v_buffer[4];
- vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean
+ vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame
+ if (cpi->ref_frame_flags & VP8_LAST_FLAG)
+ {
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
- x->skip = 0;
+ vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[LAST_FRAME], &frame_near_mv[LAST_FRAME],
+ &frame_best_ref_mv[LAST_FRAME], frame_mdcounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
- ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
+ y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
+ u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
+ v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
+
+ frame_lf_or_gf[LAST_FRAME] = 0;
+ }
+
+ if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
+ {
+ YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
+
+ vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[GOLDEN_FRAME], &frame_near_mv[GOLDEN_FRAME],
+ &frame_best_ref_mv[GOLDEN_FRAME], frame_mdcounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
+
+ y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
+ u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
+ v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
+
+ frame_lf_or_gf[GOLDEN_FRAME] = 1;
+ }
- // Experimental code
- // Adjust the RD multiplier based on the best case distortion we saw in the most recently coded mb
- //if ( (cpi->last_mb_distortion) > 0 && (cpi->target_bits_per_mb > 0) )
- /*{
- int tmprdmult;
+ if (cpi->ref_frame_flags & VP8_ALT_FLAG)
+ {
+ YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
+ vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[ALTREF_FRAME], &frame_near_mv[ALTREF_FRAME],
+ &frame_best_ref_mv[ALTREF_FRAME], frame_mdcounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
- //tmprdmult = (cpi->last_mb_distortion * 256) / ((cpi->av_per_frame_bandwidth*256)/cpi->common.MBs);
- tmprdmult = (cpi->last_mb_distortion * 256) / cpi->target_bits_per_mb;
- //tmprdmult = tmprdmult;
+ y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
+ u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
+ v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
- //if ( tmprdmult > cpi->RDMULT * 2 )
- // tmprdmult = cpi->RDMULT * 2;
- //else if ( tmprdmult < cpi->RDMULT / 2 )
- // tmprdmult = cpi->RDMULT / 2;
+ frame_lf_or_gf[ALTREF_FRAME] = 1;
+ }
- //tmprdmult = (tmprdmult < 25) ? 25 : tmprdmult;
+ *returnintra = INT_MAX;
+ cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame
- //x->rdmult = tmprdmult;
+ x->skip = 0;
- }*/
+ ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
// Special case treatment when GF and ARF are not sensible options for reference
if (cpi->ref_frame_flags == VP8_LAST_FLAG)
@@ -1820,12 +1818,6 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
- {
- uvintra_eob = 0;
-
- for (i = 16; i < 24; i++)
- uvintra_eob += x->e_mbd.block[i].eob;
- }
for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
{
@@ -1847,8 +1839,6 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (best_rd <= cpi->rd_threshes[mode_index])
continue;
-
-
// These variables hold are rolling total cost and distortion for this mode
rate2 = 0;
distortion2 = 0;
@@ -1859,65 +1849,28 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
- //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
- if (cpi->is_src_frame_alt_ref)
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative
+ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
{
if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
continue;
}
- if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ /* everything but intra */
+ if (x->e_mbd.mode_info_context->mbmi.ref_frame)
{
- YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
-
- if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
- continue;
-
- lf_or_gf = 0; // Local last frame vs Golden frame flag
-
- // Set up pointers for this macro block into the previous frame recon buffer
- x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
- x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
- x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
- }
- else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- {
- YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
-
- // not supposed to reference gold frame
- if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
- continue;
-
- lf_or_gf = 1; // Local last frame vs Golden frame flag
-
- // Set up pointers for this macro block into the previous frame recon buffer
- x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset;
- x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset;
- x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset;
- }
- else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME)
- {
- YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
-
- // not supposed to reference alt ref frame
- if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
- continue;
-
- //if ( !cpi->source_alt_ref_active )
- // continue;
-
- lf_or_gf = 1; // Local last frame vs Golden frame flag
-
- // Set up pointers for this macro block into the previous frame recon buffer
- x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset;
- x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset;
- x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset;
+ x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ mode_mv[NEARESTMV] = frame_nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ mode_mv[NEARMV] = frame_near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
+ lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame];
}
- vp8_find_near_mvs(&x->e_mbd,
- x->e_mbd.mode_info_context,
- &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv,
- mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
{
@@ -1986,14 +1939,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
/* adjust mvp to make sure it is within MV range */
- if(mvp.row > best_ref_mv.row + MAX_POSSIBLE_MV)
- mvp.row = best_ref_mv.row + MAX_POSSIBLE_MV;
- else if(mvp.row < best_ref_mv.row - MAX_POSSIBLE_MV)
- mvp.row = best_ref_mv.row - MAX_POSSIBLE_MV;
- if(mvp.col > best_ref_mv.col + MAX_POSSIBLE_MV)
- mvp.col = best_ref_mv.col + MAX_POSSIBLE_MV;
- else if(mvp.col < best_ref_mv.col - MAX_POSSIBLE_MV)
- mvp.col = best_ref_mv.col - MAX_POSSIBLE_MV;
+ if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL)
+ mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL;
+ else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL)
+ mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL;
+ if(mvp.col > best_ref_mv.col + MAX_FULL_PEL_VAL)
+ mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL;
+ else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL)
+ mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL;
}
// Check to see if the testing frequency for this mode is at its max
@@ -2125,10 +2078,10 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int further_steps;
int n;
- int col_min = (best_ref_mv.col - MAX_POSSIBLE_MV) >>3;
- int col_max = (best_ref_mv.col + MAX_POSSIBLE_MV) >>3;
- int row_min = (best_ref_mv.row - MAX_POSSIBLE_MV) >>3;
- int row_max = (best_ref_mv.row + MAX_POSSIBLE_MV) >>3;
+ int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
+ int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
+ int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
+ int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -2339,8 +2292,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
distortion_uv = sse2;
disable_skip = 1;
- this_rd = RDFUNC(x->rdmult, x->rddiv, rate2,
- distortion2, cpi->target_bits_per_mb);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2,
+ distortion2);
break;
}
@@ -2414,7 +2367,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
}
// Calculate the final RD estimate for this mode
- this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, distortion2, cpi->target_bits_per_mb);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
}
// Experimental debug code.
@@ -2442,8 +2395,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
/* Calculate the final y RD estimate for this mode */
- best_yrd = RDFUNC(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
- (distortion2-distortion_uv), cpi->target_bits_per_mb);
+ best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
+ (distortion2-distortion_uv));
*returnrate = rate2;
*returndistortion = distortion2;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 2fffaa95f..f28daaff9 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -36,36 +36,9 @@
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
-#define USE_FILTER_LUT 0 // use lookup table to improve filter
#if VP8_TEMPORAL_ALT_REF
-#if USE_FILTER_LUT
-// for (strength = 0; strength <= 6; strength++) {
-// for (delta = 0; delta <= 18; delta++) {
-// float coeff = (3.0 * delta * delta) / pow(2, strength);
-// printf("%3d", (int)roundf(coeff > 16 ? 0 : 16-coeff));
-// }
-// printf("\n");
-// }
-static int modifier_lut[7][19] =
-{
- // Strength=0
- {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- // Strength=1
- {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- // Strength=2
- {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- // Strength=3
- {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- // Strength=4
- {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- // Strength=5
- {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
- // Strength=6
- {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
-};
-#endif
static void vp8_temporal_filter_predictors_mb_c
(
MACROBLOCKD *x,
@@ -86,14 +59,11 @@ static void vp8_temporal_filter_predictors_mb_c
if ((mv_row | mv_col) & 7)
{
-// vp8_sixtap_predict16x16_c(yptr, stride,
-// mv_col & 7, mv_row & 7, &pred[0], 16);
x->subpixel_predict16x16(yptr, stride,
mv_col & 7, mv_row & 7, &pred[0], 16);
}
else
{
- //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
}
@@ -127,17 +97,13 @@ void vp8_temporal_filter_apply_c
int strength,
int filter_weight,
unsigned int *accumulator,
- unsigned int *count
+ unsigned short *count
)
{
int i, j, k;
int modifier;
int byte = 0;
-#if USE_FILTER_LUT
- int *lut = modifier_lut[strength];
-#endif
-
for (i = 0,k = 0; i < block_size; i++)
{
for (j = 0; j < block_size; j++, k++)
@@ -146,11 +112,10 @@ void vp8_temporal_filter_apply_c
int src_byte = frame1[byte];
int pixel_value = *frame2++;
-#if USE_FILTER_LUT
- modifier = abs(src_byte-pixel_value);
- modifier = modifier>18 ? 0 : lut[modifier];
-#else
modifier = src_byte - pixel_value;
+ // This is an integer approximation of:
+ // float coeff = (3.0 * modifier * modifier) / pow(2, strength);
+ // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
modifier *= modifier;
modifier *= 3;
modifier += 1 << (strength - 1);
@@ -160,7 +125,6 @@ void vp8_temporal_filter_apply_c
modifier = 16;
modifier = 16 - modifier;
-#endif
modifier *= filter_weight;
count[k] += modifier;
@@ -326,17 +290,17 @@ static void vp8_temporal_filter_iterate_c
int mb_col, mb_row;
unsigned int filter_weight[MAX_LAG_BUFFERS];
unsigned char *mm_ptr = cpi->fp_motion_map;
- int cols = cpi->common.mb_cols;
- int rows = cpi->common.mb_rows;
+ int mb_cols = cpi->common.mb_cols;
+ int mb_rows = cpi->common.mb_rows;
int MBs = cpi->common.MBs;
int mb_y_offset = 0;
int mb_uv_offset = 0;
- unsigned int accumulator[384];
- unsigned int count[384];
+ DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
unsigned char *dst1, *dst2;
- DECLARE_ALIGNED(16, unsigned char, predictor[384]);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);
// Save input state
unsigned char *y_buffer = mbd->pre.y_buffer;
@@ -350,7 +314,7 @@ static void vp8_temporal_filter_iterate_c
filter_weight[frame] = 1;
}
- for (mb_row = 0; mb_row < rows; mb_row++)
+ for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
#if ALT_REF_MC_ENABLED
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
@@ -359,14 +323,14 @@ static void vp8_temporal_filter_iterate_c
+ (VP8BORDERINPIXELS - 19);
#endif
- for (mb_col = 0; mb_col < cols; mb_col++)
+ for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int i, j, k, w;
int weight_cap;
int stride;
vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
- vpx_memset(count, 0, 384*sizeof(unsigned int));
+ vpx_memset(count, 0, 384*sizeof(unsigned short));
#if ALT_REF_MC_ENABLED
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
@@ -533,8 +497,8 @@ static void vp8_temporal_filter_iterate_c
mb_uv_offset += 8;
}
- mb_y_offset += 16*f->y_stride-f->y_width;
- mb_uv_offset += 8*f->uv_stride-f->uv_width;
+ mb_y_offset += 16*(f->y_stride-mb_cols);
+ mb_uv_offset += 8*(f->uv_stride-mb_cols);
}
// Restore input state
diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h
index 7b8c21c04..740037a85 100644
--- a/vp8/encoder/temporal_filter.h
+++ b/vp8/encoder/temporal_filter.h
@@ -22,9 +22,13 @@
int strength, \
int filter_weight, \
unsigned int *accumulator, \
- unsigned int *count \
+ unsigned short *count \
)
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/temporal_filter_x86.h"
+#endif
+
#ifndef vp8_temporal_filter_apply
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
#endif
diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
new file mode 100644
index 000000000..f2adcccba
--- /dev/null
+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
@@ -0,0 +1,207 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+; void vp8_temporal_filter_apply_sse2 | arg
+; (unsigned char *frame1, | 0
+; unsigned int stride, | 1
+; unsigned char *frame2, | 2
+; unsigned int block_size, | 3
+; int strength, | 4
+; int filter_weight, | 5
+; unsigned int *accumulator, | 6
+; unsigned short *count) | 7
+global sym(vp8_temporal_filter_apply_sse2)
+sym(vp8_temporal_filter_apply_sse2):
+ ; per pixel: m = (16 -sat ((3*(src-pred)^2 + round) >> strength)) * filter_weight; count += m; accumulator += m*pred
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ALIGN_STACK 16, rax
+ %define block_size 0
+ %define strength 16
+ %define filter_weight 32
+ %define rounding_bit 48
+ %define rbp_backup 64
+ %define stack_size 80
+ sub rsp, stack_size
+ mov [rsp + rbp_backup], rbp
+ ; end prolog
+
+ mov rdx, arg(3)
+ mov [rsp + block_size], rdx
+ movd xmm6, arg(4)
+ movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
+
+ ; calculate the rounding bit outside the loop
+ ; 0x8000 >> (16 - strength)
+ mov rdx, 16
+ sub rdx, arg(4) ; 16 - strength
+ movd xmm4, rdx ; can't use rdx w/ shift
+ movdqa xmm5, [GLOBAL(_const_top_bit)]
+ psrlw xmm5, xmm4
+ movdqa [rsp + rounding_bit], xmm5
+
+ mov rsi, arg(0) ; src/frame1
+ mov rdx, arg(2) ; predictor frame
+ mov rdi, arg(6) ; accumulator
+ mov rax, arg(7) ; count
+
+ ; dup the filter weight and store for later
+ movd xmm0, arg(5) ; filter_weight
+ pshuflw xmm0, xmm0, 0
+ punpcklwd xmm0, xmm0
+ movdqa [rsp + filter_weight], xmm0
+
+ mov rbp, arg(1) ; stride
+ pxor xmm7, xmm7 ; zero for extraction
+
+ lea rcx, [rdx + 16*16*1] ; end of a 16x16 predictor block
+ cmp dword ptr [rsp + block_size], 8
+ jne temporal_filter_apply_load_16
+ lea rcx, [rdx + 8*8*1] ; end of an 8x8 predictor block
+
+temporal_filter_apply_load_8:
+ movq xmm0, [rsi] ; first row
+ lea rsi, [rsi + rbp] ; += stride
+ punpcklbw xmm0, xmm7 ; src[ 0- 7]
+ movq xmm1, [rsi] ; second row
+ lea rsi, [rsi + rbp] ; += stride
+ punpcklbw xmm1, xmm7 ; src[ 8-15]
+ jmp temporal_filter_apply_load_finished
+
+temporal_filter_apply_load_16:
+ movdqu xmm0, [rsi] ; src (frame1)
+ lea rsi, [rsi + rbp] ; += stride
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm7 ; src[ 0- 7]
+ punpckhbw xmm1, xmm7 ; src[ 8-15]
+
+temporal_filter_apply_load_finished:
+ movdqa xmm2, [rdx] ; predictor (frame2)
+ movdqa xmm3, xmm2
+ punpcklbw xmm2, xmm7 ; pred[ 0- 7]
+ punpckhbw xmm3, xmm7 ; pred[ 8-15]
+
+ ; modifier = src_byte - pixel_value
+ psubw xmm0, xmm2 ; src - pred[ 0- 7]
+ psubw xmm1, xmm3 ; src - pred[ 8-15]
+
+ ; modifier *= modifier
+ pmullw xmm0, xmm0 ; modifier[ 0- 7]^2
+ pmullw xmm1, xmm1 ; modifier[ 8-15]^2
+
+ ; modifier *= 3
+ pmullw xmm0, [GLOBAL(_const_3w)]
+ pmullw xmm1, [GLOBAL(_const_3w)]
+
+ ; modifier += 0x8000 >> (16 - strength)
+ paddw xmm0, [rsp + rounding_bit]
+ paddw xmm1, [rsp + rounding_bit]
+
+ ; modifier >>= strength
+ psrlw xmm0, [rsp + strength]
+ psrlw xmm1, [rsp + strength]
+
+ ; modifier = 16 - modifier
+ ; saturation takes care of modifier > 16
+ movdqa xmm3, [GLOBAL(_const_16w)]
+ movdqa xmm2, [GLOBAL(_const_16w)]
+ psubusw xmm3, xmm1
+ psubusw xmm2, xmm0
+
+ ; modifier *= filter_weight
+ pmullw xmm2, [rsp + filter_weight]
+ pmullw xmm3, [rsp + filter_weight]
+
+ ; count (unsigned short, 8 per register)
+ movdqa xmm4, [rax]
+ movdqa xmm5, [rax+16]
+ ; += modifier
+ paddw xmm4, xmm2
+ paddw xmm5, xmm3
+ ; write back
+ movdqa [rax], xmm4
+ movdqa [rax+16], xmm5
+ lea rax, [rax + 16*2] ; count += 16*(sizeof(short))
+
+ ; load and extract the predictor up to shorts
+ pxor xmm7, xmm7
+ movdqa xmm0, [rdx]
+ lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char))
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm7 ; pred[ 0- 7]
+ punpckhbw xmm1, xmm7 ; pred[ 8-15]
+
+ ; modifier *= pixel_value
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ ; expand to double words
+ movdqa xmm2, xmm0
+ punpcklwd xmm0, xmm7 ; [ 0- 3]
+ punpckhwd xmm2, xmm7 ; [ 4- 7]
+ movdqa xmm3, xmm1
+ punpcklwd xmm1, xmm7 ; [ 8-11]
+ punpckhwd xmm3, xmm7 ; [12-15]
+
+ ; accumulator (unsigned int, 4 per register)
+ movdqa xmm4, [rdi]
+ movdqa xmm5, [rdi+16]
+ movdqa xmm6, [rdi+32]
+ movdqa xmm7, [rdi+48]
+ ; += modifier * pixel_value; dword adds so carries reach the upper 16 bits
+ paddd xmm4, xmm0
+ paddd xmm5, xmm2
+ paddd xmm6, xmm1
+ paddd xmm7, xmm3
+ ; write back
+ movdqa [rdi], xmm4
+ movdqa [rdi+16], xmm5
+ movdqa [rdi+32], xmm6
+ movdqa [rdi+48], xmm7
+ lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int))
+
+ cmp rdx, rcx ; reached the end of the predictor block?
+ je temporal_filter_apply_epilog
+ pxor xmm7, xmm7 ; zero for extraction
+ cmp dword ptr [rsp + block_size], 16
+ je temporal_filter_apply_load_16
+ jmp temporal_filter_apply_load_8
+
+temporal_filter_apply_epilog:
+ ; begin epilog
+ mov rbp, [rsp + rbp_backup]
+ add rsp, stack_size
+ pop rsp
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+_const_3w:
+ times 8 dw 3
+align 16
+_const_top_bit:
+ times 8 dw 1<<15
+align 16
+_const_16w:
+ times 8 dw 16
diff --git a/vp8/encoder/x86/temporal_filter_x86.h b/vp8/encoder/x86/temporal_filter_x86.h
new file mode 100644
index 000000000..2daa14018
--- /dev/null
+++ b/vp8/encoder/x86/temporal_filter_x86.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_VP8_TEMPORAL_FILTER_X86_H
+#define __INC_VP8_TEMPORAL_FILTER_X86_H
+/* x86 declarations and compile-time binding for vp8_temporal_filter_apply. */
+#if HAVE_SSE2
+extern prototype_apply(vp8_temporal_filter_apply_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+/* No runtime CPU detection: point the generic name at the SSE2 version. */
+#undef vp8_temporal_filter_apply
+#define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_SSE2 */
+
+#endif // __INC_VP8_TEMPORAL_FILTER_X86_H
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 6e317e2a2..f9b3ea1d8 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -309,6 +309,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
+
+ cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
}
#endif
@@ -321,8 +323,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sadx3;
-
+#endif
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
@@ -351,7 +354,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
+#endif
}
#endif
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index f95920775..903c56c88 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -38,6 +38,7 @@ struct vp8_extracfg
unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
unsigned int arnr_type; /* alt_ref filter type */
vp8e_tuning tuning;
+ unsigned int cq_level; /* constrained quality level */
};
@@ -69,6 +70,7 @@ static const struct extraconfig_map extracfg_map[] =
3, /* arnr_strength */
3, /* arnr_type*/
0, /* tuning*/
+ 10, /* cq_level */
}
}
};
@@ -148,7 +150,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
#else
RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
#endif
- RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CBR);
+ RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
@@ -190,7 +192,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
+ RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
+#if !(CONFIG_REALTIME_ONLY)
if (cfg->g_pass == VPX_RC_LAST_PASS)
{
int mb_r = (cfg->g_h + 15) / 16;
@@ -214,6 +218,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
if ((int)(stats->count + 0.5) != n_packets - 1)
ERROR("rc_twopass_stats_in missing EOS stats packet");
}
+#endif
return VPX_CODEC_OK;
}
@@ -298,11 +303,16 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
{
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
}
+ else if (cfg.rc_end_usage == VPX_CQ)
+ {
+ oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
+ }
oxcf->target_bandwidth = cfg.rc_target_bitrate;
oxcf->best_allowed_q = cfg.rc_min_quantizer;
oxcf->worst_allowed_q = cfg.rc_max_quantizer;
+ oxcf->cq_level = vp8_cfg.cq_level;
oxcf->fixed_q = -1;
oxcf->under_shoot_pct = cfg.rc_undershoot_pct;
@@ -453,6 +463,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength);
MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type);
MAP(VP8E_SET_TUNING, xcfg.tuning);
+ MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level);
}
@@ -1034,6 +1045,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] =
{VP8E_SET_ARNR_STRENGTH , set_param},
{VP8E_SET_ARNR_TYPE , set_param},
{VP8E_SET_TUNING, set_param},
+ {VP8E_SET_CQ_LEVEL, set_param},
{ -1, NULL},
};
@@ -1069,7 +1081,6 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
-
95, /* rc_undershoot_pct */
200, /* rc_overshoot_pct */
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 683d785e6..932f145e6 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -94,6 +94,7 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
+VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm
@@ -107,6 +108,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm