summaryrefslogtreecommitdiff
path: root/vp8/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/encoder')
-rw-r--r--vp8/encoder/arm/arm_csystemdependent.c8
-rw-r--r--vp8/encoder/arm/neon/vp8_mse16x16_neon.asm57
-rw-r--r--vp8/encoder/arm/variance_arm.h12
-rw-r--r--vp8/encoder/asm_enc_offsets.c44
-rw-r--r--vp8/encoder/bitstream.c13
-rw-r--r--vp8/encoder/encodeframe.c485
-rw-r--r--vp8/encoder/encodeintra.c53
-rw-r--r--vp8/encoder/encodeintra.h1
-rw-r--r--vp8/encoder/ethreading.c3
-rw-r--r--vp8/encoder/firstpass.c157
-rw-r--r--vp8/encoder/generic/csystemdependent.c3
-rw-r--r--vp8/encoder/onyx_if.c61
-rw-r--r--vp8/encoder/onyx_int.h8
-rw-r--r--vp8/encoder/pickinter.c169
-rw-r--r--vp8/encoder/pickinter.h3
-rw-r--r--vp8/encoder/ppc/csystemdependent.c9
-rw-r--r--vp8/encoder/ratectrl.c43
-rw-r--r--vp8/encoder/rdopt.c178
-rw-r--r--vp8/encoder/rdopt.h4
-rw-r--r--vp8/encoder/tokenize.c62
-rw-r--r--vp8/encoder/variance.h18
-rw-r--r--vp8/encoder/variance_c.c34
-rw-r--r--vp8/encoder/x86/dct_x86.h6
-rw-r--r--vp8/encoder/x86/variance_impl_mmx.asm130
-rw-r--r--vp8/encoder/x86/variance_impl_sse2.asm116
-rw-r--r--vp8/encoder/x86/variance_mmx.c37
-rw-r--r--vp8/encoder/x86/variance_sse2.c7
-rw-r--r--vp8/encoder/x86/variance_x86.h21
-rw-r--r--vp8/encoder/x86/x86_csystemdependent.c42
29 files changed, 679 insertions, 1105 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index db079d5ed..56c858c71 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -53,10 +53,7 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6;
/*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
- /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
+ /*cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
/*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;*/
@@ -103,9 +100,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
/*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
- /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index 6af4e87ba..55edbf512 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -10,7 +10,6 @@
EXPORT |vp8_mse16x16_neon|
- EXPORT |vp8_get16x16pred_error_neon|
EXPORT |vp8_get4x4sse_cs_neon|
ARM
@@ -76,62 +75,6 @@ mse16x16_neon_loop
ENDP
-;============================
-; r0 unsigned char *src_ptr
-; r1 int src_stride
-; r2 unsigned char *ref_ptr
-; r3 int ref_stride
-|vp8_get16x16pred_error_neon| PROC
- vmov.i8 q8, #0 ;q8 - sum
- vmov.i8 q9, #0 ;q9, q10 - pred_error
- vmov.i8 q10, #0
-
- mov r12, #8
-
-get16x16pred_error_neon_loop
- vld1.8 {q0}, [r0], r1 ;Load up source and reference
- vld1.8 {q2}, [r2], r3
- vld1.8 {q1}, [r0], r1
- vld1.8 {q3}, [r2], r3
-
- vsubl.u8 q11, d0, d4
- vsubl.u8 q12, d1, d5
- vsubl.u8 q13, d2, d6
- vsubl.u8 q14, d3, d7
-
- vpadal.s16 q8, q11
- vmlal.s16 q9, d22, d22
- vmlal.s16 q10, d23, d23
-
- subs r12, r12, #1
-
- vpadal.s16 q8, q12
- vmlal.s16 q9, d24, d24
- vmlal.s16 q10, d25, d25
- vpadal.s16 q8, q13
- vmlal.s16 q9, d26, d26
- vmlal.s16 q10, d27, d27
- vpadal.s16 q8, q14
- vmlal.s16 q9, d28, d28
- vmlal.s16 q10, d29, d29
-
- bne get16x16pred_error_neon_loop
-
- vadd.u32 q10, q9, q10
- vpaddl.s32 q0, q8
-
- vpaddl.u32 q1, q10
- vadd.s64 d0, d0, d1
- vadd.u64 d1, d2, d3
-
- vmull.s32 q5, d0, d0
- vshr.s32 d10, d10, #8
- vsub.s32 d0, d1, d10
-
- vmov.32 r0, d0[0]
- bx lr
-
- ENDP
;=============================
; r0 unsigned char *src_ptr,
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index ad0d37193..f2f761f9e 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -83,9 +83,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
//extern prototype_getmbss(vp8_get_mb_ss_c);
extern prototype_variance(vp8_mse16x16_neon);
-extern prototype_get16x16prederror(vp8_get16x16pred_error_neon);
-//extern prototype_variance2(vp8_get8x8var_c);
-//extern prototype_variance2(vp8_get16x16var_c);
extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -149,15 +146,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_neon
-#undef vp8_variance_get16x16prederror
-#define vp8_variance_get16x16prederror vp8_get16x16pred_error_neon
-
-//#undef vp8_variance_get8x8var
-//#define vp8_variance_get8x8var vp8_get8x8var_c
-
-//#undef vp8_variance_get16x16var
-//#define vp8_variance_get16x16var vp8_get16x16var_c
-
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
#endif
diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c
index 9c81c8d0a..c79e915f8 100644
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -9,31 +9,17 @@
*/
-#include "vpx_ports/config.h"
-#include <stddef.h>
-
+#include "vpx_ports/asm_offsets.h"
+#include "vpx_config.h"
#include "block.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
#include "treewriter.h"
#include "tokenize.h"
-#define ct_assert(name,cond) \
- static void assert_##name(void) UNUSED;\
- static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
-
-#define DEFINE(sym, val) int sym = val;
-
-/*
-#define BLANK() asm volatile("\n->" : : )
-*/
-
-/*
- * int main(void)
- * {
- */
+BEGIN
-//regular quantize
+/* regular quantize */
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin));
DEFINE(vp8_block_round, offsetof(BLOCK, round));
@@ -48,7 +34,7 @@ DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
-// subtract
+/* subtract */
DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src));
DEFINE(vp8_block_src, offsetof(BLOCK, src));
DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff));
@@ -56,7 +42,7 @@ DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride));
DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor));
-//pack tokens
+/* pack tokens */
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
DEFINE(vp8_writer_value, offsetof(vp8_writer, value));
@@ -90,16 +76,16 @@ DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
-// These two sizes are used in vp8cx_pack_tokens. They are hard coded
-// so if the size changes this will have to be adjusted.
+END
+
+/* add asserts for any offset that is not supported by assembly code
+ * add asserts for any size that is not supported by assembly code
+
+ * These are used in vp8cx_pack_tokens. They are hard coded so if their sizes
+ * change they will have to be adjusted.
+ */
+
#if HAVE_ARMV5TE
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
#endif
-
-//add asserts for any offset that is not supported by assembly code
-//add asserts for any size that is not supported by assembly code
-/*
- * return 0;
- * }
- */
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index ced963559..e93d30d1a 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -776,9 +776,9 @@ static void write_mv_ref
vp8_writer *w, MB_PREDICTION_MODE m, const vp8_prob *p
)
{
-
+#if CONFIG_DEBUG
assert(NEARESTMV <= m && m <= SPLITMV);
-
+#endif
vp8_write_token(w, vp8_mv_ref_tree, p,
vp8_mv_ref_encoding_array - NEARESTMV + m);
}
@@ -788,8 +788,9 @@ static void write_sub_mv_ref
vp8_writer *w, B_PREDICTION_MODE m, const vp8_prob *p
)
{
+#if CONFIG_DEBUG
assert(LEFT4X4 <= m && m <= NEW4X4);
-
+#endif
vp8_write_token(w, vp8_sub_mv_ref_tree, p,
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
@@ -1017,11 +1018,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
blockmode = cpi->mb.partition_info->bmi[j].mode;
blockmv = cpi->mb.partition_info->bmi[j].mv;
-
+#if CONFIG_DEBUG
while (j != L[++k])
if (k >= 16)
assert(0);
-
+#else
+ while (j != L[++k]);
+#endif
leftmv.as_int = left_block_mv(m, k);
abovemv.as_int = above_block_mv(m, k, mis);
mv_contz = vp8_mv_cont(&leftmv, &abovemv);
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 5372f8da2..5834e6f08 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -50,6 +50,7 @@ void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
+static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );
#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -84,8 +85,6 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
{
unsigned int act;
unsigned int sse;
- int sum;
-
/* TODO: This could also be done over smaller areas (8x8), but that would
* require extensive changes elsewhere, as lambda is assumed to be fixed
* over an entire MB in most of the code.
@@ -93,14 +92,9 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
- VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
- x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
-
- /* This requires a full 32 bits of precision. */
- act = (sse<<8) - sum*sum;
-
- /* Drop 4 to give us some headroom to work with. */
- act = (act + 8) >> 4;
+ act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer,
+ x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
+ act = act<<4;
/* If the region is flat, lower the activity some more. */
if (act < 8<<12)
@@ -110,70 +104,121 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
}
// Stub for alternative experimental activity measures.
-static unsigned int alt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
+static unsigned int alt_activity_measure( VP8_COMP *cpi,
+ MACROBLOCK *x, int use_dc_pred )
{
- unsigned int mb_activity = VP8_ACTIVITY_AVG_MIN;
-
- x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
-
- vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
-
- mb_activity = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
-
- return mb_activity;
+ return vp8_encode_intra(cpi,x, use_dc_pred);
}
// Measure the activity of the current macroblock
// What we measure here is TBD so abstracted to this function
-static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
+#define ALT_ACT_MEASURE 1
+static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x,
+ int mb_row, int mb_col)
{
unsigned int mb_activity;
- if ( 1 )
+ if ( ALT_ACT_MEASURE )
{
- // Original activity measure from Tim T's code.
- mb_activity = tt_activity_measure( cpi, x );
+ int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
+
+ // Or use and alternative.
+ mb_activity = alt_activity_measure( cpi, x, use_dc_pred );
}
else
{
- // Or use and alternative.
- mb_activity = alt_activity_measure( cpi, x );
+ // Original activity measure from Tim T's code.
+ mb_activity = tt_activity_measure( cpi, x );
}
+ if ( mb_activity < VP8_ACTIVITY_AVG_MIN )
+ mb_activity = VP8_ACTIVITY_AVG_MIN;
+
return mb_activity;
}
// Calculate an "average" mb activity value for the frame
+#define ACT_MEDIAN 0
static void calc_av_activity( VP8_COMP *cpi, INT64 activity_sum )
{
+#if ACT_MEDIAN
+ // Find median: Simple n^2 algorithm for experimentation
+ {
+ unsigned int median;
+ unsigned int i,j;
+ unsigned int * sortlist;
+ unsigned int tmp;
+
+ // Create a list to sort to
+ CHECK_MEM_ERROR(sortlist,
+ vpx_calloc(sizeof(unsigned int),
+ cpi->common.MBs));
+
+ // Copy map to sort list
+ vpx_memcpy( sortlist, cpi->mb_activity_map,
+ sizeof(unsigned int) * cpi->common.MBs );
+
+
+ // Ripple each value down to its correct position
+ for ( i = 1; i < cpi->common.MBs; i ++ )
+ {
+ for ( j = i; j > 0; j -- )
+ {
+ if ( sortlist[j] < sortlist[j-1] )
+ {
+ // Swap values
+ tmp = sortlist[j-1];
+ sortlist[j-1] = sortlist[j];
+ sortlist[j] = tmp;
+ }
+ else
+ break;
+ }
+ }
+
+ // Even number MBs so estimate median as mean of two either side.
+ median = ( 1 + sortlist[cpi->common.MBs >> 1] +
+ sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1;
+
+ cpi->activity_avg = median;
+
+ vpx_free(sortlist);
+ }
+#else
// Simple mean for now
cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs);
+#endif
+
if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
+
+ // Experimental code: return fixed value normalized for several clips
+ if ( ALT_ACT_MEASURE )
+ cpi->activity_avg = 100000;
}
+#define USE_ACT_INDEX 0
#define OUTPUT_NORM_ACT_STATS 0
-// Calculate a normalized activity value for each mb
-static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x )
+
+#if USE_ACT_INDEX
+// Calculate and activity index for each mb
+static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
{
VP8_COMMON *const cm = & cpi->common;
int mb_row, mb_col;
- unsigned int act;
- unsigned int a;
- unsigned int b;
+ INT64 act;
+ INT64 a;
+ INT64 b;
#if OUTPUT_NORM_ACT_STATS
FILE *f = fopen("norm_act.stt", "a");
- fprintf(f, "\n");
+ fprintf(f, "\n%12d\n", cpi->activity_avg );
#endif
// Reset pointers to start of activity map
x->mb_activity_ptr = cpi->mb_activity_map;
- x->mb_norm_activity_ptr = cpi->mb_norm_activity_map;
// Calculate normalized mb activity number.
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
@@ -185,25 +230,19 @@ static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x )
act = *(x->mb_activity_ptr);
// Calculate a normalized activity number
- a = act + 2*cpi->activity_avg;
- b = 2*act + cpi->activity_avg;
+ a = act + 4*cpi->activity_avg;
+ b = 4*act + cpi->activity_avg;
if ( b >= a )
- *(x->mb_norm_activity_ptr) = (int)((b + (a>>1))/a);
+ *(x->activity_ptr) = (int)((b + (a>>1))/a) - 1;
else
- *(x->mb_norm_activity_ptr) = -(int)((a + (b>>1))/b);
-
- if ( *(x->mb_norm_activity_ptr) == 0 )
- {
- *(x->mb_norm_activity_ptr) = 1;
- }
+ *(x->activity_ptr) = 1 - (int)((a + (b>>1))/b);
#if OUTPUT_NORM_ACT_STATS
- fprintf(f, " %6d", *(x->mb_norm_activity_ptr));
+ fprintf(f, " %6d", *(x->mb_activity_ptr));
#endif
// Increment activity map pointers
x->mb_activity_ptr++;
- x->mb_norm_activity_ptr++;
}
#if OUTPUT_NORM_ACT_STATS
@@ -217,33 +256,44 @@ static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x )
#endif
}
-
+#endif
// Loop through all MBs. Note activity of each, average activity and
// calculate a normalized activity for each
static void build_activity_map( VP8_COMP *cpi )
{
MACROBLOCK *const x = & cpi->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
VP8_COMMON *const cm = & cpi->common;
+#if ALT_ACT_MEASURE
+ YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+ int recon_yoffset;
+ int recon_y_stride = new_yv12->y_stride;
+#endif
+
int mb_row, mb_col;
unsigned int mb_activity;
INT64 activity_sum = 0;
- // Initialise source buffer pointer
- x->src = *cpi->Source;
-
- // Set pointer to start of activity map
- x->mb_activity_ptr = cpi->mb_activity_map;
-
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
+#if ALT_ACT_MEASURE
+ // reset above block coeffs
+ xd->up_available = (mb_row != 0);
+ recon_yoffset = (mb_row * recon_y_stride * 16);
+#endif
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
+#if ALT_ACT_MEASURE
+ xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+ xd->left_available = (mb_col != 0);
+ recon_yoffset += 16;
+#endif
// measure activity
- mb_activity = mb_activity_measure( cpi, x );
+ mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col );
// Keep frame sum
activity_sum += mb_activity;
@@ -258,49 +308,50 @@ static void build_activity_map( VP8_COMP *cpi )
x->src.y_buffer += 16;
}
+
// adjust to the next row of mbs
x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
+
+#if ALT_ACT_MEASURE
+ //extend the recon for intra prediction
+ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+#endif
+
}
// Calculate an "average" MB activity
calc_av_activity(cpi, activity_sum);
- // Calculate a normalized activity number of each mb
- calc_norm_activity( cpi, x );
+#if USE_ACT_INDEX
+ // Calculate an activity index number of each mb
+ calc_activity_index( cpi, x );
+#endif
+
}
-// Activity masking based on Tim T's original code
+// Macroblock activity masking
void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
-
- unsigned int a;
- unsigned int b;
- unsigned int act = *(x->mb_activity_ptr);
+#if USE_ACT_INDEX
+ x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
+ x->errorperbit = x->rdmult/x->rddiv;
+#else
+ INT64 a;
+ INT64 b;
+ INT64 act = *(x->mb_activity_ptr);
// Apply the masking to the RD multiplier.
- a = act + 2*cpi->activity_avg;
- b = 2*act + cpi->activity_avg;
+ a = act + (2*cpi->activity_avg);
+ b = (2*act) + cpi->activity_avg;
- //tmp = (unsigned int)(((INT64)tmp*b + (a>>1))/a);
x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+ x->errorperbit = x->rdmult/x->rddiv;
- // For now now zbin adjustment on mode choice
- x->act_zbin_adj = 0;
-}
-
-// Stub function to use a normalized activity measure stored at mb level.
-void vp8_norm_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
-{
- int norm_act;
-
- norm_act = *(x->mb_norm_activity_ptr);
- if (norm_act > 0)
- x->rdmult = norm_act * (x->rdmult);
- else
- x->rdmult = -(x->rdmult / norm_act);
+#endif
- // For now now zbin adjustment on mode choice
- x->act_zbin_adj = 0;
+ // Activity based Zbin adjustment
+ adjust_act_zbin(cpi, x);
}
static
@@ -356,7 +407,6 @@ void encode_mb_row(VP8_COMP *cpi,
// Set the mb activity pointer to the start of the row.
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
- x->mb_norm_activity_ptr = &cpi->mb_norm_activity_map[map_index];
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -476,7 +526,6 @@ void encode_mb_row(VP8_COMP *cpi,
// Increment the activity mask pointers.
x->mb_activity_ptr++;
- x->mb_norm_activity_ptr++;
/* save the block info */
for (i = 0; i < 16; i++)
@@ -525,6 +574,92 @@ void encode_mb_row(VP8_COMP *cpi,
#endif
}
+void init_encode_frame_mb_context(VP8_COMP *cpi)
+{
+ MACROBLOCK *const x = & cpi->mb;
+ VP8_COMMON *const cm = & cpi->common;
+ MACROBLOCKD *const xd = & x->e_mbd;
+
+ // GF active flags data structure
+ x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
+
+ // Activity map pointer
+ x->mb_activity_ptr = cpi->mb_activity_map;
+
+ x->vector_range = 32;
+
+ x->act_zbin_adj = 0;
+
+ x->partition_info = x->pi;
+
+ xd->mode_info_context = cm->mi;
+ xd->mode_info_stride = cm->mode_info_stride;
+
+ xd->frame_type = cm->frame_type;
+
+ xd->frames_since_golden = cm->frames_since_golden;
+ xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
+
+ // reset intra mode contexts
+ if (cm->frame_type == KEY_FRAME)
+ vp8_init_mbmode_probs(cm);
+
+ // Copy data over into macro block data sturctures.
+ x->src = * cpi->Source;
+ xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+ xd->dst = cm->yv12_fb[cm->new_fb_idx];
+
+ // set up frame for intra coded blocks
+ vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
+
+ vp8_build_block_offsets(x);
+
+ vp8_setup_block_dptrs(&x->e_mbd);
+
+ vp8_setup_block_ptrs(x);
+
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ xd->mode_info_context->mbmi.uv_mode = DC_PRED;
+
+ xd->left_context = &cm->left_context;
+
+ vp8_zero(cpi->count_mb_ref_frame_usage)
+ vp8_zero(cpi->ymode_count)
+ vp8_zero(cpi->uv_mode_count)
+
+ x->mvc = cm->fc.mvc;
+
+ vpx_memset(cm->above_context, 0,
+ sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
+
+ xd->ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
+
+ // Special case treatment when GF and ARF are not sensible options for reference
+ if (cpi->ref_frame_flags == VP8_LAST_FLAG)
+ {
+ xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ + vp8_cost_zero(255);
+ xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ + vp8_cost_one(255)
+ + vp8_cost_zero(128);
+ xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ + vp8_cost_one(255)
+ + vp8_cost_one(128);
+ }
+ else
+ {
+ xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ + vp8_cost_zero(cpi->prob_last_coded);
+ xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ + vp8_cost_one(cpi->prob_last_coded)
+ + vp8_cost_zero(cpi->prob_gf_coded);
+ xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ + vp8_cost_one(cpi->prob_last_coded)
+ + vp8_cost_one(cpi->prob_gf_coded);
+ }
+
+}
+
void vp8_encode_frame(VP8_COMP *cpi)
{
int mb_row;
@@ -536,6 +671,17 @@ void vp8_encode_frame(VP8_COMP *cpi)
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
+ vpx_memset(segment_counts, 0, sizeof(segment_counts));
+ totalrate = 0;
+
+ if (cpi->compressor_speed == 2)
+ {
+ if (cpi->oxcf.cpu_used < 0)
+ cpi->Speed = -(cpi->oxcf.cpu_used);
+ else
+ vp8_auto_select_speed(cpi);
+ }
+
// Functions setup for all frame types so we can use MC in AltRef
if (cm->mcomp_filter_type == SIXTAP)
{
@@ -560,10 +706,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
&cpi->common.rtcd.subpix, bilinear16x16);
}
- x->gf_active_ptr = (signed char *)cpi->gf_active_flags; // Point to base of GF active flags data structure
-
- x->vector_range = 32;
-
// Reset frame count of inter 0,0 motion vector useage.
cpi->inter_zz_count = 0;
@@ -574,89 +716,34 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->skip_true_count = 0;
cpi->skip_false_count = 0;
- x->act_zbin_adj = 0;
-
#if 0
// Experimental code
cpi->frame_distortion = 0;
cpi->last_mb_distortion = 0;
#endif
- totalrate = 0;
-
- x->partition_info = x->pi;
-
xd->mode_info_context = cm->mi;
- xd->mode_info_stride = cm->mode_info_stride;
-
- xd->frame_type = cm->frame_type;
- xd->frames_since_golden = cm->frames_since_golden;
- xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
vp8_zero(cpi->MVcount);
- // vp8_zero( Contexts)
vp8_zero(cpi->coef_counts);
- // reset intra mode contexts
- if (cm->frame_type == KEY_FRAME)
- vp8_init_mbmode_probs(cm);
-
-
vp8cx_frame_init_quantizer(cpi);
- if (cpi->compressor_speed == 2)
- {
- if (cpi->oxcf.cpu_used < 0)
- cpi->Speed = -(cpi->oxcf.cpu_used);
- else
- vp8_auto_select_speed(cpi);
- }
-
vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
vp8cx_initialize_me_consts(cpi, cm->base_qindex);
- // Copy data over into macro block data sturctures.
- x->src = * cpi->Source;
- xd->pre = cm->yv12_fb[cm->lst_fb_idx];
- xd->dst = cm->yv12_fb[cm->new_fb_idx];
-
- // set up frame new frame for intra coded blocks
-
- vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
-
- vp8_build_block_offsets(x);
-
- vp8_setup_block_dptrs(&x->e_mbd);
-
- vp8_setup_block_ptrs(x);
-
- xd->mode_info_context->mbmi.mode = DC_PRED;
- xd->mode_info_context->mbmi.uv_mode = DC_PRED;
-
- xd->left_context = &cm->left_context;
-
- vp8_zero(cpi->count_mb_ref_frame_usage)
- vp8_zero(cpi->ymode_count)
- vp8_zero(cpi->uv_mode_count)
-
- x->mvc = cm->fc.mvc;
-
- vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
-
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
- if(1)
- {
- // Build a frame level activity map
- build_activity_map(cpi);
- }
+ // Initialize encode frame context.
+ init_encode_frame_mb_context(cpi);
- // Reset various MB pointers.
- x->src = *cpi->Source;
- x->mb_activity_ptr = cpi->mb_activity_map;
- x->mb_norm_activity_ptr = cpi->mb_norm_activity_map;
+ // Build a frame level activity map
+ build_activity_map(cpi);
}
+ // re-initencode frame context.
+ init_encode_frame_mb_context(cpi);
+
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -997,99 +1084,45 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
// Experimental stub function to create a per MB zbin adjustment based on
// some previously calculated measure of MB activity.
-void adjust_act_zbin( VP8_COMP *cpi, int rate, MACROBLOCK *x )
+static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
{
- INT64 act;
+#if USE_ACT_INDEX
+ x->act_zbin_adj = *(x->mb_activity_ptr);
+#else
INT64 a;
INT64 b;
+ INT64 act = *(x->mb_activity_ptr);
- // Read activity from the map
- act = (INT64)(*(x->mb_activity_ptr));
-
- // Calculate a zbin adjustment for this mb
+ // Apply the masking to the RD multiplier.
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
- if ( b > a )
- //x->act_zbin_adj = (char)((b * 8) / a) - 8;
- x->act_zbin_adj = 8;
- else
- x->act_zbin_adj = 0;
-
- // Tmp force to 0 to disable.
- x->act_zbin_adj = 0;
+ if ( act > cpi->activity_avg )
+ x->act_zbin_adj = (int)(((INT64)b + (a>>1))/a) - 1;
+ else
+ x->act_zbin_adj = 1 - (int)(((INT64)a + (b>>1))/b);
+#endif
}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
- int Error4x4, Error16x16;
- int rate4x4, rate16x16, rateuv;
- int dist4x4, dist16x16, distuv;
- int rate = 0;
- int rate4x4_tokenonly = 0;
- int rate16x16_tokenonly = 0;
- int rateuv_tokenonly = 0;
-
- x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+ int rate;
if (cpi->sf.RD && cpi->compressor_speed != 2)
- {
- vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
- rate += rateuv;
-
- Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
-
- Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);
-
- rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
-
- if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
- {
- adjust_act_zbin( cpi, rate, x );
- vp8_update_zbin_extra(cpi, x);
- }
- }
+ vp8_rd_pick_intra_mode(cpi, x, &rate);
else
- {
- int rate2, best_distortion;
- MB_PREDICTION_MODE mode, best_mode = DC_PRED;
- int this_rd;
- Error16x16 = INT_MAX;
-
- vp8_pick_intra_mbuv_mode(x);
-
- for (mode = DC_PRED; mode <= TM_PRED; mode ++)
- {
- int distortion2;
-
- x->e_mbd.mode_info_context->mbmi.mode = mode;
- RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
- (&x->e_mbd);
- distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16);
- rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ vp8_pick_intra_mode(cpi, x, &rate);
- if (Error16x16 > this_rd)
- {
- Error16x16 = this_rd;
- best_mode = mode;
- best_distortion = distortion2;
- }
- }
- x->e_mbd.mode_info_context->mbmi.mode = best_mode;
-
- Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
+ if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+ {
+ adjust_act_zbin( cpi, x );
+ vp8_update_zbin_extra(cpi, x);
}
- if (Error4x4 < Error16x16)
- {
- x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+ if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
- }
else
- {
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
- }
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
sum_intra_stats(cpi, x);
@@ -1163,7 +1196,7 @@ int vp8cx_encode_inter_macroblock
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
// Adjust the zbin based on this MB rate.
- adjust_act_zbin( cpi, rate, x );
+ adjust_act_zbin( cpi, x );
}
#if 0
@@ -1193,11 +1226,10 @@ int vp8cx_encode_inter_macroblock
{
// Experimental code. Special case for gf and arf zeromv modes.
// Increase zbin size to supress noise
+ cpi->zbin_mode_boost = 0;
if (cpi->zbin_mode_boost_enabled)
{
- if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
- cpi->zbin_mode_boost = 0;
- else
+ if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME )
{
if (xd->mode_info_context->mbmi.mode == ZEROMV)
{
@@ -1212,9 +1244,6 @@ int vp8cx_encode_inter_macroblock
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
}
}
- else
- cpi->zbin_mode_boost = 0;
-
vp8_update_zbin_extra(cpi, x);
}
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 5da69bc59..f076bbbb3 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -28,6 +28,34 @@
#define IF_RTCD(x) NULL
#endif
+int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
+{
+
+ int i;
+ int intra_pred_var = 0;
+ (void) cpi;
+
+ if (use_dc_pred)
+ {
+ x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+
+ vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
+ }
+ else
+ {
+ for (i = 0; i < 16; i++)
+ {
+ x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
+ vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
+ }
+ }
+
+ intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
+
+ return intra_pred_var;
+}
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *x, int ib)
@@ -81,30 +109,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
RECON_INVOKE(&rtcd->common->recon, recon_mby)
(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
- // make sure block modes are set the way we want them for context updates
- for (b = 0; b < 16; b++)
- {
- BLOCKD *d = &x->e_mbd.block[b];
-
- switch (x->e_mbd.mode_info_context->mbmi.mode)
- {
- case DC_PRED:
- d->bmi.as_mode = B_DC_PRED;
- break;
- case V_PRED:
- d->bmi.as_mode = B_VE_PRED;
- break;
- case H_PRED:
- d->bmi.as_mode = B_HE_PRED;
- break;
- case TM_PRED:
- d->bmi.as_mode = B_TM_PRED;
- break;
- default:
- d->bmi.as_mode = B_DC_PRED;
- break;
- }
- }
}
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
@@ -124,4 +128,3 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
-
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index 5861fd1fc..9c1fa5684 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -13,6 +13,7 @@
#define _ENCODEINTRA_H_
#include "onyx_int.h"
+int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred);
void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 665b2d5dc..420ed8eff 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -114,8 +114,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// Set the mb activity pointer to the start of the row.
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
- x->mb_norm_activity_ptr =
- &cpi->mb_norm_activity_map[map_index];
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -230,7 +228,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// Increment the activity mask pointers.
x->mb_activity_ptr++;
- x->mb_norm_activity_ptr++;
/* save the block info */
for (i = 0; i < 16; i++)
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 3c4b8f4a0..571fac17c 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -81,35 +81,6 @@ static const int cq_level[QINDEX_RANGE] =
static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
-static int encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
-{
-
- int i;
- int intra_pred_var = 0;
- (void) cpi;
-
- if (use_dc_pred)
- {
- x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
-
- vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
- }
- else
- {
- for (i = 0; i < 16; i++)
- {
- x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
- vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
- }
- }
-
- intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
-
- return intra_pred_var;
-}
-
// Resets the first pass file to the given position using a relative seek from the current position
static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position)
{
@@ -243,33 +214,58 @@ static int frame_max_bits(VP8_COMP *cpi)
int max_bits;
// For CBR we need to also consider buffer fullness.
- // If we are running below the optimal level then we need to gradually tighten up on max_bits.
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
+ max_bits = 2 * cpi->av_per_frame_bandwidth;
+ max_bits -= cpi->buffered_av_per_frame_bandwidth;
+ max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0);
+ }
+ // VBR
+ else
+ {
+ // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
+ max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+ }
+
+ // Trap case where we are out of bits
+ if (max_bits < 0)
+ max_bits = 0;
- // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user
- max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+ return max_bits;
+}
- // If our buffer is below the optimum level
- if (buffer_fullness_ratio < 1.0)
- {
- // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4.
- int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2;
- max_bits = (int)(max_bits * buffer_fullness_ratio);
+static int gf_group_max_bits(VP8_COMP *cpi)
+{
+ // Max allocation for a golden frame group
+ int max_bits;
- if (max_bits < min_max_bits)
- max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil.
+ // For CBR we need to also consider buffer fullness.
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+ max_bits = cpi->av_per_frame_bandwidth * cpi->baseline_gf_interval;
+ if (max_bits > cpi->oxcf.optimal_buffer_level)
+ {
+ max_bits -= cpi->oxcf.optimal_buffer_level;
+ max_bits += cpi->buffer_level;
}
+ else
+ {
+ max_bits -= (cpi->buffered_av_per_frame_bandwidth
+ - cpi->av_per_frame_bandwidth)
+ * cpi->baseline_gf_interval;
+ }
+
+ max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0);
}
- // VBR
else
{
// For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+ max_bits *= cpi->baseline_gf_interval;
}
+
// Trap case where we are out of bits
if (max_bits < 0)
max_bits = 0;
@@ -582,7 +578,7 @@ void vp8_first_pass(VP8_COMP *cpi)
xd->left_available = (mb_col != 0);
// do intra 16x16 prediction
- this_error = encode_intra(cpi, x, use_dc_pred);
+ this_error = vp8_encode_intra(cpi, x, use_dc_pred);
// "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame)
// We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv.
@@ -1362,7 +1358,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double abs_mv_in_out_accumulator = 0.0;
double mod_err_per_mb_accumulator = 0.0;
- int max_bits = frame_max_bits(cpi); // Max for a single frame
+ int max_group_bits;
unsigned int allow_alt_ref =
cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
@@ -1715,8 +1711,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
// Clip cpi->twopass.gf_group_bits based on user supplied data rate variability limit (cpi->oxcf.two_pass_vbrmax_section)
- if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
- cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
+ max_group_bits = gf_group_max_bits(cpi);
+ if (cpi->twopass.gf_group_bits > max_group_bits)
+ cpi->twopass.gf_group_bits = max_group_bits;
// Reset the file position
reset_fpf_position(cpi, start_pos);
@@ -1725,14 +1722,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.modified_error_used += gf_group_err;
// Assign bits to the arf or gf.
- {
+ for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) {
int Boost;
int frames_in_section;
int allocation_chunks;
int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
+ int gf_bits;
// For ARF frames
- if (cpi->source_alt_ref_pending)
+ if (cpi->source_alt_ref_pending && i == 0)
{
Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
//Boost += (cpi->baseline_gf_interval * 25);
@@ -1771,7 +1769,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
// Calculate the number of bits to be spent on the gf or arf based on the boost number
- cpi->twopass.gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks));
+ gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks));
// If the frame that is to be boosted is simpler than the average for
// the gf/arf group then use an alternative calculation
@@ -1789,9 +1787,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits /
(double)allocation_chunks));
- if (cpi->twopass.gf_bits > alt_gf_bits)
+ if (gf_bits > alt_gf_bits)
{
- cpi->twopass.gf_bits = alt_gf_bits;
+ gf_bits = alt_gf_bits;
}
}
// Else if it is harder than other frames in the group make sure it at
@@ -1804,23 +1802,29 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
mod_frame_err /
DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left));
- if (alt_gf_bits > cpi->twopass.gf_bits)
+ if (alt_gf_bits > gf_bits)
{
- cpi->twopass.gf_bits = alt_gf_bits;
+ gf_bits = alt_gf_bits;
}
}
- // Apply an additional limit for CBR
- if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ // Don't allow a negative value for gf_bits
+ if (gf_bits < 0)
+ gf_bits = 0;
+
+ gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
+
+ if (i == 0)
{
- if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1))
- cpi->twopass.gf_bits = cpi->buffer_level >> 1;
+ cpi->twopass.gf_bits = gf_bits;
}
+ if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)))
+ {
+ cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame
+ }
+ }
- // Dont allow a negative value for gf_bits
- if (cpi->twopass.gf_bits < 0)
- cpi->twopass.gf_bits = 0;
-
+ {
// Adjust KF group bits and error remainin
cpi->twopass.kf_group_error_left -= gf_group_err;
cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
@@ -1835,7 +1839,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
else
cpi->twopass.gf_group_error_left = gf_group_err;
- cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits;
+ cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;
if (cpi->twopass.gf_group_bits < 0)
cpi->twopass.gf_group_bits = 0;
@@ -1851,13 +1855,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
cpi->twopass.mid_gf_extra_bits = 0;
-
- cpi->twopass.gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
- }
-
- if (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) // Normal GF and not a KF
- {
- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for this frame
}
// Adjustment to estimate_max_q based on a measure of complexity of the section
@@ -1907,12 +1904,6 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int max_bits = frame_max_bits(cpi); // Max for a single frame
- // The final few frames have special treatment
- if (cpi->frames_till_gf_update_due >= (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame))
- {
- cpi->twopass.gf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;;
- }
-
// Calculate modified prediction error used in bit allocation
modified_err = calculate_modified_err(cpi, this_frame);
@@ -2014,22 +2005,10 @@ void vp8_second_pass(VP8_COMP *cpi)
if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))
{
// Assign a standard frames worth of bits from those allocated to the GF group
+ int bak = cpi->per_frame_bandwidth;
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
-
- // If appropriate (we are switching into ARF active but it was not previously active) apply a boost for the gf at the start of the group.
- //if ( !cpi->source_alt_ref_active && (cpi->gfu_boost > 150) )
- if (FALSE)
- {
- int extra_bits;
- int pct_extra = (cpi->gfu_boost - 100) / 50;
-
- pct_extra = (pct_extra > 20) ? 20 : pct_extra;
-
- extra_bits = (cpi->twopass.gf_group_bits * pct_extra) / 100;
- cpi->twopass.gf_group_bits -= extra_bits;
- cpi->per_frame_bandwidth += extra_bits;
- }
+ cpi->per_frame_bandwidth = bak;
}
}
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index b9b371fe4..1ec3a9894 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -67,9 +67,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 4a9b95a2e..cf3f5510c 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1546,6 +1546,7 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
+ cpi->buffered_av_per_frame_bandwidth = cpi->av_per_frame_bandwidth;
cpi->total_actual_bits = 0;
cpi->total_target_vs_actual = 0;
@@ -1641,7 +1642,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
break;
}
- if (cpi->pass == 0)
+ if (cpi->pass == 0 && cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER)
cpi->auto_worst_q = 1;
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
@@ -3528,7 +3529,8 @@ static void encode_frame_to_data_rate
// For CBR if the buffer reaches its maximum level then we can no longer
// save up bits for later frames so we might as well use them up
// on the current frame.
- if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
+ if (cpi->pass == 2
+ && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
(cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode)
{
int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4
@@ -3619,6 +3621,9 @@ static void encode_frame_to_data_rate
}
else
{
+ if(cpi->pass != 2)
+ Q = cpi->avg_frame_qindex;
+
cpi->active_best_quality = inter_minq[Q];
// For the constant/constrained quality mode we dont want
@@ -3931,15 +3936,16 @@ static void encode_frame_to_data_rate
(cpi->active_worst_quality < cpi->worst_quality) &&
(cpi->projected_frame_size > frame_over_shoot_limit))
{
- int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit;
+ /* step down active_worst_quality such that the corresponding
+ * active_best_quality will be equal to the current
+ * active_worst_quality + 1
+ */
+ int i;
- // If so is there any scope for relaxing it
- while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0))
- {
- cpi->active_worst_quality++;
- top_index = cpi->active_worst_quality;
- over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size.
- }
+ for(i=cpi->active_worst_quality; i<cpi->worst_quality; i++)
+ if(inter_minq[i] >= cpi->active_worst_quality + 1)
+ break;
+ cpi->active_worst_quality = i;
// If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop.
active_worst_qchanged = TRUE;
@@ -4327,10 +4333,9 @@ static void encode_frame_to_data_rate
// Update the buffer level variable.
// Non-viewable frames are a special case and are treated as pure overhead.
- if ( !cm->show_frame )
- cpi->bits_off_target -= cpi->projected_frame_size;
- else
- cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
+ if ( cm->show_frame )
+ cpi->bits_off_target += cpi->av_per_frame_bandwidth;
+ cpi->bits_off_target -= cpi->projected_frame_size;
// Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
@@ -4344,7 +4349,33 @@ static void encode_frame_to_data_rate
// Debug stats
cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size);
- cpi->buffer_level = cpi->bits_off_target;
+ // Update the buffered average bitrate
+ {
+ long long numerator;
+
+ numerator = cpi->oxcf.maximum_buffer_size
+ - cpi->buffered_av_per_frame_bandwidth
+ + cpi->projected_frame_size;
+ numerator *= cpi->buffered_av_per_frame_bandwidth;
+ cpi->buffered_av_per_frame_bandwidth = numerator
+ / cpi->oxcf.maximum_buffer_size;
+ }
+
+ {
+ long long tmp = (long long)cpi->buffered_av_per_frame_bandwidth
+ * cpi->oxcf.maximum_buffer_size
+ / cpi->av_per_frame_bandwidth;
+ cpi->buffer_level = cpi->oxcf.maximum_buffer_size
+ - tmp
+ + cpi->oxcf.optimal_buffer_level;
+ }
+
+ // Accumulate overshoot error.
+ cpi->accumulated_overshoot +=
+ (cpi->projected_frame_size > cpi->av_per_frame_bandwidth)
+ ? cpi->projected_frame_size - cpi->av_per_frame_bandwidth
+ : 0;
+
// Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames
if (cm->frame_type == KEY_FRAME)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 663786004..c460b9da9 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -47,8 +47,8 @@
#define MIN_THRESHMULT 32
#define MAX_THRESHMULT 512
-#define GF_ZEROMV_ZBIN_BOOST 24
-#define LF_ZEROMV_ZBIN_BOOST 12
+#define GF_ZEROMV_ZBIN_BOOST 12
+#define LF_ZEROMV_ZBIN_BOOST 6
#define MV_ZBIN_BOOST 4
#define ZBIN_OQ_MAX 192
@@ -351,6 +351,10 @@ typedef struct VP8_COMP
int per_frame_bandwidth; // Current section per frame bandwidth target
int av_per_frame_bandwidth; // Average frame size target for clip
int min_frame_bandwidth; // Minimum allocation that should be used for any frame
+ int buffered_av_per_frame_bandwidth; // Average bitrate over the last buffer
+ int buffered_av_per_frame_bandwidth_rem; // Average bitrate remainder
+ int accumulated_overshoot; // Accumulated # of bits spent > target
+
int inter_frame_target;
double output_frame_rate;
long long last_time_stamp_seen;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 456059cf8..b60d2419f 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -43,7 +43,6 @@ extern const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
-extern unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
extern int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *, int *, int *, int, int *mvcost[2], int, int fullpixel);
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
@@ -98,37 +97,6 @@ static int get_inter_mbpred_error(MACROBLOCK *mb,
}
-unsigned int vp8_get16x16pred_error_c
-(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride
-)
-{
- unsigned pred_error = 0;
- int i, j;
- int sum = 0;
-
- for (i = 0; i < 16; i++)
- {
- int diff;
-
- for (j = 0; j < 16; j++)
- {
- diff = src_ptr[j] - ref_ptr[j];
- sum += diff;
- pred_error += diff * diff;
- }
-
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- pred_error -= sum * sum / 256;
- return pred_error;
-}
-
unsigned int vp8_get4x4sse_cs_c
(
@@ -172,8 +140,7 @@ static int pick_intra4x4block(
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
- B_PREDICTION_MODE above,
- B_PREDICTION_MODE left,
+ unsigned int *mode_costs,
int *bestrate,
int *bestdistortion)
@@ -185,16 +152,6 @@ static int pick_intra4x4block(
int best_rd = INT_MAX; // 1<<30
int rate;
int distortion;
- unsigned int *mode_costs;
-
- if (x->e_mbd.frame_type == KEY_FRAME)
- {
- mode_costs = x->bmode_costs[above][left];
- }
- else
- {
- mode_costs = x->inter_bmode_costs;
- }
for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++)
{
@@ -221,7 +178,7 @@ static int pick_intra4x4block(
}
-int vp8_pick_intra4x4mby_modes
+static int pick_intra4x4mby_modes
(
const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *mb,
@@ -234,20 +191,30 @@ int vp8_pick_intra4x4mby_modes
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error;
int distortion = 0;
+ unsigned int *bmode_costs;
vp8_intra_prediction_down_copy(xd);
+ bmode_costs = mb->inter_bmode_costs;
+
for (i = 0; i < 16; i++)
{
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
- const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
- const B_PREDICTION_MODE L = left_block_mode(mic, i);
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(d);
- pick_intra4x4block(rtcd, mb, i, &best_mode, A, L, &r, &d);
+ if (mb->e_mbd.frame_type == KEY_FRAME)
+ {
+ const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
+ const B_PREDICTION_MODE L = left_block_mode(mic, i);
+
+ bmode_costs = mb->bmode_costs[A][L];
+ }
+
+
+ pick_intra4x4block(rtcd, mb, i, &best_mode, bmode_costs, &r, &d);
cost += r;
distortion += d;
@@ -275,7 +242,7 @@ int vp8_pick_intra4x4mby_modes
return error;
}
-void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb)
+static void pick_intra_mbuv_mode(MACROBLOCK *mb)
{
MACROBLOCKD *x = &mb->e_mbd;
@@ -443,26 +410,23 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
BLOCK *b = &x->block[0];
BLOCKD *d = &x->e_mbd.block[0];
MACROBLOCKD *xd = &x->e_mbd;
- union b_mode_info best_bmodes[16];
MB_MODE_INFO best_mbmode;
int_mv best_ref_mv;
int_mv mode_mv[MB_MODE_COUNT];
MB_PREDICTION_MODE this_mode;
int num00;
- int i;
int mdcounts[4];
int best_rd = INT_MAX; // 1 << 30;
int best_intra_rd = INT_MAX;
int mode_index;
- int ref_frame_cost[MAX_REF_FRAMES];
int rate;
int rate2;
int distortion2;
int bestsme;
//int all_rds[MAX_MODES]; // Experimental debug code.
int best_mode_index = 0;
- unsigned int sse = INT_MAX;
+ unsigned int sse = INT_MAX, best_sse = INT_MAX;
int_mv mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@@ -485,7 +449,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
vpx_memset(near_mv, 0, sizeof(near_mv));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
- vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
// set up all the refframe dependent pointers.
@@ -536,32 +499,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*returnintra = INT_MAX;
x->skip = 0;
- ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
-
- // Special case treatment when GF and ARF are not sensible options for reference
- if (cpi->ref_frame_flags == VP8_LAST_FLAG)
- {
- ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_zero(255);
- ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(255)
- + vp8_cost_zero(128);
- ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(255)
- + vp8_cost_one(128);
- }
- else
- {
- ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_zero(cpi->prob_last_coded);
- ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(cpi->prob_last_coded)
- + vp8_cost_zero(cpi->prob_gf_coded);
- ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(cpi->prob_last_coded)
- + vp8_cost_one(cpi->prob_gf_coded);
- }
-
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
// if we encode a new mv this is important
@@ -613,7 +550,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
// Work out the cost assosciated with selecting the reference frame
- frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ frame_cost =
+ x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
rate2 += frame_cost;
// everything but intra
@@ -659,10 +597,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
switch (this_mode)
{
case B_PRED:
- // Pass best so far to vp8_pick_intra4x4mby_modes to use as breakout
- distortion2 = *returndistortion;
- vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x,
- &rate, &distortion2);
+ // Pass best so far to pick_intra4x4mby_modes to use as breakout
+ distortion2 = best_sse;
+ pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);
if (distortion2 == INT_MAX)
{
@@ -672,9 +609,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
rate2 += rate;
distortion2 = VARIANCE_INVOKE
- (&cpi->rtcd.variance, get16x16prederror)(
+ (&cpi->rtcd.variance, var16x16)(
x->src.y_buffer, x->src.y_stride,
- x->e_mbd.predictor, 16);
+ x->e_mbd.predictor, 16, &sse);
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
@@ -697,7 +634,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
case TM_PRED:
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
(&x->e_mbd);
- distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16);
+ distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+ (x->src.y_buffer, x->src.y_stride,
+ x->e_mbd.predictor, 16, &sse);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -886,15 +825,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*returnrate = rate2;
*returndistortion = distortion2;
+ best_sse = sse;
best_rd = this_rd;
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
- if (this_mode == B_PRED)
- for (i = 0; i < 16; i++)
- {
- best_bmodes[i].as_mode = x->e_mbd.block[i].bmi.as_mode;
- }
-
// Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
@@ -956,15 +890,52 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if (best_mbmode.mode <= B_PRED)
{
/* set mode_info_context->mbmi.uv_mode */
- vp8_pick_intra_mbuv_mode(x);
+ pick_intra_mbuv_mode(x);
}
- if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
+ update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
+}
+
+
+void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
+{
+ int error4x4, error16x16 = INT_MAX;
+ int rate, best_rate = 0, distortion, best_sse;
+ MB_PREDICTION_MODE mode, best_mode = DC_PRED;
+ int this_rd;
+ unsigned int sse;
+
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+
+ pick_intra_mbuv_mode(x);
+
+ for (mode = DC_PRED; mode <= TM_PRED; mode ++)
{
- for (i = 0; i < 16; i++)
+ x->e_mbd.mode_info_context->mbmi.mode = mode;
+ RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
+ (&x->e_mbd);
+ distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+ (x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, &sse);
+ rate = x->mbmode_cost[x->e_mbd.frame_type][mode];
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (error16x16 > this_rd)
{
- x->e_mbd.block[i].bmi.as_mode = best_bmodes[i].as_mode;
+ error16x16 = this_rd;
+ best_mode = mode;
+ best_sse = sse;
+ best_rate = rate;
}
}
- update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
+ x->e_mbd.mode_info_context->mbmi.mode = best_mode;
+
+ error4x4 = pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate,
+ &best_sse);
+ if (error4x4 < error16x16)
+ {
+ x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+ best_rate = rate;
+ }
+
+ *rate_ = best_rate;
}
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index f96fc5376..a0103d165 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -14,7 +14,6 @@
#include "vpx_ports/config.h"
#include "vp8/common/onyxc_int.h"
-extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion);
-extern void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb);
extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
+extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
#endif
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index 8dfd2a543..63f235784 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -48,9 +48,6 @@ void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, i
void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
-unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
// c imports
@@ -88,9 +85,6 @@ extern sub_pixel_variance_function sub_pixel_variance16x8_c;
extern sub_pixel_variance_function sub_pixel_variance16x16_c;
extern unsigned int vp8_get_mb_ss_c(short *);
-extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
// ppc
@@ -149,9 +143,6 @@ void vp8_cmachine_specific_config(void)
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ppc;
vp8_get_mb_ss = vp8_get_mb_ss_c;
- vp8_get16x16pred_error = vp8_get16x16pred_error_c;
- vp8_get8x8var = vp8_get8x8var_ppc;
- vp8_get16x16var = vp8_get16x16var_ppc;
vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
vp8_sad16x16 = vp8_sad16x16_ppc;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 78b3b4715..642660a0e 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -650,10 +650,10 @@ static void calc_gf_params(VP8_COMP *cpi)
static void calc_pframe_target_size(VP8_COMP *cpi)
{
- int min_frame_target;
+ int min_frame_target, max_frame_target;
int Adjustment;
- min_frame_target = 0;
+ min_frame_target = 1;
if (cpi->pass == 2)
{
@@ -661,10 +661,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5))
min_frame_target = cpi->av_per_frame_bandwidth >> 5;
+
+ max_frame_target = INT_MAX;
}
- else if (min_frame_target < cpi->per_frame_bandwidth / 4)
- min_frame_target = cpi->per_frame_bandwidth / 4;
+ else
+ {
+ if (min_frame_target < cpi->per_frame_bandwidth / 4)
+ min_frame_target = cpi->per_frame_bandwidth / 4;
+ /* Don't allow the target to completely deplete the buffer. */
+ max_frame_target = cpi->buffer_level + cpi->av_per_frame_bandwidth;
+ if(max_frame_target < min_frame_target)
+ max_frame_target = min_frame_target;
+ }
// Special alt reference frame case
if (cpi->common.refresh_alt_ref_frame)
@@ -1157,6 +1166,32 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
}
}
+
+ if (cpi->pass==0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER){
+ /* determine the accumulated error to apply to this frame. Apply
+ * more of the error when we've been undershooting, less when
+ * we've been overshooting
+ */
+ long long adjust;
+ int bitrate_error;
+
+ bitrate_error = cpi->av_per_frame_bandwidth
+ - cpi->buffered_av_per_frame_bandwidth;
+
+ adjust = cpi->accumulated_overshoot;
+ adjust *= cpi->av_per_frame_bandwidth + bitrate_error;
+ adjust /= cpi->oxcf.maximum_buffer_size;
+ if (adjust > (cpi->this_frame_target - min_frame_target))
+ adjust = (cpi->this_frame_target - min_frame_target);
+ else if (adjust < 0)
+ adjust = 0;
+
+ cpi->this_frame_target -= adjust;
+ cpi->accumulated_overshoot -= adjust;
+ }
+
+ if(cpi->this_frame_target > max_frame_target)
+ cpi->this_frame_target = max_frame_target;
}
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 3186fa6ed..cd250c425 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -201,47 +201,47 @@ static int rdmult_lut[QINDEX_RANGE]=
/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE] =
{
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 6, 6,
- 6, 6, 6, 6, 6, 7, 7, 7,
- 7, 7, 7, 7, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 9, 9,
- 9, 9, 9, 9, 10, 10, 10, 10,
- 10, 10, 11, 11, 11, 11, 11, 11,
- 12, 12, 12, 12, 12, 12, 12, 13,
- 13, 13, 13, 13, 13, 14, 14, 14,
- 14, 14, 15, 15, 15, 15, 15, 15,
- 16, 16, 16, 16, 16, 16, 17, 17,
- 17, 17, 17, 17, 17, 18, 18, 18,
- 18, 18, 19, 19, 19, 19, 19, 19,
- 20, 20, 20, 21, 21, 21, 21, 22,
- 22, 22, 23, 23, 23, 24, 24, 24,
- 25, 25, 26, 26, 27, 27, 27, 28,
- 28, 28, 29, 29, 30, 30, 31, 31
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 10, 10,
+ 10, 10, 10, 10, 10, 10, 11, 11,
+ 11, 11, 11, 11, 12, 12, 12, 12,
+ 12, 12, 13, 13, 13, 13, 14, 14
};
static int sad_per_bit4lut[QINDEX_RANGE] =
{
- 5, 5, 5, 5, 5, 5, 7, 7,
+ 2, 2, 2, 2, 2, 2, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 5, 5,
+ 5, 5, 5, 5, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 8, 8, 8,
- 8, 8, 8, 8, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 11, 11,
- 11, 11, 11, 11, 13, 13, 13, 13,
- 13, 13, 14, 14, 14, 14, 14, 14,
- 16, 16, 16, 16, 16, 16, 16, 17,
- 17, 17, 17, 17, 17, 19, 19, 19,
- 19, 19, 20, 20, 20, 20, 20, 20,
- 22, 22, 22, 22, 22, 22, 23, 23,
- 23, 23, 23, 23, 23, 25, 25, 25,
- 25, 25, 26, 26, 26, 26, 26, 26,
- 28, 28, 28, 29, 29, 29, 29, 31,
- 31, 31, 32, 32, 32, 34, 34, 34,
- 35, 35, 37, 37, 38, 38, 38, 40,
- 40, 40, 41, 41, 43, 43, 44, 44,
+ 8, 8, 9, 9, 9, 9, 9, 9,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 13, 14,
+ 14, 14, 14, 14, 15, 15, 15, 15,
+ 16, 16, 16, 16, 17, 17, 17, 18,
+ 18, 18, 19, 19, 19, 20, 20, 20,
};
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
{
- cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]/2;
- cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]/2;
+ cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
+ cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
}
@@ -719,8 +719,8 @@ static int rd_pick_intra4x4block(
return best_rd;
}
-int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
- int *rate_y, int *Distortion, int best_rd)
+static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
+ int *rate_y, int *Distortion, int best_rd)
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
@@ -782,11 +782,13 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
-int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
- MACROBLOCK *x,
- int *Rate,
- int *rate_y,
- int *Distortion)
+
+
+static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
+ MACROBLOCK *x,
+ int *Rate,
+ int *rate_y,
+ int *Distortion)
{
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
@@ -858,7 +860,7 @@ static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distort
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
+static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
{
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
@@ -1795,7 +1797,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int distortion;
int best_rd = INT_MAX;
int best_intra_rd = INT_MAX;
- int ref_frame_cost[MAX_REF_FRAMES];
int rate2, distortion2;
int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
@@ -1872,36 +1873,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->skip = 0;
- ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
-
- // Special case treatment when GF and ARF are not sensible options for reference
- if (cpi->ref_frame_flags == VP8_LAST_FLAG)
- {
- ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_zero(255);
- ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(255)
- + vp8_cost_zero(128);
- ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(255)
- + vp8_cost_one(128);
- }
- else
- {
- ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_zero(cpi->prob_last_coded);
- ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(cpi->prob_last_coded)
- + vp8_cost_zero(cpi->prob_gf_coded);
- ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
- + vp8_cost_one(cpi->prob_last_coded)
- + vp8_cost_one(cpi->prob_gf_coded);
- }
-
vpx_memset(mode_mv, 0, sizeof(mode_mv));
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
- vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
+ rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
@@ -2024,7 +1999,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int tmp_rd;
// Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
- tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
+ tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
rate2 += rate;
distortion2 += distortion;
@@ -2247,29 +2222,28 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
else if (x->encode_breakout)
{
- int sum;
unsigned int sse;
+ unsigned int var;
int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >>4);
if(threshold < x->encode_breakout)
threshold = x->encode_breakout;
- VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
- (x->src.y_buffer, x->src.y_stride,
- x->e_mbd.predictor, 16, &sse, &sum);
+ var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+ (x->src.y_buffer, x->src.y_stride,
+ x->e_mbd.predictor, 16, &sse);
if (sse < threshold)
{
- // Check u and v to make sure skip is ok
- int sse2 = 0;
+ unsigned int q2dc = xd->block[24].dequant[0];
/* If theres is no codeable 2nd order dc
or a very small uniform pixel change change */
- if (abs(sum) < (xd->block[24].dequant[0]<<2)||
- ((sum * sum>>8) > sse && abs(sum) <128))
+ if ((sse - var < q2dc * q2dc >>4) ||
+ (sse /2 > var && sse-var < 64))
{
- sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
-
+ // Check u and v to make sure skip is ok
+ int sse2= VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
if (sse2 * 2 < threshold)
{
x->skip = 1;
@@ -2319,8 +2293,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
rate2 += other_cost;
}
- // Estimate the reference frame signaling cost and add it to the rolling cost variable.
- rate2 += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ /* Estimate the reference frame signaling cost and add it
+ * to the rolling cost variable.
+ */
+ rate2 +=
+ x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
if (!disable_skip)
{
@@ -2384,7 +2361,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
}
- other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ other_cost +=
+ x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
/* Calculate the final y RD estimate for this mode */
best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
@@ -2492,3 +2470,39 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
}
+
+void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
+{
+ int error4x4, error16x16;
+ int rate4x4, rate16x16 = 0, rateuv;
+ int dist4x4, dist16x16, distuv;
+ int rate;
+ int rate4x4_tokenonly = 0;
+ int rate16x16_tokenonly = 0;
+ int rateuv_tokenonly = 0;
+
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+
+ rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
+ rate = rateuv;
+
+ error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
+ &rate16x16, &rate16x16_tokenonly,
+ &dist16x16);
+
+ error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
+ &rate4x4, &rate4x4_tokenonly,
+ &dist4x4, error16x16);
+
+ if (error4x4 < error16x16)
+ {
+ x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+ rate += rate4x4;
+ }
+ else
+ {
+ rate += rate16x16;
+ }
+
+ *rate_ = rate;
+}
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index fbbf9077f..95134cb81 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -15,10 +15,8 @@
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
-extern int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd);
-extern int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion);
-extern void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion);
extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
+extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
extern void vp8_mv_pred
(
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 1c5923813..329abac68 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -98,7 +98,6 @@ static void tokenize2nd_order_b
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@@ -120,9 +119,9 @@ static void tokenize2nd_order_b
{
int rc = vp8_default_zig_zag1d[c];
const int v = qcoeff_ptr[rc];
-
+#if CONFIG_DEBUG
assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
-
+#endif
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
x = vp8_dct_value_tokens_ptr[v].Token;
}
@@ -149,7 +148,6 @@ static void tokenize1st_order_b
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@@ -173,9 +171,9 @@ static void tokenize1st_order_b
{
int rc = vp8_default_zig_zag1d[c];
const int v = qcoeff_ptr[rc];
-
+#if CONFIG_DEBUG
assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
-
+#endif
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
x = vp8_dct_value_tokens_ptr[v].Token;
}
@@ -196,14 +194,11 @@ static void tokenize1st_order_b
}
-static int mb_is_skippable(MACROBLOCKD *x)
+static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
{
- int has_y2_block;
int skip = 1;
int i = 0;
- has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
- && x->mode_info_context->mbmi.mode != SPLITMV);
if (has_y2_block)
{
for (i = 0; i < 16; i++)
@@ -223,8 +218,12 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
int plane_type;
int b;
+ int has_y2_block;
+
+ has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
+ && x->mode_info_context->mbmi.mode != SPLITMV);
- x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x);
+ x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block);
if (x->mode_info_context->mbmi.mb_skip_coeff)
{
cpi->skip_true_count++;
@@ -241,29 +240,24 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
cpi->skip_false_count++;
-#if 0
- vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
-#endif
- if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV)
- {
- plane_type = 3;
- }
- else
+
+ plane_type = 3;
+ if(has_y2_block)
{
- tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
+ tokenize2nd_order_b(x->block + 24, t, 1,
A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
}
for (b = 0; b < 16; b++)
- tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
+ tokenize1st_order_b(x->block + b, t, plane_type,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
- tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
+ tokenize1st_order_b(x->block + b, t, 2,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
@@ -352,10 +346,7 @@ void vp8_tokenize_initialize()
static __inline void stuff2nd_order_b
(
- const BLOCKD *const b,
TOKENEXTRA **tp,
- const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@@ -364,9 +355,6 @@ static __inline void stuff2nd_order_b
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- (void) frametype;
- (void) type;
- (void) b;
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
@@ -382,10 +370,7 @@ static __inline void stuff2nd_order_b
static __inline void stuff1st_order_b
(
- const BLOCKD *const b,
TOKENEXTRA **tp,
- const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@@ -394,9 +379,6 @@ static __inline void stuff1st_order_b
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- (void) frametype;
- (void) type;
- (void) b;
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
@@ -411,10 +393,7 @@ static __inline void stuff1st_order_b
static __inline
void stuff1st_order_buv
(
- const BLOCKD *const b,
TOKENEXTRA **tp,
- const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@@ -423,9 +402,6 @@ void stuff1st_order_buv
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- (void) frametype;
- (void) type;
- (void) b;
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
@@ -445,17 +421,17 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
int plane_type;
int b;
- stuff2nd_order_b(x->block + 24, t, 1, x->frame_type,
+ stuff2nd_order_b(t,
A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
for (b = 0; b < 16; b++)
- stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
+ stuff1st_order_b(t,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
- stuff1st_order_buv(x->block + b, t, 2, x->frame_type,
+ stuff1st_order_buv(t,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 0d7d977d7..894b4f9e4 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -308,21 +308,6 @@ extern prototype_getmbss(vp8_variance_getmbss);
#endif
extern prototype_variance(vp8_variance_mse16x16);
-#ifndef vp8_variance_get16x16prederror
-#define vp8_variance_get16x16prederror vp8_get16x16pred_error_c
-#endif
-extern prototype_get16x16prederror(vp8_variance_get16x16prederror);
-
-#ifndef vp8_variance_get8x8var
-#define vp8_variance_get8x8var vp8_get8x8var_c
-#endif
-extern prototype_variance2(vp8_variance_get8x8var);
-
-#ifndef vp8_variance_get16x16var
-#define vp8_variance_get16x16var vp8_get16x16var_c
-#endif
-extern prototype_variance2(vp8_variance_get16x16var);
-
#ifndef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_c
#endif
@@ -376,9 +361,6 @@ typedef struct
vp8_getmbss_fn_t getmbss;
vp8_variance_fn_t mse16x16;
- vp8_get16x16prederror_fn_t get16x16prederror;
- vp8_variance2_fn_t get8x8var;
- vp8_variance2_fn_t get16x16var;
vp8_get16x16prederror_fn_t get4x4sse_cs;
vp8_sad_multi_fn_t sad16x16x3;
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index ede07c8db..c7b9c2209 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -61,40 +61,6 @@ static void variance(
}
}
-unsigned int
-vp8_get8x8var_c
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-)
-{
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
- return (*SSE - (((*Sum) * (*Sum)) >> 6));
-}
-
-unsigned int
-vp8_get16x16var_c
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-)
-{
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
- return (*SSE - (((*Sum) * (*Sum)) >> 8));
-
-}
-
-
unsigned int vp8_variance16x16_c(
const unsigned char *src_ptr,
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index 59a5cb1d7..19f6c1686 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -31,6 +31,12 @@ extern prototype_fdct(vp8_short_fdct8x4_mmx);
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
+#undef vp8_fdct_fast4x4
+#define vp8_fdct_fast4x4 vp8_short_fdct4x4_mmx
+
+#undef vp8_fdct_fast8x4
+#define vp8_fdct_fast8x4 vp8_short_fdct8x4_mmx
+
#endif
#endif
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index 67a9b4d3e..13b76ea91 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -843,136 +843,6 @@ filter_block2d_bil_var_mmx_loop:
pop rbp
ret
-;unsigned int vp8_get16x16pred_error_mmx
-;(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride
-;)
-global sym(vp8_get16x16pred_error_mmx)
-sym(vp8_get16x16pred_error_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- mov rsi, arg(0) ;DWORD PTR [src_ptr]
- mov rdi, arg(2) ;DWORD PTR [ref_ptr]
-
- movsxd rax, DWORD PTR arg(1) ;[src_stride]
- movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
-
- pxor mm0, mm0 ; clear xmm0 for unpack
- pxor mm7, mm7 ; clear xmm7 for accumulating diffs
-
- pxor mm6, mm6 ; clear xmm6 for accumulating sse
- mov rcx, 16
-
-var16loop:
-
- movq mm1, [rsi]
- movq mm2, [rdi]
-
- movq mm3, mm1
- movq mm4, mm2
-
- punpcklbw mm1, mm0
- punpckhbw mm3, mm0
-
- punpcklbw mm2, mm0
- punpckhbw mm4, mm0
-
- psubw mm1, mm2
- psubw mm3, mm4
-
- paddw mm7, mm1
- pmaddwd mm1, mm1
-
- paddw mm7, mm3
- pmaddwd mm3, mm3
-
- paddd mm6, mm1
- paddd mm6, mm3
-
-
- movq mm1, [rsi+8]
- movq mm2, [rdi+8]
-
- movq mm3, mm1
- movq mm4, mm2
-
- punpcklbw mm1, mm0
- punpckhbw mm3, mm0
-
- punpcklbw mm2, mm0
- punpckhbw mm4, mm0
-
- psubw mm1, mm2
- psubw mm3, mm4
-
- paddw mm7, mm1
- pmaddwd mm1, mm1
-
- paddw mm7, mm3
- pmaddwd mm3, mm3
-
- paddd mm6, mm1
- paddd mm6, mm3
-
- add rsi, rax
- add rdi, rdx
-
- sub rcx, 1
- jnz var16loop
-
-
- movq mm1, mm6
- pxor mm6, mm6
-
- pxor mm5, mm5
- punpcklwd mm6, mm7
-
- punpckhwd mm5, mm7
- psrad mm5, 16
-
- psrad mm6, 16
- paddd mm6, mm5
-
- movq mm2, mm1
- psrlq mm1, 32
-
- paddd mm2, mm1
- movq mm7, mm6
-
- psrlq mm6, 32
- paddd mm6, mm7
-
- movd DWORD PTR [rsp], mm6 ;Sum
- movd DWORD PTR [rsp+4], mm2 ;SSE
-
- ; return (SSE-((Sum*Sum)>>8));
- movsxd rdx, dword ptr [rsp]
- imul rdx, rdx
- sar rdx, 8
- movsxd rax, dword ptr [rsp + 4]
- sub rax, rdx
-
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 5becc7344..b7a6b3286 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -213,122 +213,6 @@ var16loop:
ret
-;unsigned int vp8_get16x16pred_error_sse2
-;(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride
-;)
-global sym(vp8_get16x16pred_error_sse2)
-sym(vp8_get16x16pred_error_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- mov rsi, arg(0) ;[src_ptr]
- mov rdi, arg(2) ;[ref_ptr]
-
- movsxd rax, DWORD PTR arg(1) ;[src_stride]
- movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
- pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
-
- pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
- mov rcx, 16
-
-var16peloop:
- movdqu xmm1, XMMWORD PTR [rsi]
- movdqu xmm2, XMMWORD PTR [rdi]
-
- movdqa xmm3, xmm1
- movdqa xmm4, xmm2
-
- punpcklbw xmm1, xmm0
- punpckhbw xmm3, xmm0
-
- punpcklbw xmm2, xmm0
- punpckhbw xmm4, xmm0
-
- psubw xmm1, xmm2
- psubw xmm3, xmm4
-
- paddw xmm7, xmm1
- pmaddwd xmm1, xmm1
-
- paddw xmm7, xmm3
- pmaddwd xmm3, xmm3
-
- paddd xmm6, xmm1
- paddd xmm6, xmm3
-
- add rsi, rax
- add rdi, rdx
-
- sub rcx, 1
- jnz var16peloop
-
-
- movdqa xmm1, xmm6
- pxor xmm6, xmm6
-
- pxor xmm5, xmm5
- punpcklwd xmm6, xmm7
-
- punpckhwd xmm5, xmm7
- psrad xmm5, 16
-
- psrad xmm6, 16
- paddd xmm6, xmm5
-
- movdqa xmm2, xmm1
- punpckldq xmm1, xmm0
-
- punpckhdq xmm2, xmm0
- movdqa xmm7, xmm6
-
- paddd xmm1, xmm2
- punpckldq xmm6, xmm0
-
- punpckhdq xmm7, xmm0
- paddd xmm6, xmm7
-
- movdqa xmm2, xmm1
- movdqa xmm7, xmm6
-
- psrldq xmm1, 8
- psrldq xmm6, 8
-
- paddd xmm7, xmm6
- paddd xmm1, xmm2
-
- movd DWORD PTR [rsp], xmm7 ;Sum
- movd DWORD PTR [rsp+4], xmm1 ;SSE
-
- ; return (SSE-((Sum*Sum)>>8));
- movsxd rdx, dword ptr [rsp]
- imul rdx, rdx
- sar rdx, 8
- movsxd rax, dword ptr [rsp + 4]
- sub rax, rdx
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
;unsigned int vp8_get8x8var_sse2
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 4a89868c2..92b695f17 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -76,43 +76,6 @@ extern void vp8_filter_block2d_bil_var_mmx
int *sum,
unsigned int *sumsquared
);
-extern unsigned int vp8_get16x16pred_error_mmx
-(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride
-);
-
-unsigned int vp8_get16x16var_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *SUM
-)
-{
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3, avg;
-
-
- vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
-
- var = sse0 + sse1 + sse2 + sse3;
- avg = sum0 + sum1 + sum2 + sum3;
-
- *SSE = var;
- *SUM = avg;
- return (var - ((avg * avg) >> 8));
-
-}
-
-
-
unsigned int vp8_variance4x4_mmx(
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index dfc0915b9..24062eb9b 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -53,13 +53,6 @@ unsigned int vp8_get16x16var_sse2
unsigned int *SSE,
int *Sum
);
-unsigned int vp8_get16x16pred_error_sse2
-(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride
-);
unsigned int vp8_get8x8var_sse2
(
const unsigned char *src_ptr,
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 77e05e1e8..0ee8eb7e5 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -41,9 +41,7 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
extern prototype_getmbss(vp8_get_mb_ss_mmx);
extern prototype_variance(vp8_mse16x16_mmx);
-extern prototype_get16x16prederror(vp8_get16x16pred_error_mmx);
extern prototype_variance2(vp8_get8x8var_mmx);
-extern prototype_variance2(vp8_get16x16var_mmx);
extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -110,15 +108,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_mmx
-#undef vp8_variance_get16x16prederror
-#define vp8_variance_get16x16prederror vp8_get16x16pred_error_mmx
-
-#undef vp8_variance_get8x8var
-#define vp8_variance_get8x8var vp8_get8x8var_mmx
-
-#undef vp8_variance_get16x16var
-#define vp8_variance_get16x16var vp8_get16x16var_mmx
-
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_mmx
@@ -148,7 +137,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
extern prototype_getmbss(vp8_get_mb_ss_sse2);
extern prototype_variance(vp8_mse16x16_wmt);
-extern prototype_get16x16prederror(vp8_get16x16pred_error_sse2);
extern prototype_variance2(vp8_get8x8var_sse2);
extern prototype_variance2(vp8_get16x16var_sse2);
@@ -216,15 +204,6 @@ extern prototype_variance2(vp8_get16x16var_sse2);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_wmt
-#undef vp8_variance_get16x16prederror
-#define vp8_variance_get16x16prederror vp8_get16x16pred_error_sse2
-
-#undef vp8_variance_get8x8var
-#define vp8_variance_get8x8var vp8_get8x8var_sse2
-
-#undef vp8_variance_get16x16var
-#define vp8_variance_get16x16var vp8_get16x16var_sse2
-
#endif
#endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 378b14066..9a324ec12 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -16,7 +16,7 @@
#if HAVE_MMX
-static void short_fdct8x4_mmx(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
{
vp8_short_fdct4x4_mmx(input, output, pitch);
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
@@ -26,7 +26,7 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
short *qcoeff_ptr, short *dequant_ptr,
short *scan_mask, short *round_ptr,
short *quant_ptr, short *dqcoeff_ptr);
-static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
+void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
{
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
short *coeff_ptr = b->coeff;
@@ -51,7 +51,7 @@ static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
}
int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
+int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
{
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
@@ -59,7 +59,7 @@ static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
}
int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-static int mbuverror_mmx(MACROBLOCK *mb)
+int vp8_mbuverror_mmx(MACROBLOCK *mb)
{
short *s_ptr = &mb->coeff[256];
short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -69,7 +69,7 @@ static int mbuverror_mmx(MACROBLOCK *mb)
void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
-static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
+void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
{
unsigned char *z = *(be->base_src) + be->src;
unsigned int src_stride = be->src_stride;
@@ -82,7 +82,7 @@ static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
#if HAVE_SSE2
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
+int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
{
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
@@ -90,7 +90,7 @@ static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
}
int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-static int mbuverror_xmm(MACROBLOCK *mb)
+int vp8_mbuverror_xmm(MACROBLOCK *mb)
{
short *s_ptr = &mb->coeff[256];
short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -100,7 +100,7 @@ static int mbuverror_xmm(MACROBLOCK *mb)
void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
-static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
{
unsigned char *z = *(be->base_src) + be->src;
unsigned int src_stride = be->src_stride;
@@ -175,26 +175,23 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx;
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_mmx;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
- cpi->rtcd.fdct.short8x4 = short_fdct8x4_mmx;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
- cpi->rtcd.fdct.fast8x4 = short_fdct8x4_mmx;
+ cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
- cpi->rtcd.encodemb.mberr = mbblock_error_mmx;
- cpi->rtcd.encodemb.mbuverr = mbuverror_mmx;
- cpi->rtcd.encodemb.subb = subtract_b_mmx;
+ cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
+ cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_mmx;
cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx;
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx;
- /*cpi->rtcd.quantize.fastquantb = fast_quantize_b_mmx;*/
+ /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
}
#endif
@@ -226,11 +223,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2;
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2;
-
-
/* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
@@ -241,9 +233,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ;
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
- cpi->rtcd.encodemb.mberr = mbblock_error_xmm;
- cpi->rtcd.encodemb.mbuverr = mbuverror_xmm;
- cpi->rtcd.encodemb.subb = subtract_b_sse2;
+ cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
+ cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2;
cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2;
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;