diff options
author | Johann <johannkoenig@google.com> | 2011-06-17 14:56:27 -0400 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2011-06-17 14:56:27 -0400 |
commit | e18d7bc23095b89132007c64e38ce262dcb0a943 (patch) | |
tree | 44e1121039142e6637b81b1f389292988dadb546 /vp8/encoder | |
parent | 6c8205d37eb47c0757f752505ada46bc4079c003 (diff) | |
parent | a60fc419f50eefbef568ceecb93233471e894fa5 (diff) | |
download | libvpx-e18d7bc23095b89132007c64e38ce262dcb0a943.tar libvpx-e18d7bc23095b89132007c64e38ce262dcb0a943.tar.gz libvpx-e18d7bc23095b89132007c64e38ce262dcb0a943.tar.bz2 libvpx-e18d7bc23095b89132007c64e38ce262dcb0a943.zip |
Merge remote branch 'origin/master' into experimental
Conflicts:
vp8/encoder/encodeframe.c
vp8/encoder/rdopt.c
Change-Id: I8bab720889ac652361abdedfe2cc91a89742cb30
Diffstat (limited to 'vp8/encoder')
29 files changed, 679 insertions, 1105 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index db079d5ed..56c858c71 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -53,10 +53,7 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6; /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ - /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/ + /*cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/ /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;*/ @@ -103,9 +100,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon; - /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/ cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm index 6af4e87ba..55edbf512 100644 --- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm @@ -10,7 +10,6 @@ EXPORT |vp8_mse16x16_neon| - EXPORT |vp8_get16x16pred_error_neon| EXPORT |vp8_get4x4sse_cs_neon| ARM @@ -76,62 +75,6 @@ mse16x16_neon_loop ENDP -;============================ -; r0 unsigned char *src_ptr -; r1 int src_stride -; r2 unsigned char *ref_ptr -; r3 int ref_stride -|vp8_get16x16pred_error_neon| PROC - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - pred_error - vmov.i8 q10, #0 - - mov r12, #8 - -get16x16pred_error_neon_loop - vld1.8 {q0}, [r0], r1 ;Load up source and reference - vld1.8 {q2}, [r2], r3 - vld1.8 {q1}, [r0], r1 - vld1.8 {q3}, [r2], r3 - - vsubl.u8 q11, d0, d4 - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vpadal.s16 q8, q11 - vmlal.s16 q9, d22, d22 - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne get16x16pred_error_neon_loop - - vadd.u32 q10, q9, q10 - vpaddl.s32 q0, q8 - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] - bx lr - - ENDP ;============================= ; r0 unsigned char *src_ptr, diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index ad0d37193..f2f761f9e 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -83,9 +83,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon); //extern prototype_getmbss(vp8_get_mb_ss_c); extern prototype_variance(vp8_mse16x16_neon); -extern prototype_get16x16prederror(vp8_get16x16pred_error_neon); -//extern prototype_variance2(vp8_get8x8var_c); -//extern prototype_variance2(vp8_get16x16var_c); extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon); #if !CONFIG_RUNTIME_CPU_DETECT @@ -149,15 +146,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon); #undef vp8_variance_mse16x16 #define vp8_variance_mse16x16 vp8_mse16x16_neon -#undef vp8_variance_get16x16prederror -#define vp8_variance_get16x16prederror vp8_get16x16pred_error_neon - -//#undef vp8_variance_get8x8var -//#define vp8_variance_get8x8var vp8_get8x8var_c - -//#undef vp8_variance_get16x16var -//#define vp8_variance_get16x16var vp8_get16x16var_c - #undef vp8_variance_get4x4sse_cs #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon #endif diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c index 9c81c8d0a..c79e915f8 100644 --- a/vp8/encoder/asm_enc_offsets.c +++ b/vp8/encoder/asm_enc_offsets.c @@ -9,31 +9,17 @@ */ -#include "vpx_ports/config.h" -#include <stddef.h> - +#include "vpx_ports/asm_offsets.h" +#include "vpx_config.h" #include "block.h" #include "vp8/common/blockd.h" #include "onyx_int.h" #include "treewriter.h" #include "tokenize.h" -#define ct_assert(name,cond) \ - static void assert_##name(void) UNUSED;\ - static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} - -#define DEFINE(sym, val) int sym = val; - -/* -#define BLANK() asm volatile("\n->" : : ) -*/ - -/* - * int main(void) - * { - */ +BEGIN -//regular quantize +/* regular quantize */ DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff)); DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin)); DEFINE(vp8_block_round, offsetof(BLOCK, round)); @@ -48,7 +34,7 @@ DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant)); DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob)); -// subtract +/* subtract */ DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src)); DEFINE(vp8_block_src, offsetof(BLOCK, src)); DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff)); @@ -56,7 +42,7 @@ DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride)); DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor)); -//pack tokens +/* pack tokens */ DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue)); DEFINE(vp8_writer_range, offsetof(vp8_writer, range)); DEFINE(vp8_writer_value, offsetof(vp8_writer, value)); @@ -90,16 +76,16 @@ DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST)); DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows)); -// These two sizes are used in vp8cx_pack_tokens. They are hard coded -// so if the size changes this will have to be adjusted. +END + +/* add asserts for any offset that is not supported by assembly code + * add asserts for any size that is not supported by assembly code + + * These are used in vp8cx_pack_tokens. They are hard coded so if their sizes + * change they will have to be adjusted. + */ + #if HAVE_ARMV5TE ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8) ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16) #endif - -//add asserts for any offset that is not supported by assembly code -//add asserts for any size that is not supported by assembly code -/* - * return 0; - * } - */ diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index ced963559..e93d30d1a 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -776,9 +776,9 @@ static void write_mv_ref vp8_writer *w, MB_PREDICTION_MODE m, const vp8_prob *p ) { - +#if CONFIG_DEBUG assert(NEARESTMV <= m && m <= SPLITMV); - +#endif vp8_write_token(w, vp8_mv_ref_tree, p, vp8_mv_ref_encoding_array - NEARESTMV + m); } @@ -788,8 +788,9 @@ static void write_sub_mv_ref vp8_writer *w, B_PREDICTION_MODE m, const vp8_prob *p ) { +#if CONFIG_DEBUG assert(LEFT4X4 <= m && m <= NEW4X4); - +#endif vp8_write_token(w, vp8_sub_mv_ref_tree, p, vp8_sub_mv_ref_encoding_array - LEFT4X4 + m); } @@ -1017,11 +1018,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) blockmode = cpi->mb.partition_info->bmi[j].mode; blockmv = cpi->mb.partition_info->bmi[j].mv; - +#if CONFIG_DEBUG while (j != L[++k]) if (k >= 16) assert(0); - +#else + while (j != L[++k]); +#endif leftmv.as_int = left_block_mv(m, k); abovemv.as_int = above_block_mv(m, k, mis); mv_contz = vp8_mv_cont(&leftmv, &abovemv); diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 5372f8da2..5834e6f08 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -50,6 +50,7 @@ void vp8_build_block_offsets(MACROBLOCK *x); void vp8_setup_block_ptrs(MACROBLOCK *x); int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset); int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); +static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); #ifdef MODE_STATS unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -84,8 +85,6 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) { unsigned int act; unsigned int sse; - int sum; - /* TODO: This could also be done over smaller areas (8x8), but that would * require extensive changes elsewhere, as lambda is assumed to be fixed * over an entire MB in most of the code. @@ -93,14 +92,9 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). */ - VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, - x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum); - - /* This requires a full 32 bits of precision. */ - act = (sse<<8) - sum*sum; - - /* Drop 4 to give us some headroom to work with. */ - act = (act + 8) >> 4; + act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer, + x->src.y_stride, VP8_VAR_OFFS, 0, &sse); + act = act<<4; /* If the region is flat, lower the activity some more. */ if (act < 8<<12) @@ -110,70 +104,121 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) } // Stub for alternative experimental activity measures. -static unsigned int alt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) +static unsigned int alt_activity_measure( VP8_COMP *cpi, + MACROBLOCK *x, int use_dc_pred ) { - unsigned int mb_activity = VP8_ACTIVITY_AVG_MIN; - - x->e_mbd.mode_info_context->mbmi.mode = DC_PRED; - x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - - vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - - mb_activity = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff); - - return mb_activity; + return vp8_encode_intra(cpi,x, use_dc_pred); } // Measure the activity of the current macroblock // What we measure here is TBD so abstracted to this function -static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) +#define ALT_ACT_MEASURE 1 +static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, + int mb_row, int mb_col) { unsigned int mb_activity; - if ( 1 ) + if ( ALT_ACT_MEASURE ) { - // Original activity measure from Tim T's code. - mb_activity = tt_activity_measure( cpi, x ); + int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); + + // Or use and alternative. + mb_activity = alt_activity_measure( cpi, x, use_dc_pred ); } else { - // Or use and alternative. - mb_activity = alt_activity_measure( cpi, x ); + // Original activity measure from Tim T's code. + mb_activity = tt_activity_measure( cpi, x ); } + if ( mb_activity < VP8_ACTIVITY_AVG_MIN ) + mb_activity = VP8_ACTIVITY_AVG_MIN; + return mb_activity; } // Calculate an "average" mb activity value for the frame +#define ACT_MEDIAN 0 static void calc_av_activity( VP8_COMP *cpi, INT64 activity_sum ) { +#if ACT_MEDIAN + // Find median: Simple n^2 algorithm for experimentation + { + unsigned int median; + unsigned int i,j; + unsigned int * sortlist; + unsigned int tmp; + + // Create a list to sort to + CHECK_MEM_ERROR(sortlist, + vpx_calloc(sizeof(unsigned int), + cpi->common.MBs)); + + // Copy map to sort list + vpx_memcpy( sortlist, cpi->mb_activity_map, + sizeof(unsigned int) * cpi->common.MBs ); + + + // Ripple each value down to its correct position + for ( i = 1; i < cpi->common.MBs; i ++ ) + { + for ( j = i; j > 0; j -- ) + { + if ( sortlist[j] < sortlist[j-1] ) + { + // Swap values + tmp = sortlist[j-1]; + sortlist[j-1] = sortlist[j]; + sortlist[j] = tmp; + } + else + break; + } + } + + // Even number MBs so estimate median as mean of two either side. + median = ( 1 + sortlist[cpi->common.MBs >> 1] + + sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1; + + cpi->activity_avg = median; + + vpx_free(sortlist); + } +#else // Simple mean for now cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs); +#endif + if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; + + // Experimental code: return fixed value normalized for several clips + if ( ALT_ACT_MEASURE ) + cpi->activity_avg = 100000; } +#define USE_ACT_INDEX 0 #define OUTPUT_NORM_ACT_STATS 0 -// Calculate a normalized activity value for each mb -static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x ) + +#if USE_ACT_INDEX +// Calculate and activity index for each mb +static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) { VP8_COMMON *const cm = & cpi->common; int mb_row, mb_col; - unsigned int act; - unsigned int a; - unsigned int b; + INT64 act; + INT64 a; + INT64 b; #if OUTPUT_NORM_ACT_STATS FILE *f = fopen("norm_act.stt", "a"); - fprintf(f, "\n"); + fprintf(f, "\n%12d\n", cpi->activity_avg ); #endif // Reset pointers to start of activity map x->mb_activity_ptr = cpi->mb_activity_map; - x->mb_norm_activity_ptr = cpi->mb_norm_activity_map; // Calculate normalized mb activity number. for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) @@ -185,25 +230,19 @@ static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x ) act = *(x->mb_activity_ptr); // Calculate a normalized activity number - a = act + 2*cpi->activity_avg; - b = 2*act + cpi->activity_avg; + a = act + 4*cpi->activity_avg; + b = 4*act + cpi->activity_avg; if ( b >= a ) - *(x->mb_norm_activity_ptr) = (int)((b + (a>>1))/a); + *(x->activity_ptr) = (int)((b + (a>>1))/a) - 1; else - *(x->mb_norm_activity_ptr) = -(int)((a + (b>>1))/b); - - if ( *(x->mb_norm_activity_ptr) == 0 ) - { - *(x->mb_norm_activity_ptr) = 1; - } + *(x->activity_ptr) = 1 - (int)((a + (b>>1))/b); #if OUTPUT_NORM_ACT_STATS - fprintf(f, " %6d", *(x->mb_norm_activity_ptr)); + fprintf(f, " %6d", *(x->mb_activity_ptr)); #endif // Increment activity map pointers x->mb_activity_ptr++; - x->mb_norm_activity_ptr++; } #if OUTPUT_NORM_ACT_STATS @@ -217,33 +256,44 @@ static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x ) #endif } - +#endif // Loop through all MBs. Note activity of each, average activity and // calculate a normalized activity for each static void build_activity_map( VP8_COMP *cpi ) { MACROBLOCK *const x = & cpi->mb; + MACROBLOCKD *xd = &x->e_mbd; VP8_COMMON *const cm = & cpi->common; +#if ALT_ACT_MEASURE + YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; + int recon_yoffset; + int recon_y_stride = new_yv12->y_stride; +#endif + int mb_row, mb_col; unsigned int mb_activity; INT64 activity_sum = 0; - // Initialise source buffer pointer - x->src = *cpi->Source; - - // Set pointer to start of activity map - x->mb_activity_ptr = cpi->mb_activity_map; - // for each macroblock row in image for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { +#if ALT_ACT_MEASURE + // reset above block coeffs + xd->up_available = (mb_row != 0); + recon_yoffset = (mb_row * recon_y_stride * 16); +#endif // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { +#if ALT_ACT_MEASURE + xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; + xd->left_available = (mb_col != 0); + recon_yoffset += 16; +#endif // measure activity - mb_activity = mb_activity_measure( cpi, x ); + mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col ); // Keep frame sum activity_sum += mb_activity; @@ -258,49 +308,50 @@ static void build_activity_map( VP8_COMP *cpi ) x->src.y_buffer += 16; } + // adjust to the next row of mbs x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; + +#if ALT_ACT_MEASURE + //extend the recon for intra prediction + vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); +#endif + } // Calculate an "average" MB activity calc_av_activity(cpi, activity_sum); - // Calculate a normalized activity number of each mb - calc_norm_activity( cpi, x ); +#if USE_ACT_INDEX + // Calculate an activity index number of each mb + calc_activity_index( cpi, x ); +#endif + } -// Activity masking based on Tim T's original code +// Macroblock activity masking void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) { - - unsigned int a; - unsigned int b; - unsigned int act = *(x->mb_activity_ptr); +#if USE_ACT_INDEX + x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); + x->errorperbit = x->rdmult/x->rddiv; +#else + INT64 a; + INT64 b; + INT64 act = *(x->mb_activity_ptr); // Apply the masking to the RD multiplier. - a = act + 2*cpi->activity_avg; - b = 2*act + cpi->activity_avg; + a = act + (2*cpi->activity_avg); + b = (2*act) + cpi->activity_avg; - //tmp = (unsigned int)(((INT64)tmp*b + (a>>1))/a); x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a); + x->errorperbit = x->rdmult/x->rddiv; - // For now now zbin adjustment on mode choice - x->act_zbin_adj = 0; -} - -// Stub function to use a normalized activity measure stored at mb level. -void vp8_norm_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) -{ - int norm_act; - - norm_act = *(x->mb_norm_activity_ptr); - if (norm_act > 0) - x->rdmult = norm_act * (x->rdmult); - else - x->rdmult = -(x->rdmult / norm_act); +#endif - // For now now zbin adjustment on mode choice - x->act_zbin_adj = 0; + // Activity based Zbin adjustment + adjust_act_zbin(cpi, x); } static @@ -356,7 +407,6 @@ void encode_mb_row(VP8_COMP *cpi, // Set the mb activity pointer to the start of the row. x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - x->mb_norm_activity_ptr = &cpi->mb_norm_activity_map[map_index]; // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) @@ -476,7 +526,6 @@ void encode_mb_row(VP8_COMP *cpi, // Increment the activity mask pointers. x->mb_activity_ptr++; - x->mb_norm_activity_ptr++; /* save the block info */ for (i = 0; i < 16; i++) @@ -525,6 +574,92 @@ void encode_mb_row(VP8_COMP *cpi, #endif } +void init_encode_frame_mb_context(VP8_COMP *cpi) +{ + MACROBLOCK *const x = & cpi->mb; + VP8_COMMON *const cm = & cpi->common; + MACROBLOCKD *const xd = & x->e_mbd; + + // GF active flags data structure + x->gf_active_ptr = (signed char *)cpi->gf_active_flags; + + // Activity map pointer + x->mb_activity_ptr = cpi->mb_activity_map; + + x->vector_range = 32; + + x->act_zbin_adj = 0; + + x->partition_info = x->pi; + + xd->mode_info_context = cm->mi; + xd->mode_info_stride = cm->mode_info_stride; + + xd->frame_type = cm->frame_type; + + xd->frames_since_golden = cm->frames_since_golden; + xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame; + + // reset intra mode contexts + if (cm->frame_type == KEY_FRAME) + vp8_init_mbmode_probs(cm); + + // Copy data over into macro block data sturctures. + x->src = * cpi->Source; + xd->pre = cm->yv12_fb[cm->lst_fb_idx]; + xd->dst = cm->yv12_fb[cm->new_fb_idx]; + + // set up frame for intra coded blocks + vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); + + vp8_build_block_offsets(x); + + vp8_setup_block_dptrs(&x->e_mbd); + + vp8_setup_block_ptrs(x); + + xd->mode_info_context->mbmi.mode = DC_PRED; + xd->mode_info_context->mbmi.uv_mode = DC_PRED; + + xd->left_context = &cm->left_context; + + vp8_zero(cpi->count_mb_ref_frame_usage) + vp8_zero(cpi->ymode_count) + vp8_zero(cpi->uv_mode_count) + + x->mvc = cm->fc.mvc; + + vpx_memset(cm->above_context, 0, + sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); + + xd->ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); + + // Special case treatment when GF and ARF are not sensible options for reference + if (cpi->ref_frame_flags == VP8_LAST_FLAG) + { + xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_zero(255); + xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(255) + + vp8_cost_zero(128); + xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(255) + + vp8_cost_one(128); + } + else + { + xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_zero(cpi->prob_last_coded); + xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(cpi->prob_last_coded) + + vp8_cost_zero(cpi->prob_gf_coded); + xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(cpi->prob_last_coded) + + vp8_cost_one(cpi->prob_gf_coded); + } + +} + void vp8_encode_frame(VP8_COMP *cpi) { int mb_row; @@ -536,6 +671,17 @@ void vp8_encode_frame(VP8_COMP *cpi) int segment_counts[MAX_MB_SEGMENTS]; int totalrate; + vpx_memset(segment_counts, 0, sizeof(segment_counts)); + totalrate = 0; + + if (cpi->compressor_speed == 2) + { + if (cpi->oxcf.cpu_used < 0) + cpi->Speed = -(cpi->oxcf.cpu_used); + else + vp8_auto_select_speed(cpi); + } + // Functions setup for all frame types so we can use MC in AltRef if (cm->mcomp_filter_type == SIXTAP) { @@ -560,10 +706,6 @@ void vp8_encode_frame(VP8_COMP *cpi) &cpi->common.rtcd.subpix, bilinear16x16); } - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; // Point to base of GF active flags data structure - - x->vector_range = 32; - // Reset frame count of inter 0,0 motion vector useage. cpi->inter_zz_count = 0; @@ -574,89 +716,34 @@ void vp8_encode_frame(VP8_COMP *cpi) cpi->skip_true_count = 0; cpi->skip_false_count = 0; - x->act_zbin_adj = 0; - #if 0 // Experimental code cpi->frame_distortion = 0; cpi->last_mb_distortion = 0; #endif - totalrate = 0; - - x->partition_info = x->pi; - xd->mode_info_context = cm->mi; - xd->mode_info_stride = cm->mode_info_stride; - - xd->frame_type = cm->frame_type; - xd->frames_since_golden = cm->frames_since_golden; - xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame; vp8_zero(cpi->MVcount); - // vp8_zero( Contexts) vp8_zero(cpi->coef_counts); - // reset intra mode contexts - if (cm->frame_type == KEY_FRAME) - vp8_init_mbmode_probs(cm); - - vp8cx_frame_init_quantizer(cpi); - if (cpi->compressor_speed == 2) - { - if (cpi->oxcf.cpu_used < 0) - cpi->Speed = -(cpi->oxcf.cpu_used); - else - vp8_auto_select_speed(cpi); - } - vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); vp8cx_initialize_me_consts(cpi, cm->base_qindex); - // Copy data over into macro block data sturctures. - x->src = * cpi->Source; - xd->pre = cm->yv12_fb[cm->lst_fb_idx]; - xd->dst = cm->yv12_fb[cm->new_fb_idx]; - - // set up frame new frame for intra coded blocks - - vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); - - vp8_build_block_offsets(x); - - vp8_setup_block_dptrs(&x->e_mbd); - - vp8_setup_block_ptrs(x); - - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_context->mbmi.uv_mode = DC_PRED; - - xd->left_context = &cm->left_context; - - vp8_zero(cpi->count_mb_ref_frame_usage) - vp8_zero(cpi->ymode_count) - vp8_zero(cpi->uv_mode_count) - - x->mvc = cm->fc.mvc; - - vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) { - if(1) - { - // Build a frame level activity map - build_activity_map(cpi); - } + // Initialize encode frame context. + init_encode_frame_mb_context(cpi); - // Reset various MB pointers. - x->src = *cpi->Source; - x->mb_activity_ptr = cpi->mb_activity_map; - x->mb_norm_activity_ptr = cpi->mb_norm_activity_map; + // Build a frame level activity map + build_activity_map(cpi); } + // re-initencode frame context. + init_encode_frame_mb_context(cpi); + { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -997,99 +1084,45 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) // Experimental stub function to create a per MB zbin adjustment based on // some previously calculated measure of MB activity. -void adjust_act_zbin( VP8_COMP *cpi, int rate, MACROBLOCK *x ) +static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) { - INT64 act; +#if USE_ACT_INDEX + x->act_zbin_adj = *(x->mb_activity_ptr); +#else INT64 a; INT64 b; + INT64 act = *(x->mb_activity_ptr); - // Read activity from the map - act = (INT64)(*(x->mb_activity_ptr)); - - // Calculate a zbin adjustment for this mb + // Apply the masking to the RD multiplier. a = act + 4*cpi->activity_avg; b = 4*act + cpi->activity_avg; - if ( b > a ) - //x->act_zbin_adj = (char)((b * 8) / a) - 8; - x->act_zbin_adj = 8; - else - x->act_zbin_adj = 0; - - // Tmp force to 0 to disable. - x->act_zbin_adj = 0; + if ( act > cpi->activity_avg ) + x->act_zbin_adj = (int)(((INT64)b + (a>>1))/a) - 1; + else + x->act_zbin_adj = 1 - (int)(((INT64)a + (b>>1))/b); +#endif } int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { - int Error4x4, Error16x16; - int rate4x4, rate16x16, rateuv; - int dist4x4, dist16x16, distuv; - int rate = 0; - int rate4x4_tokenonly = 0; - int rate16x16_tokenonly = 0; - int rateuv_tokenonly = 0; - - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + int rate; if (cpi->sf.RD && cpi->compressor_speed != 2) - { - vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); - rate += rateuv; - - Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16); - - Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16); - - rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16; - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { - adjust_act_zbin( cpi, rate, x ); - vp8_update_zbin_extra(cpi, x); - } - } + vp8_rd_pick_intra_mode(cpi, x, &rate); else - { - int rate2, best_distortion; - MB_PREDICTION_MODE mode, best_mode = DC_PRED; - int this_rd; - Error16x16 = INT_MAX; - - vp8_pick_intra_mbuv_mode(x); - - for (mode = DC_PRED; mode <= TM_PRED; mode ++) - { - int distortion2; - - x->e_mbd.mode_info_context->mbmi.mode = mode; - RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby) - (&x->e_mbd); - distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16); - rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + vp8_pick_intra_mode(cpi, x, &rate); - if (Error16x16 > this_rd) - { - Error16x16 = this_rd; - best_mode = mode; - best_distortion = distortion2; - } - } - x->e_mbd.mode_info_context->mbmi.mode = best_mode; - - Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion); + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + { + adjust_act_zbin( cpi, x ); + vp8_update_zbin_extra(cpi, x); } - if (Error4x4 < Error16x16) - { - x->e_mbd.mode_info_context->mbmi.mode = B_PRED; + if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); - } else - { vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - } vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); sum_intra_stats(cpi, x); @@ -1163,7 +1196,7 @@ int vp8cx_encode_inter_macroblock if(cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. - adjust_act_zbin( cpi, rate, x ); + adjust_act_zbin( cpi, x ); } #if 0 @@ -1193,11 +1226,10 @@ int vp8cx_encode_inter_macroblock { // Experimental code. Special case for gf and arf zeromv modes. // Increase zbin size to supress noise + cpi->zbin_mode_boost = 0; if (cpi->zbin_mode_boost_enabled) { - if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME ) - cpi->zbin_mode_boost = 0; - else + if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME ) { if (xd->mode_info_context->mbmi.mode == ZEROMV) { @@ -1212,9 +1244,6 @@ int vp8cx_encode_inter_macroblock cpi->zbin_mode_boost = MV_ZBIN_BOOST; } } - else - cpi->zbin_mode_boost = 0; - vp8_update_zbin_extra(cpi, x); } diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 5da69bc59..f076bbbb3 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -28,6 +28,34 @@ #define IF_RTCD(x) NULL #endif +int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) +{ + + int i; + int intra_pred_var = 0; + (void) cpi; + + if (use_dc_pred) + { + x->e_mbd.mode_info_context->mbmi.mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + + vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); + } + else + { + for (i = 0; i < 16; i++) + { + x->e_mbd.block[i].bmi.as_mode = B_DC_PRED; + vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i); + } + } + + intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff); + + return intra_pred_var; +} void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, int ib) @@ -81,30 +109,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); - // make sure block modes are set the way we want them for context updates - for (b = 0; b < 16; b++) - { - BLOCKD *d = &x->e_mbd.block[b]; - - switch (x->e_mbd.mode_info_context->mbmi.mode) - { - case DC_PRED: - d->bmi.as_mode = B_DC_PRED; - break; - case V_PRED: - d->bmi.as_mode = B_VE_PRED; - break; - case H_PRED: - d->bmi.as_mode = B_HE_PRED; - break; - case TM_PRED: - d->bmi.as_mode = B_TM_PRED; - break; - default: - d->bmi.as_mode = B_DC_PRED; - break; - } - } } void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) @@ -124,4 +128,3 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); } - diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h index 5861fd1fc..9c1fa5684 100644 --- a/vp8/encoder/encodeintra.h +++ b/vp8/encoder/encodeintra.h @@ -13,6 +13,7 @@ #define _ENCODEINTRA_H_ #include "onyx_int.h" +int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred); void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x); void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x); void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb); diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 665b2d5dc..420ed8eff 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -114,8 +114,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) // Set the mb activity pointer to the start of the row. x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - x->mb_norm_activity_ptr = - &cpi->mb_norm_activity_map[map_index]; // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) @@ -230,7 +228,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) // Increment the activity mask pointers. x->mb_activity_ptr++; - x->mb_norm_activity_ptr++; /* save the block info */ for (i = 0; i < 16; i++) diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 3c4b8f4a0..571fac17c 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -81,35 +81,6 @@ static const int cq_level[QINDEX_RANGE] = static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); -static int encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) -{ - - int i; - int intra_pred_var = 0; - (void) cpi; - - if (use_dc_pred) - { - x->e_mbd.mode_info_context->mbmi.mode = DC_PRED; - x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - - vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - } - else - { - for (i = 0; i < 16; i++) - { - x->e_mbd.block[i].bmi.as_mode = B_DC_PRED; - vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i); - } - } - - intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff); - - return intra_pred_var; -} - // Resets the first pass file to the given position using a relative seek from the current position static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) { @@ -243,33 +214,58 @@ static int frame_max_bits(VP8_COMP *cpi) int max_bits; // For CBR we need to also consider buffer fullness. - // If we are running below the optimal level then we need to gradually tighten up on max_bits. if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); + max_bits = 2 * cpi->av_per_frame_bandwidth; + max_bits -= cpi->buffered_av_per_frame_bandwidth; + max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0); + } + // VBR + else + { + // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user + max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + } + + // Trap case where we are out of bits + if (max_bits < 0) + max_bits = 0; - // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user - max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + return max_bits; +} - // If our buffer is below the optimum level - if (buffer_fullness_ratio < 1.0) - { - // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. - int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; - max_bits = (int)(max_bits * buffer_fullness_ratio); +static int gf_group_max_bits(VP8_COMP *cpi) +{ + // Max allocation for a golden frame group + int max_bits; - if (max_bits < min_max_bits) - max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil. + // For CBR we need to also consider buffer fullness. + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + { + max_bits = cpi->av_per_frame_bandwidth * cpi->baseline_gf_interval; + if (max_bits > cpi->oxcf.optimal_buffer_level) + { + max_bits -= cpi->oxcf.optimal_buffer_level; + max_bits += cpi->buffer_level; } + else + { + max_bits -= (cpi->buffered_av_per_frame_bandwidth + - cpi->av_per_frame_bandwidth) + * cpi->baseline_gf_interval; + } + + max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0); } - // VBR else { // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + max_bits *= cpi->baseline_gf_interval; } + // Trap case where we are out of bits if (max_bits < 0) max_bits = 0; @@ -582,7 +578,7 @@ void vp8_first_pass(VP8_COMP *cpi) xd->left_available = (mb_col != 0); // do intra 16x16 prediction - this_error = encode_intra(cpi, x, use_dc_pred); + this_error = vp8_encode_intra(cpi, x, use_dc_pred); // "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame) // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv. @@ -1362,7 +1358,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double abs_mv_in_out_accumulator = 0.0; double mod_err_per_mb_accumulator = 0.0; - int max_bits = frame_max_bits(cpi); // Max for a single frame + int max_group_bits; unsigned int allow_alt_ref = cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; @@ -1715,8 +1711,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; // Clip cpi->twopass.gf_group_bits based on user supplied data rate variability limit (cpi->oxcf.two_pass_vbrmax_section) - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; + max_group_bits = gf_group_max_bits(cpi); + if (cpi->twopass.gf_group_bits > max_group_bits) + cpi->twopass.gf_group_bits = max_group_bits; // Reset the file position reset_fpf_position(cpi, start_pos); @@ -1725,14 +1722,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.modified_error_used += gf_group_err; // Assign bits to the arf or gf. - { + for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { int Boost; int frames_in_section; int allocation_chunks; int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; + int gf_bits; // For ARF frames - if (cpi->source_alt_ref_pending) + if (cpi->source_alt_ref_pending && i == 0) { Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); //Boost += (cpi->baseline_gf_interval * 25); @@ -1771,7 +1769,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } // Calculate the number of bits to be spent on the gf or arf based on the boost number - cpi->twopass.gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks)); + gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks)); // If the frame that is to be boosted is simpler than the average for // the gf/arf group then use an alternative calculation @@ -1789,9 +1787,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / (double)allocation_chunks)); - if (cpi->twopass.gf_bits > alt_gf_bits) + if (gf_bits > alt_gf_bits) { - cpi->twopass.gf_bits = alt_gf_bits; + gf_bits = alt_gf_bits; } } // Else if it is harder than other frames in the group make sure it at @@ -1804,23 +1802,29 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left)); - if (alt_gf_bits > cpi->twopass.gf_bits) + if (alt_gf_bits > gf_bits) { - cpi->twopass.gf_bits = alt_gf_bits; + gf_bits = alt_gf_bits; } } - // Apply an additional limit for CBR - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + // Dont allow a negative value for gf_bits + if (gf_bits < 0) + gf_bits = 0; + + gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame + + if (i == 0) { - if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1)) - cpi->twopass.gf_bits = cpi->buffer_level >> 1; + cpi->twopass.gf_bits = gf_bits; } + if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) + { + cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame + } + } - // Dont allow a negative value for gf_bits - if (cpi->twopass.gf_bits < 0) - cpi->twopass.gf_bits = 0; - + { // Adjust KF group bits and error remainin cpi->twopass.kf_group_error_left -= gf_group_err; cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; @@ -1835,7 +1839,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) else cpi->twopass.gf_group_error_left = gf_group_err; - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits; + cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; @@ -1851,13 +1855,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } else cpi->twopass.mid_gf_extra_bits = 0; - - cpi->twopass.gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame - } - - if (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) // Normal GF and not a KF - { - cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for this frame } // Adjustment to estimate_max_q based on a measure of complexity of the section @@ -1907,12 +1904,6 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int max_bits = frame_max_bits(cpi); // Max for a single frame - // The final few frames have special treatment - if (cpi->frames_till_gf_update_due >= (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame)) - { - cpi->twopass.gf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;; - } - // Calculate modified prediction error used in bit allocation modified_err = calculate_modified_err(cpi, this_frame); @@ -2014,22 +2005,10 @@ void vp8_second_pass(VP8_COMP *cpi) if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) { // Assign a standard frames worth of bits from those allocated to the GF group + int bak = cpi->per_frame_bandwidth; vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); - - // If appropriate (we are switching into ARF active but it was not previously active) apply a boost for the gf at the start of the group. - //if ( !cpi->source_alt_ref_active && (cpi->gfu_boost > 150) ) - if (FALSE) - { - int extra_bits; - int pct_extra = (cpi->gfu_boost - 100) / 50; - - pct_extra = (pct_extra > 20) ? 20 : pct_extra; - - extra_bits = (cpi->twopass.gf_group_bits * pct_extra) / 100; - cpi->twopass.gf_group_bits -= extra_bits; - cpi->per_frame_bandwidth += extra_bits; - } + cpi->per_frame_bandwidth = bak; } } diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index b9b371fe4..1ec3a9894 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -67,9 +67,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 4a9b95a2e..cf3f5510c 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1546,6 +1546,7 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; + cpi->buffered_av_per_frame_bandwidth = cpi->av_per_frame_bandwidth; cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; @@ -1641,7 +1642,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) break; } - if (cpi->pass == 0) + if (cpi->pass == 0 && cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER) cpi->auto_worst_q = 1; cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; @@ -3528,7 +3529,8 @@ static void encode_frame_to_data_rate // For CBR if the buffer reaches its maximum level then we can no longer // save up bits for later frames so we might as well use them up // on the current frame. - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && + if (cpi->pass == 2 + && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) { int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4 @@ -3619,6 +3621,9 @@ static void encode_frame_to_data_rate } else { + if(cpi->pass != 2) + Q = cpi->avg_frame_qindex; + cpi->active_best_quality = inter_minq[Q]; // For the constant/constrained quality mode we dont want @@ -3931,15 +3936,16 @@ static void encode_frame_to_data_rate (cpi->active_worst_quality < cpi->worst_quality) && (cpi->projected_frame_size > frame_over_shoot_limit)) { - int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; + /* step down active_worst_quality such that the corresponding + * active_best_quality will be equal to the current + * active_worst_quality + 1 + */ + int i; - // If so is there any scope for relaxing it - while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) - { - cpi->active_worst_quality++; - top_index = cpi->active_worst_quality; - over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size. - } + for(i=cpi->active_worst_quality; i<cpi->worst_quality; i++) + if(inter_minq[i] >= cpi->active_worst_quality + 1) + break; + cpi->active_worst_quality = i; // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop. active_worst_qchanged = TRUE; @@ -4327,10 +4333,9 @@ static void encode_frame_to_data_rate // Update the buffer level variable. // Non-viewable frames are a special case and are treated as pure overhead. - if ( !cm->show_frame ) - cpi->bits_off_target -= cpi->projected_frame_size; - else - cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; + if ( cm->show_frame ) + cpi->bits_off_target += cpi->av_per_frame_bandwidth; + cpi->bits_off_target -= cpi->projected_frame_size; // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; @@ -4344,7 +4349,33 @@ static void encode_frame_to_data_rate // Debug stats cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); - cpi->buffer_level = cpi->bits_off_target; + // Update the buffered average bitrate + { + long long numerator; + + numerator = cpi->oxcf.maximum_buffer_size + - cpi->buffered_av_per_frame_bandwidth + + cpi->projected_frame_size; + numerator *= cpi->buffered_av_per_frame_bandwidth; + cpi->buffered_av_per_frame_bandwidth = numerator + / cpi->oxcf.maximum_buffer_size; + } + + { + long long tmp = (long long)cpi->buffered_av_per_frame_bandwidth + * cpi->oxcf.maximum_buffer_size + / cpi->av_per_frame_bandwidth; + cpi->buffer_level = cpi->oxcf.maximum_buffer_size + - tmp + + cpi->oxcf.optimal_buffer_level; + } + + // Accumulate overshoot error. + cpi->accumulated_overshoot += + (cpi->projected_frame_size > cpi->av_per_frame_bandwidth) + ? cpi->projected_frame_size - cpi->av_per_frame_bandwidth + : 0; + // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames if (cm->frame_type == KEY_FRAME) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 663786004..c460b9da9 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -47,8 +47,8 @@ #define MIN_THRESHMULT 32 #define MAX_THRESHMULT 512 -#define GF_ZEROMV_ZBIN_BOOST 24 -#define LF_ZEROMV_ZBIN_BOOST 12 +#define GF_ZEROMV_ZBIN_BOOST 12 +#define LF_ZEROMV_ZBIN_BOOST 6 #define MV_ZBIN_BOOST 4 #define ZBIN_OQ_MAX 192 @@ -351,6 +351,10 @@ typedef struct VP8_COMP int per_frame_bandwidth; // Current section per frame bandwidth target int av_per_frame_bandwidth; // Average frame size target for clip int min_frame_bandwidth; // Minimum allocation that should be used for any frame + int buffered_av_per_frame_bandwidth; // Average bitrate over the last buffer + int buffered_av_per_frame_bandwidth_rem; // Average bitrate remainder + int accumulated_overshoot; // Accumulated # of bits spent > target + int inter_frame_target; double output_frame_rate; long long last_time_stamp_seen; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 456059cf8..b60d2419f 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -43,7 +43,6 @@ extern const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; -extern unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); extern unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); extern int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *, int *, int *, int, int *mvcost[2], int, int fullpixel); extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); @@ -98,37 +97,6 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, } -unsigned int vp8_get16x16pred_error_c -( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride -) -{ - unsigned pred_error = 0; - int i, j; - int sum = 0; - - for (i = 0; i < 16; i++) - { - int diff; - - for (j = 0; j < 16; j++) - { - diff = src_ptr[j] - ref_ptr[j]; - sum += diff; - pred_error += diff * diff; - } - - src_ptr += src_stride; - ref_ptr += ref_stride; - } - - pred_error -= sum * sum / 256; - return pred_error; -} - unsigned int vp8_get4x4sse_cs_c ( @@ -172,8 +140,7 @@ static int pick_intra4x4block( MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, - B_PREDICTION_MODE above, - B_PREDICTION_MODE left, + unsigned int *mode_costs, int *bestrate, int *bestdistortion) @@ -185,16 +152,6 @@ static int pick_intra4x4block( int best_rd = INT_MAX; // 1<<30 int rate; int distortion; - unsigned int *mode_costs; - - if (x->e_mbd.frame_type == KEY_FRAME) - { - mode_costs = x->bmode_costs[above][left]; - } - else - { - mode_costs = x->inter_bmode_costs; - } for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++) { @@ -221,7 +178,7 @@ static int pick_intra4x4block( } -int vp8_pick_intra4x4mby_modes +static int pick_intra4x4mby_modes ( const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, @@ -234,20 +191,30 @@ int vp8_pick_intra4x4mby_modes int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int error; int distortion = 0; + unsigned int *bmode_costs; vp8_intra_prediction_down_copy(xd); + bmode_costs = mb->inter_bmode_costs; + for (i = 0; i < 16; i++) { MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; - const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mic, i); B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(d); - pick_intra4x4block(rtcd, mb, i, &best_mode, A, L, &r, &d); + if (mb->e_mbd.frame_type == KEY_FRAME) + { + const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); + const B_PREDICTION_MODE L = left_block_mode(mic, i); + + bmode_costs = mb->bmode_costs[A][L]; + } + + + pick_intra4x4block(rtcd, mb, i, &best_mode, bmode_costs, &r, &d); cost += r; distortion += d; @@ -275,7 +242,7 @@ int vp8_pick_intra4x4mby_modes return error; } -void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb) +static void pick_intra_mbuv_mode(MACROBLOCK *mb) { MACROBLOCKD *x = &mb->e_mbd; @@ -443,26 +410,23 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; MACROBLOCKD *xd = &x->e_mbd; - union b_mode_info best_bmodes[16]; MB_MODE_INFO best_mbmode; int_mv best_ref_mv; int_mv mode_mv[MB_MODE_COUNT]; MB_PREDICTION_MODE this_mode; int num00; - int i; int mdcounts[4]; int best_rd = INT_MAX; // 1 << 30; int best_intra_rd = INT_MAX; int mode_index; - int ref_frame_cost[MAX_REF_FRAMES]; int rate; int rate2; int distortion2; int bestsme; //int all_rds[MAX_MODES]; // Experimental debug code. int best_mode_index = 0; - unsigned int sse = INT_MAX; + unsigned int sse = INT_MAX, best_sse = INT_MAX; int_mv mvp; int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; @@ -485,7 +449,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, vpx_memset(nearest_mv, 0, sizeof(nearest_mv)); vpx_memset(near_mv, 0, sizeof(near_mv)); vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); // set up all the refframe dependent pointers. @@ -536,32 +499,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, *returnintra = INT_MAX; x->skip = 0; - ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); - - // Special case treatment when GF and ARF are not sensible options for reference - if (cpi->ref_frame_flags == VP8_LAST_FLAG) - { - ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_zero(255); - ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(255) - + vp8_cost_zero(128); - ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(255) - + vp8_cost_one(128); - } - else - { - ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_zero(cpi->prob_last_coded); - ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(cpi->prob_last_coded) - + vp8_cost_zero(cpi->prob_gf_coded); - ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(cpi->prob_last_coded) - + vp8_cost_one(cpi->prob_gf_coded); - } - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; // if we encode a new mv this is important @@ -613,7 +550,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; // Work out the cost assosciated with selecting the reference frame - frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + frame_cost = + x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; rate2 += frame_cost; // everything but intra @@ -659,10 +597,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, switch (this_mode) { case B_PRED: - // Pass best so far to vp8_pick_intra4x4mby_modes to use as breakout - distortion2 = *returndistortion; - vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, - &rate, &distortion2); + // Pass best so far to pick_intra4x4mby_modes to use as breakout + distortion2 = best_sse; + pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2); if (distortion2 == INT_MAX) { @@ -672,9 +609,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, { rate2 += rate; distortion2 = VARIANCE_INVOKE - (&cpi->rtcd.variance, get16x16prederror)( + (&cpi->rtcd.variance, var16x16)( x->src.y_buffer, x->src.y_stride, - x->e_mbd.predictor, 16); + x->e_mbd.predictor, 16, &sse); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) @@ -697,7 +634,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, case TM_PRED: RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby) (&x->e_mbd); - distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16); + distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) + (x->src.y_buffer, x->src.y_stride, + x->e_mbd.predictor, 16, &sse); rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); @@ -886,15 +825,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, *returnrate = rate2; *returndistortion = distortion2; + best_sse = sse; best_rd = this_rd; vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); - if (this_mode == B_PRED) - for (i = 0; i < 16; i++) - { - best_bmodes[i].as_mode = x->e_mbd.block[i].bmi.as_mode; - } - // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; @@ -956,15 +890,52 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, if (best_mbmode.mode <= B_PRED) { /* set mode_info_context->mbmi.uv_mode */ - vp8_pick_intra_mbuv_mode(x); + pick_intra_mbuv_mode(x); } - if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) + update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]); +} + + +void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) +{ + int error4x4, error16x16 = INT_MAX; + int rate, best_rate = 0, distortion, best_sse; + MB_PREDICTION_MODE mode, best_mode = DC_PRED; + int this_rd; + unsigned int sse; + + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + + pick_intra_mbuv_mode(x); + + for (mode = DC_PRED; mode <= TM_PRED; mode ++) { - for (i = 0; i < 16; i++) + x->e_mbd.mode_info_context->mbmi.mode = mode; + RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby) + (&x->e_mbd); + distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) + (x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, &sse); + rate = x->mbmode_cost[x->e_mbd.frame_type][mode]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + + if (error16x16 > this_rd) { - x->e_mbd.block[i].bmi.as_mode = best_bmodes[i].as_mode; + error16x16 = this_rd; + best_mode = mode; + best_sse = sse; + best_rate = rate; } } - update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]); + x->e_mbd.mode_info_context->mbmi.mode = best_mode; + + error4x4 = pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, + &best_sse); + if (error4x4 < error16x16) + { + x->e_mbd.mode_info_context->mbmi.mode = B_PRED; + best_rate = rate; + } + + *rate_ = best_rate; } diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h index f96fc5376..a0103d165 100644 --- a/vp8/encoder/pickinter.h +++ b/vp8/encoder/pickinter.h @@ -14,7 +14,6 @@ #include "vpx_ports/config.h" #include "vp8/common/onyxc_int.h" -extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion); -extern void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb); extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); +extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); #endif diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c index 8dfd2a543..63f235784 100644 --- a/vp8/encoder/ppc/csystemdependent.c +++ b/vp8/encoder/ppc/csystemdependent.c @@ -48,9 +48,6 @@ void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, i void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d); -unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); -unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); // c imports @@ -88,9 +85,6 @@ extern sub_pixel_variance_function sub_pixel_variance16x8_c; extern sub_pixel_variance_function sub_pixel_variance16x16_c; extern unsigned int vp8_get_mb_ss_c(short *); -extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); -extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); // ppc @@ -149,9 +143,6 @@ void vp8_cmachine_specific_config(void) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ppc; vp8_get_mb_ss = vp8_get_mb_ss_c; - vp8_get16x16pred_error = vp8_get16x16pred_error_c; - vp8_get8x8var = vp8_get8x8var_ppc; - vp8_get16x16var = vp8_get16x16var_ppc; vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; vp8_sad16x16 = vp8_sad16x16_ppc; diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 78b3b4715..642660a0e 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -650,10 +650,10 @@ static void calc_gf_params(VP8_COMP *cpi) static void calc_pframe_target_size(VP8_COMP *cpi) { - int min_frame_target; + int min_frame_target, max_frame_target; int Adjustment; - min_frame_target = 0; + min_frame_target = 1; if (cpi->pass == 2) { @@ -661,10 +661,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi) if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5)) min_frame_target = cpi->av_per_frame_bandwidth >> 5; + + max_frame_target = INT_MAX; } - else if (min_frame_target < cpi->per_frame_bandwidth / 4) - min_frame_target = cpi->per_frame_bandwidth / 4; + else + { + if (min_frame_target < cpi->per_frame_bandwidth / 4) + min_frame_target = cpi->per_frame_bandwidth / 4; + /* Don't allow the target to completely deplete the buffer. */ + max_frame_target = cpi->buffer_level + cpi->av_per_frame_bandwidth; + if(max_frame_target < min_frame_target) + max_frame_target = min_frame_target; + } // Special alt reference frame case if (cpi->common.refresh_alt_ref_frame) @@ -1157,6 +1166,32 @@ static void calc_pframe_target_size(VP8_COMP *cpi) } } + + if (cpi->pass==0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER){ + /* determine the accumulated error to apply to this frame. Apply + * more of the error when we've been undershooting, less when + * we've been overshooting + */ + long long adjust; + int bitrate_error; + + bitrate_error = cpi->av_per_frame_bandwidth + - cpi->buffered_av_per_frame_bandwidth; + + adjust = cpi->accumulated_overshoot; + adjust *= cpi->av_per_frame_bandwidth + bitrate_error; + adjust /= cpi->oxcf.maximum_buffer_size; + if (adjust > (cpi->this_frame_target - min_frame_target)) + adjust = (cpi->this_frame_target - min_frame_target); + else if (adjust < 0) + adjust = 0; + + cpi->this_frame_target -= adjust; + cpi->accumulated_overshoot -= adjust; + } + + if(cpi->this_frame_target > max_frame_target) + cpi->this_frame_target = max_frame_target; } diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 3186fa6ed..cd250c425 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -201,47 +201,47 @@ static int rdmult_lut[QINDEX_RANGE]= /* values are now correlated to quantizer */ static int sad_per_bit16lut[QINDEX_RANGE] = { + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, - 6, 6, 6, 6, 6, 7, 7, 7, - 7, 7, 7, 7, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 9, 9, - 9, 9, 9, 9, 10, 10, 10, 10, - 10, 10, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 13, - 13, 13, 13, 13, 13, 14, 14, 14, - 14, 14, 15, 15, 15, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 17, 17, - 17, 17, 17, 17, 17, 18, 18, 18, - 18, 18, 19, 19, 19, 19, 19, 19, - 20, 20, 20, 21, 21, 21, 21, 22, - 22, 22, 23, 23, 23, 24, 24, 24, - 25, 25, 26, 26, 27, 27, 27, 28, - 28, 28, 29, 29, 30, 30, 31, 31 + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 10, 10, + 10, 10, 10, 10, 10, 10, 11, 11, + 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 13, 13, 13, 13, 14, 14 }; static int sad_per_bit4lut[QINDEX_RANGE] = { - 5, 5, 5, 5, 5, 5, 7, 7, + 2, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 5, 5, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, - 8, 8, 8, 8, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 11, 11, - 11, 11, 11, 11, 13, 13, 13, 13, - 13, 13, 14, 14, 14, 14, 14, 14, - 16, 16, 16, 16, 16, 16, 16, 17, - 17, 17, 17, 17, 17, 19, 19, 19, - 19, 19, 20, 20, 20, 20, 20, 20, - 22, 22, 22, 22, 22, 22, 23, 23, - 23, 23, 23, 23, 23, 25, 25, 25, - 25, 25, 26, 26, 26, 26, 26, 26, - 28, 28, 28, 29, 29, 29, 29, 31, - 31, 31, 32, 32, 32, 34, 34, 34, - 35, 35, 37, 37, 38, 38, 38, 40, - 40, 40, 41, 41, 43, 43, 44, 44, + 8, 8, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 14, + 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 18, + 18, 18, 19, 19, 19, 20, 20, 20, }; void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) { - cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]/2; - cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]/2; + cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]; + cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; } @@ -719,8 +719,8 @@ static int rd_pick_intra4x4block( return best_rd; } -int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - int *rate_y, int *Distortion, int best_rd) +static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, + int *rate_y, int *Distortion, int best_rd) { MACROBLOCKD *const xd = &mb->e_mbd; int i; @@ -782,11 +782,13 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } -int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, - MACROBLOCK *x, - int *Rate, - int *rate_y, - int *Distortion) + + +static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, + MACROBLOCK *x, + int *Rate, + int *rate_y, + int *Distortion) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); @@ -858,7 +860,7 @@ static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distort return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) +static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); @@ -1795,7 +1797,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int distortion; int best_rd = INT_MAX; int best_intra_rd = INT_MAX; - int ref_frame_cost[MAX_REF_FRAMES]; int rate2, distortion2; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); @@ -1872,36 +1873,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->skip = 0; - ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); - - // Special case treatment when GF and ARF are not sensible options for reference - if (cpi->ref_frame_flags == VP8_LAST_FLAG) - { - ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_zero(255); - ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(255) - + vp8_cost_zero(128); - ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(255) - + vp8_cost_one(128); - } - else - { - ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_zero(cpi->prob_last_coded); - ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(cpi->prob_last_coded) - + vp8_cost_zero(cpi->prob_gf_coded); - ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) - + vp8_cost_one(cpi->prob_last_coded) - + vp8_cost_one(cpi->prob_gf_coded); - } - vpx_memset(mode_mv, 0, sizeof(mode_mv)); x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); + rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; for (mode_index = 0; mode_index < MAX_MODES; mode_index++) @@ -2024,7 +1999,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int tmp_rd; // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; - tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd); + tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd); rate2 += rate; distortion2 += distortion; @@ -2247,29 +2222,28 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } else if (x->encode_breakout) { - int sum; unsigned int sse; + unsigned int var; int threshold = (xd->block[0].dequant[1] * xd->block[0].dequant[1] >>4); if(threshold < x->encode_breakout) threshold = x->encode_breakout; - VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var) - (x->src.y_buffer, x->src.y_stride, - x->e_mbd.predictor, 16, &sse, &sum); + var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) + (x->src.y_buffer, x->src.y_stride, + x->e_mbd.predictor, 16, &sse); if (sse < threshold) { - // Check u and v to make sure skip is ok - int sse2 = 0; + unsigned int q2dc = xd->block[24].dequant[0]; /* If theres is no codeable 2nd order dc or a very small uniform pixel change change */ - if (abs(sum) < (xd->block[24].dequant[0]<<2)|| - ((sum * sum>>8) > sse && abs(sum) <128)) + if ((sse - var < q2dc * q2dc >>4) || + (sse /2 > var && sse-var < 64)) { - sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance)); - + // Check u and v to make sure skip is ok + int sse2= VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance)); if (sse2 * 2 < threshold) { x->skip = 1; @@ -2319,8 +2293,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rate2 += other_cost; } - // Estimate the reference frame signaling cost and add it to the rolling cost variable. - rate2 += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + /* Estimate the reference frame signaling cost and add it + * to the rolling cost variable. + */ + rate2 += + x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; if (!disable_skip) { @@ -2384,7 +2361,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; } - other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + other_cost += + x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; /* Calculate the final y RD estimate for this mode */ best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost), @@ -2492,3 +2470,39 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]); } + +void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) +{ + int error4x4, error16x16; + int rate4x4, rate16x16 = 0, rateuv; + int dist4x4, dist16x16, distuv; + int rate; + int rate4x4_tokenonly = 0; + int rate16x16_tokenonly = 0; + int rateuv_tokenonly = 0; + + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + + rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); + rate = rateuv; + + error16x16 = rd_pick_intra16x16mby_mode(cpi, x, + &rate16x16, &rate16x16_tokenonly, + &dist16x16); + + error4x4 = rd_pick_intra4x4mby_modes(cpi, x, + &rate4x4, &rate4x4_tokenonly, + &dist4x4, error16x16); + + if (error4x4 < error16x16) + { + x->e_mbd.mode_info_context->mbmi.mode = B_PRED; + rate += rate4x4; + } + else + { + rate += rate16x16; + } + + *rate_ = rate; +} diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index fbbf9077f..95134cb81 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -15,10 +15,8 @@ #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); -extern int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd); -extern int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion); -extern void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion); extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); +extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); extern void vp8_mv_pred ( diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 1c5923813..329abac68 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -98,7 +98,6 @@ static void tokenize2nd_order_b const BLOCKD *const b, TOKENEXTRA **tp, const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - const FRAME_TYPE frametype, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi @@ -120,9 +119,9 @@ static void tokenize2nd_order_b { int rc = vp8_default_zig_zag1d[c]; const int v = qcoeff_ptr[rc]; - +#if CONFIG_DEBUG assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); - +#endif t->Extra = vp8_dct_value_tokens_ptr[v].Extra; x = vp8_dct_value_tokens_ptr[v].Token; } @@ -149,7 +148,6 @@ static void tokenize1st_order_b const BLOCKD *const b, TOKENEXTRA **tp, const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - const FRAME_TYPE frametype, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi @@ -173,9 +171,9 @@ static void tokenize1st_order_b { int rc = vp8_default_zig_zag1d[c]; const int v = qcoeff_ptr[rc]; - +#if CONFIG_DEBUG assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); - +#endif t->Extra = vp8_dct_value_tokens_ptr[v].Extra; x = vp8_dct_value_tokens_ptr[v].Token; } @@ -196,14 +194,11 @@ static void tokenize1st_order_b } -static int mb_is_skippable(MACROBLOCKD *x) +static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) { - int has_y2_block; int skip = 1; int i = 0; - has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED - && x->mode_info_context->mbmi.mode != SPLITMV); if (has_y2_block) { for (i = 0; i < 16; i++) @@ -223,8 +218,12 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; int plane_type; int b; + int has_y2_block; + + has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED + && x->mode_info_context->mbmi.mode != SPLITMV); - x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x); + x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); if (x->mode_info_context->mbmi.mb_skip_coeff) { cpi->skip_true_count++; @@ -241,29 +240,24 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) cpi->skip_false_count++; -#if 0 - vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts)); -#endif - if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV) - { - plane_type = 3; - } - else + + plane_type = 3; + if(has_y2_block) { - tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type, + tokenize2nd_order_b(x->block + 24, t, 1, A + vp8_block2above[24], L + vp8_block2left[24], cpi); plane_type = 0; } for (b = 0; b < 16; b++) - tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type, + tokenize1st_order_b(x->block + b, t, plane_type, A + vp8_block2above[b], L + vp8_block2left[b], cpi); for (b = 16; b < 24; b++) - tokenize1st_order_b(x->block + b, t, 2, x->frame_type, + tokenize1st_order_b(x->block + b, t, 2, A + vp8_block2above[b], L + vp8_block2left[b], cpi); @@ -352,10 +346,7 @@ void vp8_tokenize_initialize() static __inline void stuff2nd_order_b ( - const BLOCKD *const b, TOKENEXTRA **tp, - const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - const FRAME_TYPE frametype, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi @@ -364,9 +355,6 @@ static __inline void stuff2nd_order_b int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - (void) frametype; - (void) type; - (void) b; t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; @@ -382,10 +370,7 @@ static __inline void stuff2nd_order_b static __inline void stuff1st_order_b ( - const BLOCKD *const b, TOKENEXTRA **tp, - const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - const FRAME_TYPE frametype, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi @@ -394,9 +379,6 @@ static __inline void stuff1st_order_b int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - (void) frametype; - (void) type; - (void) b; t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt]; @@ -411,10 +393,7 @@ static __inline void stuff1st_order_b static __inline void stuff1st_order_buv ( - const BLOCKD *const b, TOKENEXTRA **tp, - const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - const FRAME_TYPE frametype, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi @@ -423,9 +402,6 @@ void stuff1st_order_buv int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - (void) frametype; - (void) type; - (void) b; t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; @@ -445,17 +421,17 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) int plane_type; int b; - stuff2nd_order_b(x->block + 24, t, 1, x->frame_type, + stuff2nd_order_b(t, A + vp8_block2above[24], L + vp8_block2left[24], cpi); plane_type = 0; for (b = 0; b < 16; b++) - stuff1st_order_b(x->block + b, t, plane_type, x->frame_type, + stuff1st_order_b(t, A + vp8_block2above[b], L + vp8_block2left[b], cpi); for (b = 16; b < 24; b++) - stuff1st_order_buv(x->block + b, t, 2, x->frame_type, + stuff1st_order_buv(t, A + vp8_block2above[b], L + vp8_block2left[b], cpi); diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index 0d7d977d7..894b4f9e4 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -308,21 +308,6 @@ extern prototype_getmbss(vp8_variance_getmbss); #endif extern prototype_variance(vp8_variance_mse16x16); -#ifndef vp8_variance_get16x16prederror -#define vp8_variance_get16x16prederror vp8_get16x16pred_error_c -#endif -extern prototype_get16x16prederror(vp8_variance_get16x16prederror); - -#ifndef vp8_variance_get8x8var -#define vp8_variance_get8x8var vp8_get8x8var_c -#endif -extern prototype_variance2(vp8_variance_get8x8var); - -#ifndef vp8_variance_get16x16var -#define vp8_variance_get16x16var vp8_get16x16var_c -#endif -extern prototype_variance2(vp8_variance_get16x16var); - #ifndef vp8_variance_get4x4sse_cs #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_c #endif @@ -376,9 +361,6 @@ typedef struct vp8_getmbss_fn_t getmbss; vp8_variance_fn_t mse16x16; - vp8_get16x16prederror_fn_t get16x16prederror; - vp8_variance2_fn_t get8x8var; - vp8_variance2_fn_t get16x16var; vp8_get16x16prederror_fn_t get4x4sse_cs; vp8_sad_multi_fn_t sad16x16x3; diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c index ede07c8db..c7b9c2209 100644 --- a/vp8/encoder/variance_c.c +++ b/vp8/encoder/variance_c.c @@ -61,40 +61,6 @@ static void variance( } } -unsigned int -vp8_get8x8var_c -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -) -{ - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum); - return (*SSE - (((*Sum) * (*Sum)) >> 6)); -} - -unsigned int -vp8_get16x16var_c -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -) -{ - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum); - return (*SSE - (((*Sum) * (*Sum)) >> 8)); - -} - - unsigned int vp8_variance16x16_c( const unsigned char *src_ptr, diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h index 59a5cb1d7..19f6c1686 100644 --- a/vp8/encoder/x86/dct_x86.h +++ b/vp8/encoder/x86/dct_x86.h @@ -31,6 +31,12 @@ extern prototype_fdct(vp8_short_fdct8x4_mmx); #undef vp8_fdct_short8x4 #define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx +#undef vp8_fdct_fast4x4 +#define vp8_fdct_fast4x4 vp8_short_fdct4x4_mmx + +#undef vp8_fdct_fast8x4 +#define vp8_fdct_fast8x4 vp8_short_fdct8x4_mmx + #endif #endif diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm index 67a9b4d3e..13b76ea91 100644 --- a/vp8/encoder/x86/variance_impl_mmx.asm +++ b/vp8/encoder/x86/variance_impl_mmx.asm @@ -843,136 +843,6 @@ filter_block2d_bil_var_mmx_loop: pop rbp ret -;unsigned int vp8_get16x16pred_error_mmx -;( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride -;) -global sym(vp8_get16x16pred_error_mmx) -sym(vp8_get16x16pred_error_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - mov rsi, arg(0) ;DWORD PTR [src_ptr] - mov rdi, arg(2) ;DWORD PTR [ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[src_stride] - movsxd rdx, DWORD PTR arg(3) ;[ref_stride] - - pxor mm0, mm0 ; clear xmm0 for unpack - pxor mm7, mm7 ; clear xmm7 for accumulating diffs - - pxor mm6, mm6 ; clear xmm6 for accumulating sse - mov rcx, 16 - -var16loop: - - movq mm1, [rsi] - movq mm2, [rdi] - - movq mm3, mm1 - movq mm4, mm2 - - punpcklbw mm1, mm0 - punpckhbw mm3, mm0 - - punpcklbw mm2, mm0 - punpckhbw mm4, mm0 - - psubw mm1, mm2 - psubw mm3, mm4 - - paddw mm7, mm1 - pmaddwd mm1, mm1 - - paddw mm7, mm3 - pmaddwd mm3, mm3 - - paddd mm6, mm1 - paddd mm6, mm3 - - - movq mm1, [rsi+8] - movq mm2, [rdi+8] - - movq mm3, mm1 - movq mm4, mm2 - - punpcklbw mm1, mm0 - punpckhbw mm3, mm0 - - punpcklbw mm2, mm0 - punpckhbw mm4, mm0 - - psubw mm1, mm2 - psubw mm3, mm4 - - paddw mm7, mm1 - pmaddwd mm1, mm1 - - paddw mm7, mm3 - pmaddwd mm3, mm3 - - paddd mm6, mm1 - paddd mm6, mm3 - - add rsi, rax - add rdi, rdx - - sub rcx, 1 - jnz var16loop - - - movq mm1, mm6 - pxor mm6, mm6 - - pxor mm5, mm5 - punpcklwd mm6, mm7 - - punpckhwd mm5, mm7 - psrad mm5, 16 - - psrad mm6, 16 - paddd mm6, mm5 - - movq mm2, mm1 - psrlq mm1, 32 - - paddd mm2, mm1 - movq mm7, mm6 - - psrlq mm6, 32 - paddd mm6, mm7 - - movd DWORD PTR [rsp], mm6 ;Sum - movd DWORD PTR [rsp+4], mm2 ;SSE - - ; return (SSE-((Sum*Sum)>>8)); - movsxd rdx, dword ptr [rsp] - imul rdx, rdx - sar rdx, 8 - movsxd rax, dword ptr [rsp + 4] - sub rax, rdx - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - SECTION_RODATA ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm index 5becc7344..b7a6b3286 100644 --- a/vp8/encoder/x86/variance_impl_sse2.asm +++ b/vp8/encoder/x86/variance_impl_sse2.asm @@ -213,122 +213,6 @@ var16loop: ret -;unsigned int vp8_get16x16pred_error_sse2 -;( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride -;) -global sym(vp8_get16x16pred_error_sse2) -sym(vp8_get16x16pred_error_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[src_stride] - movsxd rdx, DWORD PTR arg(3) ;[ref_stride] - - pxor xmm0, xmm0 ; clear xmm0 for unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - pxor xmm6, xmm6 ; clear xmm6 for accumulating sse - mov rcx, 16 - -var16peloop: - movdqu xmm1, XMMWORD PTR [rsi] - movdqu xmm2, XMMWORD PTR [rdi] - - movdqa xmm3, xmm1 - movdqa xmm4, xmm2 - - punpcklbw xmm1, xmm0 - punpckhbw xmm3, xmm0 - - punpcklbw xmm2, xmm0 - punpckhbw xmm4, xmm0 - - psubw xmm1, xmm2 - psubw xmm3, xmm4 - - paddw xmm7, xmm1 - pmaddwd xmm1, xmm1 - - paddw xmm7, xmm3 - pmaddwd xmm3, xmm3 - - paddd xmm6, xmm1 - paddd xmm6, xmm3 - - add rsi, rax - add rdi, rdx - - sub rcx, 1 - jnz var16peloop - - - movdqa xmm1, xmm6 - pxor xmm6, xmm6 - - pxor xmm5, xmm5 - punpcklwd xmm6, xmm7 - - punpckhwd xmm5, xmm7 - psrad xmm5, 16 - - psrad xmm6, 16 - paddd xmm6, xmm5 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddd xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddd xmm7, xmm6 - paddd xmm1, xmm2 - - movd DWORD PTR [rsp], xmm7 ;Sum - movd DWORD PTR [rsp+4], xmm1 ;SSE - - ; return (SSE-((Sum*Sum)>>8)); - movsxd rdx, dword ptr [rsp] - imul rdx, rdx - sar rdx, 8 - movsxd rax, dword ptr [rsp + 4] - sub rax, rdx - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - ;unsigned int vp8_get8x8var_sse2 diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c index 4a89868c2..92b695f17 100644 --- a/vp8/encoder/x86/variance_mmx.c +++ b/vp8/encoder/x86/variance_mmx.c @@ -76,43 +76,6 @@ extern void vp8_filter_block2d_bil_var_mmx int *sum, unsigned int *sumsquared ); -extern unsigned int vp8_get16x16pred_error_mmx -( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride -); - -unsigned int vp8_get16x16var_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *SUM -) -{ - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - - *SSE = var; - *SUM = avg; - return (var - ((avg * avg) >> 8)); - -} - - - unsigned int vp8_variance4x4_mmx( diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c index dfc0915b9..24062eb9b 100644 --- a/vp8/encoder/x86/variance_sse2.c +++ b/vp8/encoder/x86/variance_sse2.c @@ -53,13 +53,6 @@ unsigned int vp8_get16x16var_sse2 unsigned int *SSE, int *Sum ); -unsigned int vp8_get16x16pred_error_sse2 -( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride -); unsigned int vp8_get8x8var_sse2 ( const unsigned char *src_ptr, diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h index 77e05e1e8..0ee8eb7e5 100644 --- a/vp8/encoder/x86/variance_x86.h +++ b/vp8/encoder/x86/variance_x86.h @@ -41,9 +41,7 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx); extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx); extern prototype_getmbss(vp8_get_mb_ss_mmx); extern prototype_variance(vp8_mse16x16_mmx); -extern prototype_get16x16prederror(vp8_get16x16pred_error_mmx); extern prototype_variance2(vp8_get8x8var_mmx); -extern prototype_variance2(vp8_get16x16var_mmx); extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx); #if !CONFIG_RUNTIME_CPU_DETECT @@ -110,15 +108,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx); #undef vp8_variance_mse16x16 #define vp8_variance_mse16x16 vp8_mse16x16_mmx -#undef vp8_variance_get16x16prederror -#define vp8_variance_get16x16prederror vp8_get16x16pred_error_mmx - -#undef vp8_variance_get8x8var -#define vp8_variance_get8x8var vp8_get8x8var_mmx - -#undef vp8_variance_get16x16var -#define vp8_variance_get16x16var vp8_get16x16var_mmx - #undef vp8_variance_get4x4sse_cs #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_mmx @@ -148,7 +137,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt); extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt); extern prototype_getmbss(vp8_get_mb_ss_sse2); extern prototype_variance(vp8_mse16x16_wmt); -extern prototype_get16x16prederror(vp8_get16x16pred_error_sse2); extern prototype_variance2(vp8_get8x8var_sse2); extern prototype_variance2(vp8_get16x16var_sse2); @@ -216,15 +204,6 @@ extern prototype_variance2(vp8_get16x16var_sse2); #undef vp8_variance_mse16x16 #define vp8_variance_mse16x16 vp8_mse16x16_wmt -#undef vp8_variance_get16x16prederror -#define vp8_variance_get16x16prederror vp8_get16x16pred_error_sse2 - -#undef vp8_variance_get8x8var -#define vp8_variance_get8x8var vp8_get8x8var_sse2 - -#undef vp8_variance_get16x16var -#define vp8_variance_get16x16var vp8_get16x16var_sse2 - #endif #endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 378b14066..9a324ec12 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -16,7 +16,7 @@ #if HAVE_MMX -static void short_fdct8x4_mmx(short *input, short *output, int pitch) +void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch) { vp8_short_fdct4x4_mmx(input, output, pitch); vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); @@ -26,7 +26,7 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, short *qcoeff_ptr, short *dequant_ptr, short *scan_mask, short *round_ptr, short *quant_ptr, short *dqcoeff_ptr); -static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) +void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) { short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; short *coeff_ptr = b->coeff; @@ -51,7 +51,7 @@ static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) } int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -static int mbblock_error_mmx(MACROBLOCK *mb, int dc) +int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc) { short *coeff_ptr = mb->block[0].coeff; short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; @@ -59,7 +59,7 @@ static int mbblock_error_mmx(MACROBLOCK *mb, int dc) } int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -static int mbuverror_mmx(MACROBLOCK *mb) +int vp8_mbuverror_mmx(MACROBLOCK *mb) { short *s_ptr = &mb->coeff[256]; short *d_ptr = &mb->e_mbd.dqcoeff[256]; @@ -69,7 +69,7 @@ static int mbuverror_mmx(MACROBLOCK *mb) void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); -static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) +void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *z = *(be->base_src) + be->src; unsigned int src_stride = be->src_stride; @@ -82,7 +82,7 @@ static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) #if HAVE_SSE2 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -static int mbblock_error_xmm(MACROBLOCK *mb, int dc) +int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc) { short *coeff_ptr = mb->block[0].coeff; short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; @@ -90,7 +90,7 @@ static int mbblock_error_xmm(MACROBLOCK *mb, int dc) } int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -static int mbuverror_xmm(MACROBLOCK *mb) +int vp8_mbuverror_xmm(MACROBLOCK *mb) { short *s_ptr = &mb->coeff[256]; short *d_ptr = &mb->e_mbd.dqcoeff[256]; @@ -100,7 +100,7 @@ static int mbuverror_xmm(MACROBLOCK *mb) void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); -static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) +void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *z = *(be->base_src) + be->src; unsigned int src_stride = be->src_stride; @@ -175,26 +175,23 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx; - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_mmx; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx; cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx; - cpi->rtcd.fdct.short8x4 = short_fdct8x4_mmx; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx; - cpi->rtcd.fdct.fast8x4 = short_fdct8x4_mmx; + cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx; cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; cpi->rtcd.encodemb.berr = vp8_block_error_mmx; - cpi->rtcd.encodemb.mberr = mbblock_error_mmx; - cpi->rtcd.encodemb.mbuverr = mbuverror_mmx; - cpi->rtcd.encodemb.subb = subtract_b_mmx; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx; + cpi->rtcd.encodemb.subb = vp8_subtract_b_mmx; cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx; cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx; - /*cpi->rtcd.quantize.fastquantb = fast_quantize_b_mmx;*/ + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/ } #endif @@ -226,11 +223,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2; - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2; - - /* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2; @@ -241,9 +233,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ; cpi->rtcd.encodemb.berr = vp8_block_error_xmm; - cpi->rtcd.encodemb.mberr = mbblock_error_xmm; - cpi->rtcd.encodemb.mbuverr = mbuverror_xmm; - cpi->rtcd.encodemb.subb = subtract_b_sse2; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm; + cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2; cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2; cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; |