diff options
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/encodeframe.c | 7 | ||||
-rw-r--r-- | vp8/encoder/encodeintra.c | 11 | ||||
-rw-r--r-- | vp8/encoder/encodemb.c | 6 | ||||
-rw-r--r-- | vp8/encoder/generic/csystemdependent.c | 2 | ||||
-rw-r--r-- | vp8/encoder/mcomp.c | 3 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 4 | ||||
-rw-r--r-- | vp8/encoder/parms.cpp | 107 | ||||
-rw-r--r-- | vp8/encoder/pickinter.c | 10 | ||||
-rw-r--r-- | vp8/encoder/pickinter.h | 1 | ||||
-rw-r--r-- | vp8/encoder/rdopt.c | 22 | ||||
-rw-r--r-- | vp8/encoder/rdopt.h | 3 | ||||
-rw-r--r-- | vp8/encoder/x86/quantize_ssse3.asm | 106 | ||||
-rw-r--r-- | vp8/encoder/x86/quantize_x86.h | 20 | ||||
-rw-r--r-- | vp8/encoder/x86/x86_csystemdependent.c | 21 |
14 files changed, 99 insertions, 224 deletions
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index ab4071d35..1dbf46eb1 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1157,7 +1157,6 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; -#if !(CONFIG_REALTIME_ONLY) if (cpi->sf.RD && cpi->compressor_speed != 2) { vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); @@ -1170,7 +1169,6 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16; } else -#endif { int rate2, best_distortion; MB_PREDICTION_MODE mode, best_mode = DC_PRED; @@ -1188,7 +1186,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) (&x->e_mbd); distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff); rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode]; - this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (Error16x16 > this_rd) { @@ -1242,8 +1240,6 @@ int vp8cx_encode_inter_macroblock else x->encode_breakout = cpi->oxcf.encode_breakout; -#if !(CONFIG_REALTIME_ONLY) - if (cpi->sf.RD) { int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; @@ -1270,7 +1266,6 @@ int vp8cx_encode_inter_macroblock } else -#endif vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error); cpi->prediction_error += distortion; diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 44000063c..7d835a1cc 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -75,14 +75,9 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_quantize_mby(x); -#if !(CONFIG_REALTIME_ONLY) -#if 1 if (x->optimize) vp8_optimize_mby(x, rtcd); -#endif -#endif - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); RECON_INVOKE(&rtcd->common->recon, recon_mby) @@ -126,15 +121,9 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_quantize_mbuv(x); -#if !(CONFIG_REALTIME_ONLY) -#if 1 - if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) vp8_optimize_mbuv(x, rtcd); -#endif -#endif - vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index 463dbcaa9..798aa683a 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -19,6 +19,7 @@ #include "vp8/common/reconintra.h" #include "dct.h" #include "vpx_mem/vpx_mem.h" +#include "rdopt.h" #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) @@ -229,8 +230,6 @@ void vp8_stuff_inter16x16(MACROBLOCK *x) */ } -#if !(CONFIG_REALTIME_ONLY) -#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp8_token_state vp8_token_state; @@ -608,7 +607,6 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } } -#endif void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { @@ -620,10 +618,8 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_quantize_mb(x); -#if !(CONFIG_REALTIME_ONLY) if (x->optimize) optimize_mb(x, rtcd); -#endif vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 1d672bef9..d48c95bf7 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -89,9 +89,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sad; -#endif cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; #if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index 716f514af..f7e7c03fe 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -1190,8 +1190,6 @@ int vp8_diamond_search_sadx4 + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); } - -#if !(CONFIG_REALTIME_ONLY) int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) { unsigned char *what = (*(b->base_src) + b->src); @@ -1571,7 +1569,6 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er else return INT_MAX; } -#endif /* !(CONFIG_REALTIME_ONLY) */ #ifdef ENTROPY_STATS void print_mode_context(void) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index a18447d51..8fb1f8d23 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1542,7 +1542,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) break; -#if !(CONFIG_REALTIME_ONLY) case MODE_GOODQUALITY: cpi->pass = 0; cpi->compressor_speed = 1; @@ -1583,7 +1582,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->pass = 2; cpi->compressor_speed = 0; break; -#endif } if (cpi->pass == 0) @@ -2117,9 +2115,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8); cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); -#if !(CONFIG_REALTIME_ONLY) cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); -#endif cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search); cpi->ready_for_new_frame = 1; diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp deleted file mode 100644 index 2a39b2ca3..000000000 --- a/vp8/encoder/parms.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if 0 - -#include <map> -#include <string> -#include <fstream> -extern "C" -{ - #include "vp8/common/onyx.h" -} - - -using namespace std; - -typedef map<string,int> Parms; - -#define ALLPARMS(O,DOTHIS) \ - DOTHIS(O, interquantizer )\ - DOTHIS(O, auto_gold )\ - DOTHIS(O, auto_adjust_gold_quantizer )\ - DOTHIS(O, goldquantizer )\ - DOTHIS(O, goldfreq )\ - DOTHIS(O, auto_key )\ - DOTHIS(O, auto_adjust_key_quantizer )\ - DOTHIS(O, keyquantizer )\ - DOTHIS(O, keyfreq )\ - DOTHIS(O, pass )\ - DOTHIS(O, fixed_q )\ - DOTHIS(O, target_bandwidth )\ - DOTHIS(O, auto_worst_q )\ - DOTHIS(O, worst_quality )\ - DOTHIS(O, best_allowed_q )\ - DOTHIS(O, end_usage )\ - DOTHIS(O, starting_buffer_level )\ - DOTHIS(O, optimal_buffer_level )\ - DOTHIS(O, maximum_buffer_size )\ - DOTHIS(O, under_shoot_pct )\ - DOTHIS(O, allow_df )\ - DOTHIS(O, drop_frames_water_mark )\ - DOTHIS(O, max_allowed_datarate )\ - DOTHIS(O, two_pass_vbrbias )\ - DOTHIS(O, two_pass_vbrmin_section )\ - DOTHIS(O, two_pass_vbrmax_section )\ - DOTHIS(O, filter_type )\ - DOTHIS(O, compressor_speed )\ - DOTHIS(O, mbpitch_feature )\ - DOTHIS(O, allow_spatial_resampling )\ - DOTHIS(O, resample_down_water_mark )\ - DOTHIS(O, resample_up_water_mark )\ - DOTHIS(O, noise_sensitivity )\ - DOTHIS(O, horiz_scale )\ - DOTHIS(O, vert_scale ) - - -#define GET(O,V) O->V = x[#V]; -#define PUT(O,V) x[#V] = O->V; - - -extern "C" void get_parms(VP8_CONFIG *ocf,char *filename) -{ - - Parms x; - int value; - string variable; - string equal; - - ifstream config_file(filename); - - ALLPARMS(ocf, PUT); - - // store all the parms in a map (really simple parsing) - while(!config_file.eof() && config_file.is_open()) - { - config_file >> variable; - config_file >> equal; - - if(equal != "=") - continue; - - config_file >> value; - - x[variable] = value; - } - - ALLPARMS(ocf, GET); - -} - -#define PRINT(O,V) debug_file<<#V <<" = " << O->V <<"\n"; -extern "C" void print_parms(VP8_CONFIG *ocf,char *filename) -{ - ofstream debug_file(filename,ios_base::app); - ALLPARMS(ocf, PRINT); - debug_file << "=============================================="<<"\n"; -} - -#endif diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index a92bcef91..52566e9fd 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -194,7 +194,7 @@ static int pick_intra4x4block( rate = mode_costs[mode]; vp8_predict_intra4x4(b, mode, b->predictor); distortion = get_prediction_error(be, b, &rtcd->variance); - this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { @@ -252,7 +252,7 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int if (i == 16) { *best_dist = distortion; - error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, distortion); + error = RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } else { @@ -643,7 +643,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re } else { - this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) { @@ -667,7 +667,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re (&x->e_mbd); distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff); rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; - this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) { @@ -813,7 +813,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse)); - this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (cpi->active_map_enabled && x->active_ptr[0] == 0) { diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h index 8fea98397..f96fc5376 100644 --- a/vp8/encoder/pickinter.h +++ b/vp8/encoder/pickinter.h @@ -14,7 +14,6 @@ #include "vpx_ports/config.h" #include "vp8/common/onyxc_int.h" -#define RD_ESTIMATE(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion); extern void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb); extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 733585909..5d5069d8d 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -31,7 +31,7 @@ #include "vp8/common/g_common.h" #include "variance.h" #include "mcomp.h" - +#include "rdopt.h" #include "vpx_mem/vpx_mem.h" #include "dct.h" #include "vp8/common/systemdependent.h" @@ -46,13 +46,8 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); - -#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) - #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) - - static const int auto_speed_thresh[17] = { 1000, @@ -480,7 +475,6 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) } -#if !(CONFIG_REALTIME_ONLY) static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { int c = !type; /* start at coef 0, unless Y with Y2 */ @@ -847,7 +841,6 @@ void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *r x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; } -#endif int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) { @@ -875,7 +868,6 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv) } } -#if !(CONFIG_REALTIME_ONLY) static int labels2mode( MACROBLOCK *x, int const *labelings, int which_label, @@ -1190,7 +1182,12 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, // Should we do a full search (best quality only) if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { - thissme = cpi->full_search_sad(x, c, e, bsi->mvp, + MV full_mvp; + + full_mvp.row = bsi->mvp->row >>3; + full_mvp.col = bsi->mvp->col >>3; + + thissme = cpi->full_search_sad(x, c, e, &full_mvp, sadpb / 4, 16, v_fn_ptr, x->mvcost, bsi->ref_mv); if (thissme < bestsme) @@ -1432,7 +1429,6 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, return bsi.segment_rd; } -#endif static void swap(int *x,int *y) { @@ -1720,7 +1716,6 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse } } -#if !(CONFIG_REALTIME_ONLY) void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra) { BLOCK *b = &x->block[0]; @@ -2225,8 +2220,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int case ZEROMV: - mv_selected: - // Trap vectors that reach beyond the UMV borders // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point // because of the lack of break statements in the previous two cases. @@ -2503,4 +2496,3 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv; } -#endif diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index 1d1be11a4..1d5f9a3a8 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -11,6 +11,9 @@ #ifndef __INC_RDOPT_H #define __INC_RDOPT_H + +#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) + extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); extern int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd); extern int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion); diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index 2f33199e5..912007e02 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -9,38 +9,62 @@ %include "vpx_ports/x86_abi_support.asm" +%include "asm_enc_offsets.asm" -;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr -; short *qcoeff_ptr,short *dequant_ptr, -; short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr); +; void vp8_fast_quantize_b_ssse3 | arg +; (BLOCK *b, | 0 +; BLOCKD *d) | 1 ; -global sym(vp8_fast_quantize_b_impl_ssse3) -sym(vp8_fast_quantize_b_impl_ssse3): + +global sym(vp8_fast_quantize_b_ssse3) +sym(vp8_fast_quantize_b_ssse3): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 GET_GOT rbx + +%if ABI_IS_32BIT + push rdi push rsi +%else + %ifidn __OUTPUT_FORMAT__,x64 push rdi + push rsi + %endif +%endif ; end prolog - mov rdx, arg(0) ;coeff_ptr - mov rdi, arg(3) ;round_ptr - mov rsi, arg(4) ;quant_ptr - - movdqa xmm0, [rdx] - movdqa xmm4, [rdx + 16] - - movdqa xmm2, [rdi] ;round lo - movdqa xmm3, [rdi + 16] ;round hi +%if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d +%else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif +%endif + + mov rax, [rdi + vp8_block_coeff] + mov rcx, [rdi + vp8_block_round] + mov rdx, [rdi + vp8_block_quant_fast] + + ; coeff + movdqa xmm0, [rax] + movdqa xmm4, [rax + 16] + + ; round + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] movdqa xmm1, xmm0 movdqa xmm5, xmm4 - psraw xmm0, 15 ;sign of z (aka sz) - psraw xmm4, 15 ;sign of z (aka sz) + ; sz = z >> 15 + psraw xmm0, 15 + psraw xmm4, 15 pabsw xmm1, xmm1 pabsw xmm5, xmm5 @@ -48,23 +72,24 @@ sym(vp8_fast_quantize_b_impl_ssse3): paddw xmm1, xmm2 paddw xmm5, xmm3 - pmulhw xmm1, [rsi] - pmulhw xmm5, [rsi + 16] + ; quant_fast + pmulhw xmm1, [rdx] + pmulhw xmm5, [rdx + 16] - mov rdi, arg(1) ;qcoeff_ptr - mov rcx, arg(2) ;dequant_ptr - mov rsi, arg(5) ;dqcoeff_ptr + mov rax, [rsi + vp8_blockd_qcoeff] + mov rdi, [rsi + vp8_blockd_dequant] + mov rcx, [rsi + vp8_blockd_dqcoeff] pxor xmm1, xmm0 pxor xmm5, xmm4 psubw xmm1, xmm0 psubw xmm5, xmm4 - movdqa [rdi], xmm1 - movdqa [rdi + 16], xmm5 + movdqa [rax], xmm1 + movdqa [rax + 16], xmm5 - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] + movdqa xmm2, [rdi] + movdqa xmm3, [rdi + 16] pxor xmm4, xmm4 pmullw xmm2, xmm1 @@ -73,38 +98,37 @@ sym(vp8_fast_quantize_b_impl_ssse3): pcmpeqw xmm1, xmm4 ;non zero mask pcmpeqw xmm5, xmm4 ;non zero mask packsswb xmm1, xmm5 - pshufb xmm1, [ GLOBAL(zz_shuf)] + pshufb xmm1, [GLOBAL(zz_shuf)] pmovmskb edx, xmm1 -; xor ecx, ecx -; mov eax, -1 -;find_eob_loop: -; shr edx, 1 -; jc fq_skip -; mov eax, ecx -;fq_skip: -; inc ecx -; cmp ecx, 16 -; jne find_eob_loop xor rdi, rdi mov eax, -1 xor dx, ax ;flip the bits for bsr bsr eax, edx - movdqa [rsi], xmm2 ;store dqcoeff - movdqa [rsi + 16], xmm3 ;store dqcoeff + movdqa [rcx], xmm2 ;store dqcoeff + movdqa [rcx + 16], xmm3 ;store dqcoeff sub edi, edx ;check for all zeros in bit mask sar edi, 31 ;0 or -1 add eax, 1 and eax, edi ;if the bit mask was all zero, ;then eob = 0 + mov [rsi + vp8_blockd_eob], eax + ; begin epilog +%if ABI_IS_32BIT + pop rsi pop rdi +%else + %ifidn __OUTPUT_FORMAT__,x64 pop rsi + pop rdi + %endif +%endif + RESTORE_GOT - UNSHADOW_ARGS pop rbp ret diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h index df2e0bc39..f09358061 100644 --- a/vp8/encoder/x86/quantize_x86.h +++ b/vp8/encoder/x86/quantize_x86.h @@ -19,7 +19,7 @@ */ #if HAVE_MMX -#endif +#endif /* HAVE_MMX */ #if HAVE_SSE2 @@ -34,9 +34,21 @@ extern prototype_quantize_block(vp8_fast_quantize_b_sse2); #undef vp8_quantize_fastquantb #define vp8_quantize_fastquantb vp8_fast_quantize_b_sse2 -#endif +#endif /* !CONFIG_RUNTIME_CPU_DETECT */ -#endif +#endif /* HAVE_SSE2 */ -#endif +#if HAVE_SSSE3 +extern prototype_quantize_block(vp8_fast_quantize_b_ssse3); + +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_quantize_fastquantb +#define vp8_quantize_fastquantb vp8_fast_quantize_b_ssse3 + +#endif /* !CONFIG_RUNTIME_CPU_DETECT */ + +#endif /* HAVE_SSSE3 */ + +#endif /* QUANTIZE_X86_H */ diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 8bceecec4..47a1788bc 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -112,21 +112,6 @@ static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) #endif #if HAVE_SSSE3 -int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr, - short *qcoeff_ptr, short *dequant_ptr, - short *round_ptr, - short *quant_ptr, short *dqcoeff_ptr); -static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) -{ - d->eob = vp8_fast_quantize_b_impl_ssse3( - b->coeff, - d->qcoeff, - d->dequant, - b->round, - b->quant_fast, - d->dqcoeff - ); -} #if CONFIG_PSNR #if ARCH_X86_64 typedef void ssimpf @@ -286,9 +271,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; -#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sadx3; -#endif cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; @@ -307,7 +290,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; - cpi->rtcd.quantize.fastquantb = fast_quantize_b_ssse3; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3; #if CONFIG_PSNR #if ARCH_X86_64 @@ -329,9 +312,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; -#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sadx8; -#endif } #endif |