summary | refs | log | tree | commit | diff
path: root/vp8/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- vp8/encoder/encodeframe.c 7
-rw-r--r-- vp8/encoder/encodeintra.c 11
-rw-r--r-- vp8/encoder/encodemb.c 6
-rw-r--r-- vp8/encoder/generic/csystemdependent.c 2
-rw-r--r-- vp8/encoder/mcomp.c 3
-rw-r--r-- vp8/encoder/onyx_if.c 4
-rw-r--r-- vp8/encoder/parms.cpp 107
-rw-r--r-- vp8/encoder/pickinter.c 10
-rw-r--r-- vp8/encoder/pickinter.h 1
-rw-r--r-- vp8/encoder/rdopt.c 22
-rw-r--r-- vp8/encoder/rdopt.h 3
-rw-r--r-- vp8/encoder/x86/quantize_ssse3.asm 106
-rw-r--r-- vp8/encoder/x86/quantize_x86.h 20
-rw-r--r-- vp8/encoder/x86/x86_csystemdependent.c 21
14 files changed, 99 insertions, 224 deletions
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index ab4071d35..1dbf46eb1 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1157,7 +1157,6 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
-#if !(CONFIG_REALTIME_ONLY)
if (cpi->sf.RD && cpi->compressor_speed != 2)
{
vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
@@ -1170,7 +1169,6 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
}
else
-#endif
{
int rate2, best_distortion;
MB_PREDICTION_MODE mode, best_mode = DC_PRED;
@@ -1188,7 +1186,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (Error16x16 > this_rd)
{
@@ -1242,8 +1240,6 @@ int vp8cx_encode_inter_macroblock
else
x->encode_breakout = cpi->oxcf.encode_breakout;
-#if !(CONFIG_REALTIME_ONLY)
-
if (cpi->sf.RD)
{
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
@@ -1270,7 +1266,6 @@ int vp8cx_encode_inter_macroblock
}
else
-#endif
vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
cpi->prediction_error += distortion;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 44000063c..7d835a1cc 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -75,14 +75,9 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_quantize_mby(x);
-#if !(CONFIG_REALTIME_ONLY)
-#if 1
if (x->optimize)
vp8_optimize_mby(x, rtcd);
-#endif
-#endif
-
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
RECON_INVOKE(&rtcd->common->recon, recon_mby)
@@ -126,15 +121,9 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_quantize_mbuv(x);
-#if !(CONFIG_REALTIME_ONLY)
-#if 1
-
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
vp8_optimize_mbuv(x, rtcd);
-#endif
-#endif
-
vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 463dbcaa9..798aa683a 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -19,6 +19,7 @@
#include "vp8/common/reconintra.h"
#include "dct.h"
#include "vpx_mem/vpx_mem.h"
+#include "rdopt.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
@@ -229,8 +230,6 @@ void vp8_stuff_inter16x16(MACROBLOCK *x)
*/
}
-#if !(CONFIG_REALTIME_ONLY)
-#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp8_token_state vp8_token_state;
@@ -608,7 +607,6 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
}
-#endif
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
@@ -620,10 +618,8 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_quantize_mb(x);
-#if !(CONFIG_REALTIME_ONLY)
if (x->optimize)
optimize_mb(x, rtcd);
-#endif
vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 1d672bef9..d48c95bf7 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -89,9 +89,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sad;
-#endif
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 716f514af..f7e7c03fe 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1190,8 +1190,6 @@ int vp8_diamond_search_sadx4
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
}
-
-#if !(CONFIG_REALTIME_ONLY)
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv)
{
unsigned char *what = (*(b->base_src) + b->src);
@@ -1571,7 +1569,6 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
else
return INT_MAX;
}
-#endif /* !(CONFIG_REALTIME_ONLY) */
#ifdef ENTROPY_STATS
void print_mode_context(void)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index a18447d51..8fb1f8d23 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1542,7 +1542,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
break;
-#if !(CONFIG_REALTIME_ONLY)
case MODE_GOODQUALITY:
cpi->pass = 0;
cpi->compressor_speed = 1;
@@ -1583,7 +1582,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->pass = 2;
cpi->compressor_speed = 0;
break;
-#endif
}
if (cpi->pass == 0)
@@ -2117,9 +2115,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
-#if !(CONFIG_REALTIME_ONLY)
cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
-#endif
cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search);
cpi->ready_for_new_frame = 1;
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
deleted file mode 100644
index 2a39b2ca3..000000000
--- a/vp8/encoder/parms.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#if 0
-
-#include <map>
-#include <string>
-#include <fstream>
-extern "C"
-{
- #include "vp8/common/onyx.h"
-}
-
-
-using namespace std;
-
-typedef map<string,int> Parms;
-
-#define ALLPARMS(O,DOTHIS) \
- DOTHIS(O, interquantizer )\
- DOTHIS(O, auto_gold )\
- DOTHIS(O, auto_adjust_gold_quantizer )\
- DOTHIS(O, goldquantizer )\
- DOTHIS(O, goldfreq )\
- DOTHIS(O, auto_key )\
- DOTHIS(O, auto_adjust_key_quantizer )\
- DOTHIS(O, keyquantizer )\
- DOTHIS(O, keyfreq )\
- DOTHIS(O, pass )\
- DOTHIS(O, fixed_q )\
- DOTHIS(O, target_bandwidth )\
- DOTHIS(O, auto_worst_q )\
- DOTHIS(O, worst_quality )\
- DOTHIS(O, best_allowed_q )\
- DOTHIS(O, end_usage )\
- DOTHIS(O, starting_buffer_level )\
- DOTHIS(O, optimal_buffer_level )\
- DOTHIS(O, maximum_buffer_size )\
- DOTHIS(O, under_shoot_pct )\
- DOTHIS(O, allow_df )\
- DOTHIS(O, drop_frames_water_mark )\
- DOTHIS(O, max_allowed_datarate )\
- DOTHIS(O, two_pass_vbrbias )\
- DOTHIS(O, two_pass_vbrmin_section )\
- DOTHIS(O, two_pass_vbrmax_section )\
- DOTHIS(O, filter_type )\
- DOTHIS(O, compressor_speed )\
- DOTHIS(O, mbpitch_feature )\
- DOTHIS(O, allow_spatial_resampling )\
- DOTHIS(O, resample_down_water_mark )\
- DOTHIS(O, resample_up_water_mark )\
- DOTHIS(O, noise_sensitivity )\
- DOTHIS(O, horiz_scale )\
- DOTHIS(O, vert_scale )
-
-
-#define GET(O,V) O->V = x[#V];
-#define PUT(O,V) x[#V] = O->V;
-
-
-extern "C" void get_parms(VP8_CONFIG *ocf,char *filename)
-{
-
- Parms x;
- int value;
- string variable;
- string equal;
-
- ifstream config_file(filename);
-
- ALLPARMS(ocf, PUT);
-
- // store all the parms in a map (really simple parsing)
- while(!config_file.eof() && config_file.is_open())
- {
- config_file >> variable;
- config_file >> equal;
-
- if(equal != "=")
- continue;
-
- config_file >> value;
-
- x[variable] = value;
- }
-
- ALLPARMS(ocf, GET);
-
-}
-
-#define PRINT(O,V) debug_file<<#V <<" = " << O->V <<"\n";
-extern "C" void print_parms(VP8_CONFIG *ocf,char *filename)
-{
- ofstream debug_file(filename,ios_base::app);
- ALLPARMS(ocf, PRINT);
- debug_file << "=============================================="<<"\n";
-}
-
-#endif
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index a92bcef91..52566e9fd 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -194,7 +194,7 @@ static int pick_intra4x4block(
rate = mode_costs[mode];
vp8_predict_intra4x4(b, mode, b->predictor);
distortion = get_prediction_error(be, b, &rtcd->variance);
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_rd)
{
@@ -252,7 +252,7 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
if (i == 16)
{
*best_dist = distortion;
- error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, distortion);
+ error = RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
else
{
@@ -643,7 +643,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
}
else
{
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
{
@@ -667,7 +667,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
{
@@ -813,7 +813,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
{
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index 8fea98397..f96fc5376 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -14,7 +14,6 @@
#include "vpx_ports/config.h"
#include "vp8/common/onyxc_int.h"
-#define RD_ESTIMATE(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion);
extern void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb);
extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 733585909..5d5069d8d 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -31,7 +31,7 @@
#include "vp8/common/g_common.h"
#include "variance.h"
#include "mcomp.h"
-
+#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
#include "dct.h"
#include "vp8/common/systemdependent.h"
@@ -46,13 +46,8 @@
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
-
-#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
-
-
static const int auto_speed_thresh[17] =
{
1000,
@@ -480,7 +475,6 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
}
-#if !(CONFIG_REALTIME_ONLY)
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
{
int c = !type; /* start at coef 0, unless Y with Y2 */
@@ -847,7 +841,6 @@ void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *r
x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
}
-#endif
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
{
@@ -875,7 +868,6 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
}
}
-#if !(CONFIG_REALTIME_ONLY)
static int labels2mode(
MACROBLOCK *x,
int const *labelings, int which_label,
@@ -1190,7 +1182,12 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
{
- thissme = cpi->full_search_sad(x, c, e, bsi->mvp,
+ MV full_mvp;
+
+ full_mvp.row = bsi->mvp->row >>3;
+ full_mvp.col = bsi->mvp->col >>3;
+
+ thissme = cpi->full_search_sad(x, c, e, &full_mvp,
sadpb / 4, 16, v_fn_ptr, x->mvcost, bsi->ref_mv);
if (thissme < bestsme)
@@ -1432,7 +1429,6 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
return bsi.segment_rd;
}
-#endif
static void swap(int *x,int *y)
{
@@ -1720,7 +1716,6 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
}
}
-#if !(CONFIG_REALTIME_ONLY)
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
{
BLOCK *b = &x->block[0];
@@ -2225,8 +2220,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
case ZEROMV:
- mv_selected:
-
// Trap vectors that reach beyond the UMV borders
// Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point
// because of the lack of break statements in the previous two cases.
@@ -2503,4 +2496,3 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
}
-#endif
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index 1d1be11a4..1d5f9a3a8 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -11,6 +11,9 @@
#ifndef __INC_RDOPT_H
#define __INC_RDOPT_H
+
+#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
+
extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
extern int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd);
extern int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion);
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index 2f33199e5..912007e02 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -9,38 +9,62 @@
%include "vpx_ports/x86_abi_support.asm"
+%include "asm_enc_offsets.asm"
-;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr
-; short *qcoeff_ptr,short *dequant_ptr,
-; short *round_ptr,
-; short *quant_ptr, short *dqcoeff_ptr);
+; void vp8_fast_quantize_b_ssse3 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
;
-global sym(vp8_fast_quantize_b_impl_ssse3)
-sym(vp8_fast_quantize_b_impl_ssse3):
+
+global sym(vp8_fast_quantize_b_ssse3)
+sym(vp8_fast_quantize_b_ssse3):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
push rdi
+ push rsi
+ %endif
+%endif
; end prolog
- mov rdx, arg(0) ;coeff_ptr
- mov rdi, arg(3) ;round_ptr
- mov rsi, arg(4) ;quant_ptr
-
- movdqa xmm0, [rdx]
- movdqa xmm4, [rdx + 16]
-
- movdqa xmm2, [rdi] ;round lo
- movdqa xmm3, [rdi + 16] ;round hi
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp8_block_coeff]
+ mov rcx, [rdi + vp8_block_round]
+ mov rdx, [rdi + vp8_block_quant_fast]
+
+ ; coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; round
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
movdqa xmm1, xmm0
movdqa xmm5, xmm4
- psraw xmm0, 15 ;sign of z (aka sz)
- psraw xmm4, 15 ;sign of z (aka sz)
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
pabsw xmm1, xmm1
pabsw xmm5, xmm5
@@ -48,23 +72,24 @@ sym(vp8_fast_quantize_b_impl_ssse3):
paddw xmm1, xmm2
paddw xmm5, xmm3
- pmulhw xmm1, [rsi]
- pmulhw xmm5, [rsi + 16]
+ ; quant_fast
+ pmulhw xmm1, [rdx]
+ pmulhw xmm5, [rdx + 16]
- mov rdi, arg(1) ;qcoeff_ptr
- mov rcx, arg(2) ;dequant_ptr
- mov rsi, arg(5) ;dqcoeff_ptr
+ mov rax, [rsi + vp8_blockd_qcoeff]
+ mov rdi, [rsi + vp8_blockd_dequant]
+ mov rcx, [rsi + vp8_blockd_dqcoeff]
pxor xmm1, xmm0
pxor xmm5, xmm4
psubw xmm1, xmm0
psubw xmm5, xmm4
- movdqa [rdi], xmm1
- movdqa [rdi + 16], xmm5
+ movdqa [rax], xmm1
+ movdqa [rax + 16], xmm5
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
+ movdqa xmm2, [rdi]
+ movdqa xmm3, [rdi + 16]
pxor xmm4, xmm4
pmullw xmm2, xmm1
@@ -73,38 +98,37 @@ sym(vp8_fast_quantize_b_impl_ssse3):
pcmpeqw xmm1, xmm4 ;non zero mask
pcmpeqw xmm5, xmm4 ;non zero mask
packsswb xmm1, xmm5
- pshufb xmm1, [ GLOBAL(zz_shuf)]
+ pshufb xmm1, [GLOBAL(zz_shuf)]
pmovmskb edx, xmm1
-; xor ecx, ecx
-; mov eax, -1
-;find_eob_loop:
-; shr edx, 1
-; jc fq_skip
-; mov eax, ecx
-;fq_skip:
-; inc ecx
-; cmp ecx, 16
-; jne find_eob_loop
xor rdi, rdi
mov eax, -1
xor dx, ax ;flip the bits for bsr
bsr eax, edx
- movdqa [rsi], xmm2 ;store dqcoeff
- movdqa [rsi + 16], xmm3 ;store dqcoeff
+ movdqa [rcx], xmm2 ;store dqcoeff
+ movdqa [rcx + 16], xmm3 ;store dqcoeff
sub edi, edx ;check for all zeros in bit mask
sar edi, 31 ;0 or -1
add eax, 1
and eax, edi ;if the bit mask was all zero,
;then eob = 0
+ mov [rsi + vp8_blockd_eob], eax
+
; begin epilog
+%if ABI_IS_32BIT
+ pop rsi
pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
pop rsi
+ pop rdi
+ %endif
+%endif
+
RESTORE_GOT
- UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
index df2e0bc39..f09358061 100644
--- a/vp8/encoder/x86/quantize_x86.h
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -19,7 +19,7 @@
*/
#if HAVE_MMX
-#endif
+#endif /* HAVE_MMX */
#if HAVE_SSE2
@@ -34,9 +34,21 @@ extern prototype_quantize_block(vp8_fast_quantize_b_sse2);
#undef vp8_quantize_fastquantb
#define vp8_quantize_fastquantb vp8_fast_quantize_b_sse2
-#endif
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
-#endif
+#endif /* HAVE_SSE2 */
-#endif
+#if HAVE_SSSE3
+extern prototype_quantize_block(vp8_fast_quantize_b_ssse3);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef vp8_quantize_fastquantb
+#define vp8_quantize_fastquantb vp8_fast_quantize_b_ssse3
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_SSSE3 */
+
+#endif /* QUANTIZE_X86_H */
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 8bceecec4..47a1788bc 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -112,21 +112,6 @@ static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSSE3
-int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
- short *qcoeff_ptr, short *dequant_ptr,
- short *round_ptr,
- short *quant_ptr, short *dqcoeff_ptr);
-static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
-{
- d->eob = vp8_fast_quantize_b_impl_ssse3(
- b->coeff,
- d->qcoeff,
- d->dequant,
- b->round,
- b->quant_fast,
- d->dqcoeff
- );
-}
#if CONFIG_PSNR
#if ARCH_X86_64
typedef void ssimpf
@@ -286,9 +271,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
-#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sadx3;
-#endif
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
@@ -307,7 +290,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
- cpi->rtcd.quantize.fastquantb = fast_quantize_b_ssse3;
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
#if CONFIG_PSNR
#if ARCH_X86_64
@@ -329,9 +312,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
-#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
-#endif
}
#endif