diff options
author | Scott LaVarnway <slavarnway@google.com> | 2011-11-09 15:30:35 -0500 |
---|---|---|
committer | Scott LaVarnway <slavarnway@google.com> | 2011-11-09 15:30:35 -0500 |
commit | df49c7c58dcb6bca3d479b224245b53f366c7725 (patch) | |
tree | 6c81f0fbb5a223c6c402f6e58c149bba575c999e /vp8/common/x86/recon_wrapper_sse2.c | |
parent | 9532bda0fb028ade3f486985be030ed3aaedc69c (diff) | |
download | libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.tar libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.tar.gz libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.tar.bz2 libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.zip |
SSE2 optimizations for vp8_build_intra_predictors_mby{,_s}()
Ronald recently sent me this patch that he did in April.
> From: Ronald S. Bultje <rbultje@google.com>
> Date: Thu, 28 Apr 2011 17:30:15 -0700
> Subject: [PATCH] SSE2 optimizations for
> vp8_build_intra_predictors_mby{,_s}().
HD decode tests have shown a performance boost up to 1.5%,
depending on material.
Patch set 3: Fixed encoder crash.
Change-Id: Ie1fd1fa3dc750eec1a7a20bfa2decc079dcf48c8
Diffstat (limited to 'vp8/common/x86/recon_wrapper_sse2.c')
-rw-r--r-- | vp8/common/x86/recon_wrapper_sse2.c | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c index fcc75a901..44221cd0b 100644 --- a/vp8/common/x86/recon_wrapper_sse2.c +++ b/vp8/common/x86/recon_wrapper_sse2.c @@ -94,3 +94,69 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x) vp8_intra_pred_uv_tm_ssse3, vp8_intra_pred_uv_ho_ssse3); } + +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3); + +static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_stride, + build_intra_predictors_mbuv_fn_t tm_func) +{ + int mode = x->mode_info_context->mbmi.mode; + build_intra_predictors_mbuv_fn_t fn; + int src_stride = x->dst.y_stride; + switch (mode) { + case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break; + case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break; + case TM_PRED: fn = tm_func; break; + case DC_PRED: + if (x->up_available) { + if (x->left_available) { + fn = vp8_intra_pred_y_dc_sse2; break; + } else { + fn = vp8_intra_pred_y_dctop_sse2; break; + } + } else if (x->left_available) { + fn = vp8_intra_pred_y_dcleft_sse2; break; + } else { + fn = vp8_intra_pred_y_dc128_sse2; break; + } + break; + default: return; + } + + fn(dst_y, dst_stride, x->dst.y_buffer, src_stride); + return; +} + +void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->predictor, 16, + vp8_intra_pred_y_tm_sse2); +} + +void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->predictor, 16, + vp8_intra_pred_y_tm_ssse3); +} + +void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride, + vp8_intra_pred_y_tm_sse2); +} + +void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride, + vp8_intra_pred_y_tm_ssse3); + +} |