From 1083fe499973fb363ab4325ec3b4a905cdb5c0e1 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje"
Date: Wed, 27 Apr 2011 10:05:10 -0700
Subject: SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().

decoding
before 10.425 10.432 10.423 =10.426
after: 10.405 10.416 10.398 =10.406, 0.2% faster

encoding
before 14.252 14.331 14.250 14.223 14.241 14.220 14.221 =14.248
after 14.095 14.090 14.085 14.095 14.064 14.081 14.089 =14.086, 1.1% faster

Change-Id: I483d3d8f0deda8ad434cea76e16028380722aee2
---
 vp8/common/x86/recon_wrapper_sse2.c | 90 +++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 vp8/common/x86/recon_wrapper_sse2.c

(limited to 'vp8/common/x86/recon_wrapper_sse2.c')

diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
new file mode 100644
index 000000000..7b17851b5
--- /dev/null
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "vpx_ports/config.h" +#include "vp8/common/recon.h" +#include "recon_x86.h" +#include "vpx_mem/vpx_mem.h" + +#define build_intra_predictors_mbuv_prototype(sym) \ + void sym(unsigned char *dst, int dst_stride, \ + const unsigned char *src, int src_stride) +typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t)); + +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3); + +static inline void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_stride, + build_intra_predictors_mbuv_fn_t tm_func) +{ + int mode = x->mode_info_context->mbmi.uv_mode; + build_intra_predictors_mbuv_fn_t fn; + int src_stride = x->dst.uv_stride; + + switch (mode) { + case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break; + case H_PRED: fn = vp8_intra_pred_uv_ho_mmx2; break; + case TM_PRED: fn = tm_func; break; + case DC_PRED: + if (x->up_available) { + if (x->left_available) { + fn = vp8_intra_pred_uv_dc_mmx2; break; + } else { + fn = vp8_intra_pred_uv_dctop_mmx2; break; + } + } else if (x->left_available) { + fn = vp8_intra_pred_uv_dcleft_mmx2; break; + } else { + fn = vp8_intra_pred_uv_dc128_mmx; break; + } + break; + default: return; + } + + fn(dst_u, dst_stride, x->dst.u_buffer, src_stride); + fn(dst_v, dst_stride, x->dst.v_buffer, src_stride); +} + +void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mbuv_x86(x, 
&x->predictor[256], + &x->predictor[320], 8, + vp8_intra_pred_uv_tm_sse2); +} + +void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256], + &x->predictor[320], 8, + vp8_intra_pred_uv_tm_ssse3); +} + +void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer, + x->dst.v_buffer, x->dst.uv_stride, + vp8_intra_pred_uv_tm_sse2); +} + +void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer, + x->dst.v_buffer, x->dst.uv_stride, + vp8_intra_pred_uv_tm_ssse3); +} -- cgit v1.2.3