summaryrefslogtreecommitdiff
path: root/vp8/common/x86/recon_wrapper_sse2.c
diff options
context:
space:
mode:
author Scott LaVarnway <slavarnway@google.com> 2011-11-09 15:30:35 -0500
committer Scott LaVarnway <slavarnway@google.com> 2011-11-09 15:30:35 -0500
commit df49c7c58dcb6bca3d479b224245b53f366c7725 (patch)
tree 6c81f0fbb5a223c6c402f6e58c149bba575c999e /vp8/common/x86/recon_wrapper_sse2.c
parent 9532bda0fb028ade3f486985be030ed3aaedc69c (diff)
downloadlibvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.tar
libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.tar.gz
libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.tar.bz2
libvpx-df49c7c58dcb6bca3d479b224245b53f366c7725.zip
SSE2 optimizations for vp8_build_intra_predictors_mby{,_s}()
Ronald recently sent me this patch, which he wrote in April:

> From: Ronald S. Bultje <rbultje@google.com>
> Date: Thu, 28 Apr 2011 17:30:15 -0700
> Subject: [PATCH] SSE2 optimizations for
> vp8_build_intra_predictors_mby{,_s}().

HD decode tests have shown a performance boost of up to 1.5%, depending on material.

Patch set 3: Fixed encoder crash.

Change-Id: Ie1fd1fa3dc750eec1a7a20bfa2decc079dcf48c8
Diffstat (limited to 'vp8/common/x86/recon_wrapper_sse2.c')
-rw-r--r--vp8/common/x86/recon_wrapper_sse2.c66
1 files changed, 66 insertions, 0 deletions
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index fcc75a901..44221cd0b 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -94,3 +94,69 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
+
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2); /* DC_PRED: up and left neighbours both available */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2); /* DC_PRED: only the up neighbour available */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2); /* DC_PRED: only the left neighbour available */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2); /* DC_PRED: neither neighbour available */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2); /* selected for H_PRED */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2); /* selected for V_PRED */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2); /* TM_PRED routine passed in by the *_sse2 entry points */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3); /* TM_PRED routine passed in by the *_ssse3 entry points */
+
+/*
+ * Shared dispatcher for the x86 Y-plane intra predictors.
+ *
+ * Reads the prediction mode from x->mode_info_context->mbmi.mode, picks the
+ * matching predictor routine and calls it with dst_y/dst_stride as the
+ * output and x->dst.y_buffer/x->dst.y_stride as the source.
+ *
+ *   x          - macroblock descriptor supplying the mode, the neighbour
+ *                availability flags and the source buffer.
+ *   dst_y      - where the prediction is written.
+ *   dst_stride - stride of dst_y.
+ *   tm_func    - routine to use for TM_PRED; the caller chooses the SSE2
+ *                or SSSE3 flavour.
+ *
+ * Any mode other than V_PRED, H_PRED, TM_PRED or DC_PRED is ignored: the
+ * function returns without calling a predictor.
+ */
+static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
+                                               unsigned char *dst_y,
+                                               int dst_stride,
+                                               build_intra_predictors_mbuv_fn_t tm_func)
+{
+    const int pred_mode = x->mode_info_context->mbmi.mode;
+    build_intra_predictors_mbuv_fn_t predict;
+
+    if (pred_mode == V_PRED) {
+        predict = vp8_intra_pred_y_ve_sse2;
+    } else if (pred_mode == H_PRED) {
+        predict = vp8_intra_pred_y_ho_sse2;
+    } else if (pred_mode == TM_PRED) {
+        predict = tm_func;
+    } else if (pred_mode == DC_PRED) {
+        /* The DC variant depends on which neighbours can be read. */
+        if (x->up_available) {
+            predict = x->left_available ? vp8_intra_pred_y_dc_sse2
+                                        : vp8_intra_pred_y_dctop_sse2;
+        } else {
+            predict = x->left_available ? vp8_intra_pred_y_dcleft_sse2
+                                        : vp8_intra_pred_y_dc128_sse2;
+        }
+    } else {
+        /* Mode not handled by this dispatcher. */
+        return;
+    }
+
+    predict(dst_y, dst_stride, x->dst.y_buffer, x->dst.y_stride);
+}
+
+/*
+ * SSE2 entry point: writes the Y prediction into x->predictor with a
+ * stride of 16, using the SSE2 routine for TM_PRED.
+ */
+void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x)
+{
+    unsigned char *const pred_out = x->predictor;
+    const int pred_stride = 16;
+
+    vp8_build_intra_predictors_mby_x86(x, pred_out, pred_stride,
+                                       vp8_intra_pred_y_tm_sse2);
+}
+
+/*
+ * SSSE3 entry point: writes the Y prediction into x->predictor with a
+ * stride of 16, using the SSSE3 routine for TM_PRED.
+ */
+void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x)
+{
+    unsigned char *const pred_out = x->predictor;
+    const int pred_stride = 16;
+
+    vp8_build_intra_predictors_mby_x86(x, pred_out, pred_stride,
+                                       vp8_intra_pred_y_tm_ssse3);
+}
+
+/*
+ * SSE2 "_s" entry point: writes the Y prediction straight into
+ * x->dst.y_buffer using x->dst.y_stride, with the SSE2 routine for TM_PRED.
+ */
+void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x)
+{
+    unsigned char *const frame_y = x->dst.y_buffer;
+    const int frame_stride = x->dst.y_stride;
+
+    vp8_build_intra_predictors_mby_x86(x, frame_y, frame_stride,
+                                       vp8_intra_pred_y_tm_sse2);
+}
+
+/*
+ * SSSE3 "_s" entry point: writes the Y prediction straight into
+ * x->dst.y_buffer using x->dst.y_stride, with the SSSE3 routine for TM_PRED.
+ */
+void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x)
+{
+    unsigned char *const frame_y = x->dst.y_buffer;
+    const int frame_stride = x->dst.y_stride;
+
+    vp8_build_intra_predictors_mby_x86(x, frame_y, frame_stride,
+                                       vp8_intra_pred_y_tm_ssse3);
+}