summary refs log tree commit diff
path: root/vpx_dsp/ppc/intrapred_vsx.c
diff options
context:
space:
mode:
author Luca Barbato <lu_zero@gentoo.org> 2017-04-07 14:49:00 +0000
committer Luca Barbato <lu_zero@gentoo.org> 2017-04-19 01:48:09 +0200
commit a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b (patch)
tree 4c5017239285e628d42a4a5067322e8febcf1fe4 /vpx_dsp/ppc/intrapred_vsx.c
parent 7ad1faa6f8ed5472864155dca5a7a01b787f0365 (diff)
download libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.tar
libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.tar.gz
libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.tar.bz2
libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.zip
ppc: horizontal predictor 16x16
About 10x faster. Change-Id: Ie81077fa32ad214cdb46bdcb0be4e9e2c7df47c2
Diffstat (limited to 'vpx_dsp/ppc/intrapred_vsx.c')
-rw-r--r-- vpx_dsp/ppc/intrapred_vsx.c 58
1 files changed, 58 insertions, 0 deletions
diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c
index 16e08eec4..d803d3876 100644
--- a/vpx_dsp/ppc/intrapred_vsx.c
+++ b/vpx_dsp/ppc/intrapred_vsx.c
@@ -34,3 +34,61 @@ void vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
vec_vsx_st(d1, 16, dst);
}
}
+
+// Horizontal intra predictor for a 16x16 block (VSX): output row r
+// (r = 0..15) is filled with 16 copies of left[r].
+//
+//   dst    - first byte of the top output row
+//   stride - byte step from one output row to the next
+//   above  - not read by this predictor
+//   left   - 16 bytes, one per output row
+void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
+                               const uint8_t *above, const uint8_t *left) {
+  // Load the whole left column once, before any output row is written.
+  const uint8x16_t left_col = vec_vsx_ld(0, left);
+  uint8_t *row = dst;
+
+  (void)above;
+
+  // Each row broadcasts one lane of left_col across all 16 bytes. The rows
+  // are written out one by one because vec_splat takes a constant lane index.
+
+  // Rows 0-3.
+  vec_vsx_st(vec_splat(left_col, 0), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 1), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 2), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 3), 0, row);
+  row += stride;
+
+  // Rows 4-7.
+  vec_vsx_st(vec_splat(left_col, 4), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 5), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 6), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 7), 0, row);
+  row += stride;
+
+  // Rows 8-11.
+  vec_vsx_st(vec_splat(left_col, 8), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 9), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 10), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 11), 0, row);
+  row += stride;
+
+  // Rows 12-15.
+  vec_vsx_st(vec_splat(left_col, 12), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 13), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 14), 0, row);
+  row += stride;
+  vec_vsx_st(vec_splat(left_col, 15), 0, row);
+}