diff options
author | Luca Barbato <lu_zero@gentoo.org> | 2017-04-07 14:49:00 +0000 |
---|---|---|
committer | Luca Barbato <lu_zero@gentoo.org> | 2017-04-19 01:48:09 +0200 |
commit | a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b (patch) | |
tree | 4c5017239285e628d42a4a5067322e8febcf1fe4 /vpx_dsp/ppc/intrapred_vsx.c | |
parent | 7ad1faa6f8ed5472864155dca5a7a01b787f0365 (diff) | |
download | libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.tar libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.tar.gz libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.tar.bz2 libvpx-a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b.zip |
ppc: horizontal predictor 16x16
About 10x faster.
Change-Id: Ie81077fa32ad214cdb46bdcb0be4e9e2c7df47c2
Diffstat (limited to 'vpx_dsp/ppc/intrapred_vsx.c')
-rw-r--r-- | vpx_dsp/ppc/intrapred_vsx.c | 58 |
1 file changed, 58 insertions, 0 deletions
diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c index 16e08eec4..d803d3876 100644 --- a/vpx_dsp/ppc/intrapred_vsx.c +++ b/vpx_dsp/ppc/intrapred_vsx.c @@ -34,3 +34,61 @@ void vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, vec_vsx_st(d1, 16, dst); } } + +void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t d = vec_vsx_ld(0, left); + const uint8x16_t v0 = vec_splat(d, 0); + const uint8x16_t v1 = vec_splat(d, 1); + const uint8x16_t v2 = vec_splat(d, 2); + const uint8x16_t v3 = vec_splat(d, 3); + + const uint8x16_t v4 = vec_splat(d, 4); + const uint8x16_t v5 = vec_splat(d, 5); + const uint8x16_t v6 = vec_splat(d, 6); + const uint8x16_t v7 = vec_splat(d, 7); + + const uint8x16_t v8 = vec_splat(d, 8); + const uint8x16_t v9 = vec_splat(d, 9); + const uint8x16_t v10 = vec_splat(d, 10); + const uint8x16_t v11 = vec_splat(d, 11); + + const uint8x16_t v12 = vec_splat(d, 12); + const uint8x16_t v13 = vec_splat(d, 13); + const uint8x16_t v14 = vec_splat(d, 14); + const uint8x16_t v15 = vec_splat(d, 15); + + (void)above; + + vec_vsx_st(v0, 0, dst); + dst += stride; + vec_vsx_st(v1, 0, dst); + dst += stride; + vec_vsx_st(v2, 0, dst); + dst += stride; + vec_vsx_st(v3, 0, dst); + dst += stride; + vec_vsx_st(v4, 0, dst); + dst += stride; + vec_vsx_st(v5, 0, dst); + dst += stride; + vec_vsx_st(v6, 0, dst); + dst += stride; + vec_vsx_st(v7, 0, dst); + dst += stride; + vec_vsx_st(v8, 0, dst); + dst += stride; + vec_vsx_st(v9, 0, dst); + dst += stride; + vec_vsx_st(v10, 0, dst); + dst += stride; + vec_vsx_st(v11, 0, dst); + dst += stride; + vec_vsx_st(v12, 0, dst); + dst += stride; + vec_vsx_st(v13, 0, dst); + dst += stride; + vec_vsx_st(v14, 0, dst); + dst += stride; + vec_vsx_st(v15, 0, dst); +} |