diff options
author | Luca Barbato <lu_zero@gentoo.org> | 2018-07-08 16:03:41 +0200 |
---|---|---|
committer | Luca Barbato <lu_zero@gentoo.org> | 2018-07-08 16:03:47 +0200 |
commit | 73962af3719401d7b33a04026cae25ac1270d9bb (patch) | |
tree | 8fee70f6939d97d353610e43325a50b813565820 /vpx_dsp/ppc | |
parent | 4745bc2ff30f5ab344202685e103a49aa56adf07 (diff) | |
download | libvpx-73962af3719401d7b33a04026cae25ac1270d9bb.tar libvpx-73962af3719401d7b33a04026cae25ac1270d9bb.tar.gz libvpx-73962af3719401d7b33a04026cae25ac1270d9bb.tar.bz2 libvpx-73962af3719401d7b33a04026cae25ac1270d9bb.zip |
[VSX] Add support for the Power9-only vec_absd
~5% gain for SAD.
Change-Id: Ief7d7691f837474f5b6b582129628276fdcce319
Diffstat (limited to 'vpx_dsp/ppc')
-rw-r--r-- | vpx_dsp/ppc/deblock_vsx.c | 8 |
-rw-r--r-- | vpx_dsp/ppc/sad_vsx.c | 8 |
-rw-r--r-- | vpx_dsp/ppc/types_vsx.h | 4 |
3 files changed, 10 insertions, 10 deletions
```diff
diff --git a/vpx_dsp/ppc/deblock_vsx.c b/vpx_dsp/ppc/deblock_vsx.c
index 4329081ee..212991169 100644
--- a/vpx_dsp/ppc/deblock_vsx.c
+++ b/vpx_dsp/ppc/deblock_vsx.c
@@ -23,17 +23,13 @@ static const uint8x16_t st8_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
                                      0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B,
                                      0x1C, 0x1D, 0x1E, 0x1F };
 
-static INLINE uint8x16_t vec_abd_s8(uint8x16_t a, uint8x16_t b) {
-  return vec_sub(vec_max(a, b), vec_min(a, b));
-}
-
 static INLINE uint8x16_t apply_filter(uint8x16_t ctx[4], uint8x16_t v,
                                       uint8x16_t filter) {
   const uint8x16_t k1 = vec_avg(ctx[0], ctx[1]);
   const uint8x16_t k2 = vec_avg(ctx[3], ctx[2]);
   const uint8x16_t k3 = vec_avg(k1, k2);
-  const uint8x16_t f_a = vec_max(vec_abd_s8(v, ctx[0]), vec_abd_s8(v, ctx[1]));
-  const uint8x16_t f_b = vec_max(vec_abd_s8(v, ctx[2]), vec_abd_s8(v, ctx[3]));
+  const uint8x16_t f_a = vec_max(vec_absd(v, ctx[0]), vec_absd(v, ctx[1]));
+  const uint8x16_t f_b = vec_max(vec_absd(v, ctx[2]), vec_absd(v, ctx[3]));
   const bool8x16_t mask = vec_cmplt(vec_max(f_a, f_b), filter);
   return vec_sel(v, vec_avg(k3, v), mask);
 }
diff --git a/vpx_dsp/ppc/sad_vsx.c b/vpx_dsp/ppc/sad_vsx.c
index 18673f682..cdee8f3d1 100644
--- a/vpx_dsp/ppc/sad_vsx.c
+++ b/vpx_dsp/ppc/sad_vsx.c
@@ -17,10 +17,10 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
 
-#define PROCESS16(offset)                                \
-  v_a = vec_vsx_ld(offset, a);                           \
-  v_b = vec_vsx_ld(offset, b);                           \
-  v_abs = vec_sub(vec_max(v_a, v_b), vec_min(v_a, v_b)); \
+#define PROCESS16(offset)      \
+  v_a = vec_vsx_ld(offset, a); \
+  v_b = vec_vsx_ld(offset, b); \
+  v_abs = vec_absd(v_a, v_b);  \
   v_sad = vec_sum4s(v_abs, v_sad);
 
 #define SAD8(height)                                                     \
diff --git a/vpx_dsp/ppc/types_vsx.h b/vpx_dsp/ppc/types_vsx.h
index 2bbf665c3..f8db6ca89 100644
--- a/vpx_dsp/ppc/types_vsx.h
+++ b/vpx_dsp/ppc/types_vsx.h
@@ -75,6 +75,10 @@ static INLINE uint8x16_t read4x2(const uint8_t *a, int stride) {
   return (uint8x16_t)vec_mergeh(a0, a1);
 }
 
+#ifndef __POWER9_VECTOR__
+#define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b))
+#endif
+
 static const uint8x16_t vec_zeros_u8 = { 0, 0, 0, 0, 0, 0, 0, 0,
                                          0, 0, 0, 0, 0, 0, 0, 0 };
 static const int16x8_t vec_zeros_s16 = { 0, 0, 0, 0, 0, 0, 0, 0 };
```