diff options
author | Luc Trudeau <luc@trud.ca> | 2018-06-26 17:06:52 -0400 |
---|---|---|
committer | Luc Trudeau <luc@trud.ca> | 2018-06-27 14:32:14 +0000 |
commit | b0adbb6c22f514ac66ef32ee1265983a52eb2934 (patch) | |
tree | 1e5f4ba63f310424f7fa3898991c023139dfee7e /vp9/encoder/ppc | |
parent | dc93b6298b77a8e68c80175f2f7f5fb211be358a (diff) | |
download | libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.tar libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.tar.gz libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.tar.bz2 libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.zip |
[VSX] Replace vec_pack and vec_perm with single vec_perm
vpx_quantize_b:
VP9QuantizeTest Speed Test (POWER8 Model 2.1)
32x32 Old VSX time = 8.1 ms, new VSX time = 7.9 ms
vp9_quantize_fp:
VP9QuantizeTest Speed Test (POWER8 Model 2.1)
32x32 Old VSX time = 6.5 ms, new VSX time = 6.2 ms
Change-Id: Ic2183e8bd721bb69eaeb4865b542b656255a0870
Diffstat (limited to 'vp9/encoder/ppc')
-rw-r--r-- | vp9/encoder/ppc/vp9_quantize_vsx.c | 4 |
1 file changed, 1 insertion, 3 deletions
diff --git a/vp9/encoder/ppc/vp9_quantize_vsx.c b/vp9/encoder/ppc/vp9_quantize_vsx.c
index f6fdb55a6..3720b0876 100644
--- a/vp9/encoder/ppc/vp9_quantize_vsx.c
+++ b/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -154,7 +154,6 @@ static INLINE int32x4_t vec_is_neg(int32x4_t a) {
 // vec_mladd results in overflow.
 static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
                                             int16x8_t dequant) {
-  int16x8_t dqcoeff;
   int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
   int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
   // Add 1 if negative to round towards zero because the C uses division.
@@ -162,8 +161,7 @@ static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
   dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
   dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
   dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
-  dqcoeff = vec_pack(dqcoeffe, dqcoeffo);
-  return vec_perm(dqcoeff, dqcoeff, vec_perm_merge);
+  return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
 }
 
 void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,