summary | refs | log | tree | commit | diff
path: root/vp9/encoder/ppc
diff options
context:
space:
mode:
author: Luc Trudeau <luc@trud.ca> 2018-06-26 17:06:52 -0400
committer: Luc Trudeau <luc@trud.ca> 2018-06-27 14:32:14 +0000
commit: b0adbb6c22f514ac66ef32ee1265983a52eb2934 (patch)
tree: 1e5f4ba63f310424f7fa3898991c023139dfee7e /vp9/encoder/ppc
parent: dc93b6298b77a8e68c80175f2f7f5fb211be358a (diff)
download: libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.tar
libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.tar.gz
libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.tar.bz2
libvpx-b0adbb6c22f514ac66ef32ee1265983a52eb2934.zip
[VSX] Replace vec_pack and vec_perm with single vec_perm

vpx_quantize_b: VP9QuantizeTest Speed Test (POWER8 Model 2.1)
32x32 Old VSX time = 8.1 ms, new VSX time = 7.9 ms

vp9_quantize_fp: VP9QuantizeTest Speed Test (POWER8 Model 2.1)
32x32 Old VSX time = 6.5 ms, new VSX time = 6.2 ms

Change-Id: Ic2183e8bd721bb69eaeb4865b542b656255a0870

Diffstat (limited to 'vp9/encoder/ppc')
-rw-r--r-- vp9/encoder/ppc/vp9_quantize_vsx.c | 4
1 file changed, 1 insertion, 3 deletions
diff --git a/vp9/encoder/ppc/vp9_quantize_vsx.c b/vp9/encoder/ppc/vp9_quantize_vsx.c
index f6fdb55a6..3720b0876 100644
--- a/vp9/encoder/ppc/vp9_quantize_vsx.c
+++ b/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -154,7 +154,6 @@ static INLINE int32x4_t vec_is_neg(int32x4_t a) {
// vec_mladd results in overflow.
static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
int16x8_t dequant) {
- int16x8_t dqcoeff;
int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
// Add 1 if negative to round towards zero because the C uses division.
@@ -162,8 +161,7 @@ static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
- dqcoeff = vec_pack(dqcoeffe, dqcoeffo);
- return vec_perm(dqcoeff, dqcoeff, vec_perm_merge);
+ return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
}
void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,