diff options
author | Kaustubh Raste <kaustubh.raste@imgtec.com> | 2016-10-10 16:15:06 +0530 |
---|---|---|
committer | Kaustubh Raste <kaustubh.raste@imgtec.com> | 2016-10-18 04:05:33 +0000 |
commit | b7310e2affd82ce75eee4385fca36275d048f457 (patch) | |
tree | 9ab1366c789814873d057d5e14f46afdd46db584 /vpx_dsp/mips/sad_msa.c | |
parent | 8b5eddf709b5ecd09c2cec98c5418a2e3b0cfe14 (diff) | |
download | libvpx-b7310e2affd82ce75eee4385fca36275d048f457.tar libvpx-b7310e2affd82ce75eee4385fca36275d048f457.tar.gz libvpx-b7310e2affd82ce75eee4385fca36275d048f457.tar.bz2 libvpx-b7310e2affd82ce75eee4385fca36275d048f457.zip |
Optimize sad_64width_x4d_msa function
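Reduce HADD_UH_U32 macro calls: for each of the four SAD results, widen both 16-bit accumulators with __msa_hadd_u_w, add them as a v4u32 vector, and make a single HADD_UW_U32 call instead of two HADD_UH_U32 calls.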
Change-Id: Ie089b9a443de516646b46e8f72156aa826ca8cfa
Diffstat (limited to 'vpx_dsp/mips/sad_msa.c')
-rw-r--r-- | vpx_dsp/mips/sad_msa.c | 24 |
1 file changed, 16 insertions, 8 deletions
diff --git a/vpx_dsp/mips/sad_msa.c b/vpx_dsp/mips/sad_msa.c index 6455814e1..e295123ac 100644 --- a/vpx_dsp/mips/sad_msa.c +++ b/vpx_dsp/mips/sad_msa.c @@ -1030,6 +1030,7 @@ static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride, v8u16 sad2_1 = { 0 }; v8u16 sad3_0 = { 0 }; v8u16 sad3_1 = { 0 }; + v4u32 sad; ref0_ptr = aref_ptr[0]; ref1_ptr = aref_ptr[1]; @@ -1061,14 +1062,21 @@ static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride, sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3); } - sad_array[0] = HADD_UH_U32(sad0_0); - sad_array[0] += HADD_UH_U32(sad0_1); - sad_array[1] = HADD_UH_U32(sad1_0); - sad_array[1] += HADD_UH_U32(sad1_1); - sad_array[2] = HADD_UH_U32(sad2_0); - sad_array[2] += HADD_UH_U32(sad2_1); - sad_array[3] = HADD_UH_U32(sad3_0); - sad_array[3] += HADD_UH_U32(sad3_1); + sad = __msa_hadd_u_w(sad0_0, sad0_0); + sad += __msa_hadd_u_w(sad0_1, sad0_1); + sad_array[0] = HADD_UW_U32(sad); + + sad = __msa_hadd_u_w(sad1_0, sad1_0); + sad += __msa_hadd_u_w(sad1_1, sad1_1); + sad_array[1] = HADD_UW_U32(sad); + + sad = __msa_hadd_u_w(sad2_0, sad2_0); + sad += __msa_hadd_u_w(sad2_1, sad2_1); + sad_array[2] = HADD_UW_U32(sad); + + sad = __msa_hadd_u_w(sad3_0, sad3_0); + sad += __msa_hadd_u_w(sad3_1, sad3_1); + sad_array[3] = HADD_UW_U32(sad); } static uint32_t avgsad_4width_msa(const uint8_t *src_ptr, int32_t src_stride, |