diff options
author | Kaustubh Raste <kaustubh.raste@imgtec.com> | 2017-01-31 10:00:43 +0530 |
---|---|---|
committer | Kaustubh Raste <kaustubh.raste@imgtec.com> | 2017-01-31 10:00:43 +0530 |
commit | df7e1fecc11f9459282e722dcbd9d9dfb94df6da (patch) | |
tree | c50e6492011a0292c66c0e62ffdecd0717d02573 /vpx_dsp/mips | |
parent | 280ad355532a61df1bb6f4e7918f120d47f3fc55 (diff) | |
download | libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.tar libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.tar.gz libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.tar.bz2 libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.zip |
Add mips msa vpx_minmax_8x8 function
average improvement ~4x-5x
Change-Id: I83aee9977534fddb8a9b80d31af646c0b6b1a8c3
Diffstat (limited to 'vpx_dsp/mips')
-rw-r--r-- | vpx_dsp/mips/avg_msa.c | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/vpx_dsp/mips/avg_msa.c b/vpx_dsp/mips/avg_msa.c index e8b5fc059..48b841969 100644 --- a/vpx_dsp/mips/avg_msa.c +++ b/vpx_dsp/mips/avg_msa.c @@ -677,3 +677,50 @@ int vpx_vector_var_msa(const int16_t *ref, const int16_t *src, const int bwl) { return var; } + /* vpx_minmax_8x8_msa: writes to *min and *max the smallest and largest per-byte absolute difference between the data read from s (stride argument p) and from d (stride argument dp). The 8x8 block size is taken from the function name; LD_UB8 and PCKEV_D4_UB are macros defined outside this hunk, so their exact load width and packing order should be confirmed there. */ +void vpx_minmax_8x8_msa(const uint8_t *s, int p, const uint8_t *d, int dp, + int *min, int *max) { + v16u8 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7; + v16u8 diff0, diff1, diff2, diff3, min0, min1, max0, max1; + /* Load eight vectors from each input, then combine them pairwise so that s0..s3 and d0..d3 carry all the data; presumably each packed vector holds the low 8 bytes of two consecutive rows - TODO confirm against the macro definitions. NOTE(review): if LD_UB8 loads full 16-byte vectors, each row read touches bytes past the 8-pixel width - confirm callers tolerate that. */ + LD_UB8(s, p, s0, s1, s2, s3, s4, s5, s6, s7); + LD_UB8(d, dp, d0, d1, d2, d3, d4, d5, d6, d7); + PCKEV_D4_UB(s1, s0, s3, s2, s5, s4, s7, s6, s0, s1, s2, s3); + PCKEV_D4_UB(d1, d0, d3, d2, d5, d4, d7, d6, d0, d1, d2, d3); + /* Unsigned per-byte absolute difference between the packed source and reference vectors. */ + diff0 = __msa_asub_u_b(s0, d0); + diff1 = __msa_asub_u_b(s1, d1); + diff2 = __msa_asub_u_b(s2, d2); + diff3 = __msa_asub_u_b(s3, d3); + /* Reduce the four difference vectors to one per-lane minimum (min0) and one per-lane maximum (max0). */ + min0 = __msa_min_u_b(diff0, diff1); + min1 = __msa_min_u_b(diff2, diff3); + min0 = __msa_min_u_b(min0, min1); + + max0 = __msa_max_u_b(diff0, diff1); + max1 = __msa_max_u_b(diff2, diff3); + max0 = __msa_max_u_b(max0, max1); + /* Horizontal reduction: slide the running vector down by 8, 4, 2 and then 1 bytes and combine it with itself each time, so that lane 0 ends up holding the min (or max) over all 16 lanes. The bytes shifted in from the old min1/max1 only land in upper lanes that never feed lane 0. */ + min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 8); + min0 = __msa_min_u_b(min0, min1); + max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 8); + max0 = __msa_max_u_b(max0, max1); + + min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 4); + min0 = __msa_min_u_b(min0, min1); + max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 4); + max0 = __msa_max_u_b(max0, max1); + + min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 2); + min0 = __msa_min_u_b(min0, min1); + max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 2); + max0 = __msa_max_u_b(max0, max1); + + min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 1); + min0 = __msa_min_u_b(min0, min1); + max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 1); + max0 = __msa_max_u_b(max0, max1); + /* Lane 0 of each vector now carries the final result; the unsigned byte is widened to int on store. */ + *min = min0[0]; + *max = max0[0]; +} |