summaryrefslogtreecommitdiff
path: root/vpx_dsp/mips
diff options
context:
space:
mode:
authorKaustubh Raste <kaustubh.raste@imgtec.com>2017-01-31 10:00:43 +0530
committerKaustubh Raste <kaustubh.raste@imgtec.com>2017-01-31 10:00:43 +0530
commitdf7e1fecc11f9459282e722dcbd9d9dfb94df6da (patch)
treec50e6492011a0292c66c0e62ffdecd0717d02573 /vpx_dsp/mips
parent280ad355532a61df1bb6f4e7918f120d47f3fc55 (diff)
downloadlibvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.tar
libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.tar.gz
libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.tar.bz2
libvpx-df7e1fecc11f9459282e722dcbd9d9dfb94df6da.zip
Add mips msa vpx_minmax_8x8 function

Average improvement ~4x-5x.

Change-Id: I83aee9977534fddb8a9b80d31af646c0b6b1a8c3
Diffstat (limited to 'vpx_dsp/mips')
-rw-r--r--vpx_dsp/mips/avg_msa.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/vpx_dsp/mips/avg_msa.c b/vpx_dsp/mips/avg_msa.c
index e8b5fc059..48b841969 100644
--- a/vpx_dsp/mips/avg_msa.c
+++ b/vpx_dsp/mips/avg_msa.c
@@ -677,3 +677,50 @@ int vpx_vector_var_msa(const int16_t *ref, const int16_t *src, const int bwl) {
return var;
}
+
+/* Finds the smallest and largest byte-wise absolute difference between the
+ * block at s and the block at d, and stores them through min and max.
+ *
+ *   s, p   - first block and the step handed to LD_UB8 along with it.
+ *   d, dp  - second block and the step handed to LD_UB8 along with it.
+ *   min    - receives lane 0 of the reduced minimum vector.
+ *   max    - receives lane 0 of the reduced maximum vector.
+ *
+ * NOTE(review): LD_UB8 and PCKEV_D4_UB are defined outside this hunk. The
+ * comments below assume LD_UB8 loads eight row vectors and PCKEV_D4_UB keeps
+ * the low 8 bytes of each row, two rows per vector -- confirm against the
+ * macro definitions. If LD_UB8 loads full 16-byte vectors, each row read
+ * touches 8 bytes beyond the 8 that are used.
+ */
+void vpx_minmax_8x8_msa(const uint8_t *s, int p, const uint8_t *d, int dp,
+                        int *min, int *max) {
+  v16u8 sv0, sv1, sv2, sv3, sv4, sv5, sv6, sv7;
+  v16u8 dv0, dv1, dv2, dv3, dv4, dv5, dv6, dv7;
+  v16u8 ad0, ad1, ad2, ad3;
+  v16u8 lo, lo_sh, hi, hi_sh;
+
+  /* Fetch eight rows from each block, then squeeze them into four vectors
+   * (the pack writes its results back into the first four row variables). */
+  LD_UB8(s, p, sv0, sv1, sv2, sv3, sv4, sv5, sv6, sv7);
+  PCKEV_D4_UB(sv1, sv0, sv3, sv2, sv5, sv4, sv7, sv6, sv0, sv1, sv2, sv3);
+  LD_UB8(d, dp, dv0, dv1, dv2, dv3, dv4, dv5, dv6, dv7);
+  PCKEV_D4_UB(dv1, dv0, dv3, dv2, dv5, dv4, dv7, dv6, dv0, dv1, dv2, dv3);
+
+  /* Unsigned absolute difference of every byte pair. */
+  ad0 = __msa_asub_u_b(sv0, dv0);
+  ad1 = __msa_asub_u_b(sv1, dv1);
+  ad2 = __msa_asub_u_b(sv2, dv2);
+  ad3 = __msa_asub_u_b(sv3, dv3);
+
+  /* Minimum: merge the four vectors lane by lane, then fold the result onto
+   * itself with byte slides of 8, 4, 2 and 1 so lane 0 covers all 16 lanes. */
+  lo = __msa_min_u_b(ad0, ad1);
+  lo_sh = __msa_min_u_b(ad2, ad3);
+  lo = __msa_min_u_b(lo, lo_sh);
+  lo_sh = (v16u8)__msa_sldi_b((v16i8)lo_sh, (v16i8)lo, 8);
+  lo = __msa_min_u_b(lo, lo_sh);
+  lo_sh = (v16u8)__msa_sldi_b((v16i8)lo_sh, (v16i8)lo, 4);
+  lo = __msa_min_u_b(lo, lo_sh);
+  lo_sh = (v16u8)__msa_sldi_b((v16i8)lo_sh, (v16i8)lo, 2);
+  lo = __msa_min_u_b(lo, lo_sh);
+  lo_sh = (v16u8)__msa_sldi_b((v16i8)lo_sh, (v16i8)lo, 1);
+  lo = __msa_min_u_b(lo, lo_sh);
+
+  /* Maximum: the same merge-and-fold sequence using the max operation. */
+  hi = __msa_max_u_b(ad0, ad1);
+  hi_sh = __msa_max_u_b(ad2, ad3);
+  hi = __msa_max_u_b(hi, hi_sh);
+  hi_sh = (v16u8)__msa_sldi_b((v16i8)hi_sh, (v16i8)hi, 8);
+  hi = __msa_max_u_b(hi, hi_sh);
+  hi_sh = (v16u8)__msa_sldi_b((v16i8)hi_sh, (v16i8)hi, 4);
+  hi = __msa_max_u_b(hi, hi_sh);
+  hi_sh = (v16u8)__msa_sldi_b((v16i8)hi_sh, (v16i8)hi, 2);
+  hi = __msa_max_u_b(hi, hi_sh);
+  hi_sh = (v16u8)__msa_sldi_b((v16i8)hi_sh, (v16i8)hi, 1);
+  hi = __msa_max_u_b(hi, hi_sh);
+
+  /* Only lane 0 holds the fully reduced value. */
+  *min = lo[0];
+  *max = hi[0];
+}