mips msa vp8 quantize optimization

average improvement ~2x-3x

Change-Id: I6fc37191bf9cb5a67e1af9787d0d27659c17bdba
author: Parag Salasakar <img.mips1@gmail.com> 2015-07-30 10:56:40 +0530
committer: James Zern <jzern@google.com> 2015-07-30 12:56:57 -0700
commit: 56aa0da405b072447bf8165650983fdb36d272a5 (patch)
tree: e1ca26634149e0629f9105fdf1ef4e7a88307635 /vp8/common
parent: 0c2a14f9e24fda448161bbaf13878b202ea57f1f (diff)
download: libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar
libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.gz
libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.bz2
libvpx-56aa0da405b072447bf8165650983fdb36d272a5.zip
2 files changed, 53 insertions, 2 deletions
diff --git a/vp8/common/mips/msa/vp8_macros_msa.h b/vp8/common/mips/msa/vp8_macros_msa.h
index b533cc696..0ed94cd43 100644
--- a/vp8/common/mips/msa/vp8_macros_msa.h
+++ b/vp8/common/mips/msa/vp8_macros_msa.h
@@ -553,6 +553,20 @@
 }
 #define VSHF_B3_SB(...) VSHF_B3(v16i8, __VA_ARGS__)
 
+/* Description : Shuffle halfword vector elements as per mask vector
+   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Halfword elements from 'in0' & 'in1' are copied selectively to
+                 'out0' as per 'mask0'; 'in2' & 'in3' to 'out1' as per 'mask1'
+*/
+#define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)   \
+{                                                                      \
+    out0 = (RTYPE)__msa_vshf_h((v8i16)mask0, (v8i16)in1, (v8i16)in0);  \
+    out1 = (RTYPE)__msa_vshf_h((v8i16)mask1, (v8i16)in3, (v8i16)in2);  \
+}
+#define VSHF_H2_SH(...) VSHF_H2(v8i16, __VA_ARGS__)
+
 /* Description : Dot product of byte vector elements
    Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                  Outputs - out0, out1
@@ -604,6 +618,31 @@
 }
 #define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
 
+/* Description : Dot product of halfword vector elements
+   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Signed halfword elements from 'mult0' are multiplied with
+                 signed halfword elements from 'cnst0' producing a result
+                 twice the size of input i.e. signed word.
+                 The products of adjacent odd-even elements are added
+                 together and written to 'out0'; 'mult1' & 'cnst1' give 'out1'
+*/
+#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1)  \
+{                                                                \
+    out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0);    \
+    out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1);    \
+}
+/* DOTP_SH4 applies DOTP_SH2 to input/output pairs (0, 1) and then (2, 3) */
+#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3,           \
+                 cnst0, cnst1, cnst2, cnst3,                  \
+                 out0, out1, out2, out3)                      \
+{                                                             \
+    DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1);  \
+    DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3);  \
+}
+#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
+
 /* Description : Dot product & addition of byte vector elements
    Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                  Outputs - out0, out1
@@ -1309,6 +1348,18 @@
     ADD2(in4, in5, in6, in7, out2, out3);             \
 }
 
+/* Description : Subtraction of 2 pairs of vectors
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1
+   Details     : Each element in 'in1' is subtracted from 'in0' and result is
+                 written to 'out0'; likewise 'in3' from 'in2' into 'out1'.
+*/
+#define SUB2(in0, in1, in2, in3, out0, out1)  \
+{                                             \
+    out0 = in0 - in1;                         \
+    out1 = in2 - in3;                         \
+}
+
 /* Description : Sign extend halfword elements from right half of the vector
    Arguments   : Input  - in    (halfword vector)
                  Output - out   (sign extended word vector)
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index ad7429a54..3cf4f9801 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -286,10 +286,10 @@ $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
 # Quantizer
 #
 add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
+specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;
 
 add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 neon/;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;
 
 #
 # Block subtraction
author	Parag Salasakar <img.mips1@gmail.com>	2015-07-30 10:56:40 +0530
committer	James Zern <jzern@google.com>	2015-07-30 12:56:57 -0700
commit	56aa0da405b072447bf8165650983fdb36d272a5 (patch)
tree	e1ca26634149e0629f9105fdf1ef4e7a88307635 /vp8/common
parent	0c2a14f9e24fda448161bbaf13878b202ea57f1f (diff)
download	libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.gz libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.bz2 libvpx-56aa0da405b072447bf8165650983fdb36d272a5.zip