summaryrefslogtreecommitdiff
path: root/vp8/common
diff options
context:
space:
mode:
authorParag Salasakar <img.mips1@gmail.com>2015-07-30 10:56:40 +0530
committerJames Zern <jzern@google.com>2015-07-30 12:56:57 -0700
commit56aa0da405b072447bf8165650983fdb36d272a5 (patch)
treee1ca26634149e0629f9105fdf1ef4e7a88307635 /vp8/common
parent0c2a14f9e24fda448161bbaf13878b202ea57f1f (diff)
downloadlibvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar
libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.gz
libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.bz2
libvpx-56aa0da405b072447bf8165650983fdb36d272a5.zip
mips msa vp8 quantize optimization
average improvement ~2x-3x

Change-Id: I6fc37191bf9cb5a67e1af9787d0d27659c17bdba
Diffstat (limited to 'vp8/common')
-rw-r--r-- vp8/common/mips/msa/vp8_macros_msa.h | 51
-rw-r--r-- vp8/common/rtcd_defs.pl | 4
2 files changed, 53 insertions, 2 deletions
diff --git a/vp8/common/mips/msa/vp8_macros_msa.h b/vp8/common/mips/msa/vp8_macros_msa.h
index b533cc696..0ed94cd43 100644
--- a/vp8/common/mips/msa/vp8_macros_msa.h
+++ b/vp8/common/mips/msa/vp8_macros_msa.h
@@ -553,6 +553,20 @@
}
#define VSHF_B3_SB(...) VSHF_B3(v16i8, __VA_ARGS__)
+/* Description : Shuffle halfword vector elements as per mask vector
+ Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : halfword elements from 'in0' & 'in1' are copied selectively to
+ 'out0' as per 'mask0'; 'in2' & 'in3' likewise to 'out1' as per 'mask1'
+*/
+#define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_vshf_h((v8i16)mask0, (v8i16)in1, (v8i16)in0); \
+ out1 = (RTYPE)__msa_vshf_h((v8i16)mask1, (v8i16)in3, (v8i16)in2); \
+}
+#define VSHF_H2_SH(...) VSHF_H2(v8i16, __VA_ARGS__)
+
/* Description : Dot product of byte vector elements
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
@@ -604,6 +618,31 @@
}
#define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
+/* Description : Dot product of halfword vector elements
+ Arguments : Inputs - mult0, mult1, cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Signed halfword elements from 'mult0' are multiplied with
+ signed halfword elements from 'cnst0' producing a result
+ twice the size of input i.e. signed word.
+ The products of adjacent odd-even elements are added together
+ and written to 'out0' ('mult1' & 'cnst1' likewise give 'out1')
+*/
+#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
+ out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \
+}
+
+#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \
+ cnst0, cnst1, cnst2, cnst3, \
+ out0, out1, out2, out3) \
+{ \
+ DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+ DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
+
/* Description : Dot product & addition of byte vector elements
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
@@ -1309,6 +1348,18 @@
ADD2(in4, in5, in6, in7, out2, out3); \
}
+/* Description : Subtraction of 2 pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Details : Each element in 'in1' is subtracted from 'in0' and result is
+ written to 'out0'; 'in3' is likewise subtracted from 'in2' into 'out1'.
+*/
+#define SUB2(in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = in0 - in1; \
+ out1 = in2 - in3; \
+}
+
/* Description : Sign extend halfword elements from right half of the vector
Arguments : Input - in (halfword vector)
Output - out (sign extended word vector)
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index ad7429a54..3cf4f9801 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -286,10 +286,10 @@ $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
# Quantizer
#
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
+specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 neon/;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;
#
# Block subtraction