summaryrefslogtreecommitdiff
path: root/vp9/common
diff options
context:
space:
mode:
authorParag Salasakar <img.mips1@gmail.com>2015-06-17 07:53:06 +0530
committerParag Salasakar <img.mips1@gmail.com>2015-06-17 07:58:34 +0530
commitd9fedf7832292b4f8f83d60444064dc5066cb0e1 (patch)
tree528ee9445d6723d1fc45047a654e305315c06739 /vp9/common
parentfa53008fb739f2ce64a391552cc50804321683c1 (diff)
downloadlibvpx-d9fedf7832292b4f8f83d60444064dc5066cb0e1.tar
libvpx-d9fedf7832292b4f8f83d60444064dc5066cb0e1.tar.gz
libvpx-d9fedf7832292b4f8f83d60444064dc5066cb0e1.tar.bz2
libvpx-d9fedf7832292b4f8f83d60444064dc5066cb0e1.zip
mips msa vp9 fdct 32x32 optimization
average improvement ~4x-6x
Change-Id: Ibcac3ef8ed5e207cf8c121e696570e6b63d3c0f4
Diffstat (limited to 'vp9/common')
-rw-r--r--vp9/common/mips/msa/vp9_macros_msa.h40
-rw-r--r--vp9/common/vp9_rtcd_defs.pl6
2 files changed, 41 insertions, 5 deletions
diff --git a/vp9/common/mips/msa/vp9_macros_msa.h b/vp9/common/mips/msa/vp9_macros_msa.h
index 2f2390bb2..863b2dc15 100644
--- a/vp9/common/mips/msa/vp9_macros_msa.h
+++ b/vp9/common/mips/msa/vp9_macros_msa.h
@@ -720,6 +720,24 @@
}
#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
+/* Description : Dot product of signed word vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - as per RTYPE (signed double word for DOTP_SW2_SD)
+ Details : Signed word elements from 'mult0' are multiplied with
+ signed word elements from 'cnst0', producing results
+ twice the size of the input, i.e. signed double words.
+ The products of adjacent odd-even elements are then added
+ together and stored in 'out0' (2 signed double word results).
+ 'mult1' and 'cnst1' are processed the same way into 'out1'
+*/
+#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \
+ out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \
+}
+#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__)
+
/* Description : Dot product & addition of byte vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
@@ -1103,7 +1121,7 @@
Return Type - unsigned halfword
Details : Each unsigned halfword element from 'in0' is saturated to the
value generated with (sat_val+1) bit range.
- The results are in placed to original vectors
+ The results are stored in place
*/
#define SAT_UH2(RTYPE, in0, in1, sat_val) { \
in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \
@@ -1125,7 +1143,7 @@
Return Type - signed halfword
Details : Each signed halfword element from 'in0' is saturated to the
value generated with (sat_val+1) bit range
- The results are in placed to original vectors
+ The results are stored in place
*/
#define SAT_SH2(RTYPE, in0, in1, sat_val) { \
in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \
@@ -1440,6 +1458,24 @@
ILVRL_B2_SH(zero_m, in, out0, out1); \
}
+/* Description : Sign extend halfword elements from input vector and return
+ the result in a pair of vectors
+ Arguments : Inputs - in (1 input halfword vector)
+ Outputs - out0, out1 (2 sign extended word vectors)
+ Return Type - signed word
+ Details : Sign bit of halfword elements from input vector 'in' is
+ extracted and interleaved right with same vector 'in' to
+ generate 4 signed word elements in 'out0'
+ Then interleaved left with same vector 'in' to
+ generate 4 signed word elements in 'out1'
+*/
+#define UNPCK_SH_SW(in, out0, out1) { \
+ v8i16 tmp_m; \
+ \
+ tmp_m = __msa_clti_s_h((v8i16)in, 0); \
+ ILVRL_H2_SW(tmp_m, in, out0, out1); \
+}
+
/* Description : Butterfly of 4 input vectors
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1, out2, out3
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 4a9970cf5..159929646 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1053,13 +1053,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fdct16x16 sse2 msa/;
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_1 sse2/;
+ specialize qw/vp9_fdct32x32_1 sse2 msa/;
add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32 sse2 avx2/;
+ specialize qw/vp9_fdct32x32 sse2 avx2 msa/;
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_rd sse2 avx2/;
+ specialize qw/vp9_fdct32x32_rd sse2 avx2 msa/;
}
#