diff options
author | Parag Salasakar <img.mips1@gmail.com> | 2015-07-30 10:56:40 +0530 |
---|---|---|
committer | James Zern <jzern@google.com> | 2015-07-30 12:56:57 -0700 |
commit | 56aa0da405b072447bf8165650983fdb36d272a5 (patch) | |
tree | e1ca26634149e0629f9105fdf1ef4e7a88307635 /vp8/common | |
parent | 0c2a14f9e24fda448161bbaf13878b202ea57f1f (diff) | |
download | libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.gz libvpx-56aa0da405b072447bf8165650983fdb36d272a5.tar.bz2 libvpx-56aa0da405b072447bf8165650983fdb36d272a5.zip |
mips msa vp8 quantize optimization
average improvement ~2x-3x
Change-Id: I6fc37191bf9cb5a67e1af9787d0d27659c17bdba
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/mips/msa/vp8_macros_msa.h | 51 | ||||
-rw-r--r-- | vp8/common/rtcd_defs.pl | 4 |
2 files changed, 53 insertions, 2 deletions
diff --git a/vp8/common/mips/msa/vp8_macros_msa.h b/vp8/common/mips/msa/vp8_macros_msa.h
index b533cc696..0ed94cd43 100644
--- a/vp8/common/mips/msa/vp8_macros_msa.h
+++ b/vp8/common/mips/msa/vp8_macros_msa.h
@@ -553,6 +553,20 @@
 }
 #define VSHF_B3_SB(...) VSHF_B3(v16i8, __VA_ARGS__)
 
+/* Description : Shuffle halfword vector elements as per mask vector
+ Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : halfword elements from 'in0' & 'in1' are copied selectively to
+ 'out0' as per control vector 'mask0'
+*/
+#define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_vshf_h((v8i16)mask0, (v8i16)in1, (v8i16)in0); \
+ out1 = (RTYPE)__msa_vshf_h((v8i16)mask1, (v8i16)in3, (v8i16)in2); \
+}
+#define VSHF_H2_SH(...) VSHF_H2(v8i16, __VA_ARGS__)
+
 /* Description : Dot product of byte vector elements
 Arguments : Inputs - mult0, mult1, cnst0, cnst1
 Outputs - out0, out1
@@ -604,6 +618,31 @@
 }
 #define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
 
+/* Description : Dot product of halfword vector elements
+ Arguments : Inputs - mult0, mult1, cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Signed halfword elements from 'mult0' are multiplied with
+ signed halfword elements from 'cnst0' producing a result
+ twice the size of input i.e. signed word.
+ The multiplication result of adjacent odd-even elements
+ are added together and written to the 'out0' vector
+*/
+#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
+ out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \
+}
+
+#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \
+ cnst0, cnst1, cnst2, cnst3, \
+ out0, out1, out2, out3) \
+{ \
+ DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+ DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
+
 /* Description : Dot product & addition of byte vector elements
 Arguments : Inputs - mult0, mult1, cnst0, cnst1
 Outputs - out0, out1
@@ -1309,6 +1348,18 @@
 ADD2(in4, in5, in6, in7, out2, out3); \
 }
 
+/* Description : Subtraction of 2 pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Details : Each element in 'in1' is subtracted from 'in0' and result is
+ written to 'out0'.
+*/
+#define SUB2(in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = in0 - in1; \
+ out1 = in2 - in3; \
+}
+
 /* Description : Sign extend halfword elements from right half of the vector
 Arguments : Input - in (halfword vector)
 Output - out (sign extended word vector)
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index ad7429a54..3cf4f9801 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -286,10 +286,10 @@ $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
 # Quantizer
 #
 add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
+specialize qw/vp8_regular_quantize_b sse2 sse4_1 msa/;
 
 add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 neon/;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 neon msa/;
 
 #
 # Block subtraction |