summary | refs | log | tree | commit | diff
path: root/vp9/encoder/mips
diff options
context:
space:
mode:
author: clang-format <noreply@google.com> 2016-07-26 20:43:23 -0700
committer: James Zern <jzern@google.com> 2016-08-02 16:47:11 -0700
commit: e0cc52db3fc9b09c99d7bbee35153cf82964a860 (patch)
tree: 4988f1d3a21056339e2ffbd7a3b3d52fab54cb6b /vp9/encoder/mips
parent: 3a04c9c9c4c4935925f4c00dcc70610100c5e9dd (diff)
download: libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.gz
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.bz2
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.zip
vp9/encoder: apply clang-format
Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2
Diffstat (limited to 'vp9/encoder/mips')
-rw-r--r-- vp9/encoder/mips/msa/vp9_error_msa.c 155
-rw-r--r-- vp9/encoder/mips/msa/vp9_fdct16x16_msa.c 133
-rw-r--r-- vp9/encoder/mips/msa/vp9_fdct4x4_msa.c 4
-rw-r--r-- vp9/encoder/mips/msa/vp9_fdct8x8_msa.c 56
-rw-r--r-- vp9/encoder/mips/msa/vp9_fdct_msa.h 183
-rw-r--r-- vp9/encoder/mips/msa/vp9_temporal_filter_msa.c 78
6 files changed, 291 insertions, 318 deletions
diff --git a/vp9/encoder/mips/msa/vp9_error_msa.c b/vp9/encoder/mips/msa/vp9_error_msa.c
index 1dc70bd82..188d04d8f 100644
--- a/vp9/encoder/mips/msa/vp9_error_msa.c
+++ b/vp9/encoder/mips/msa/vp9_error_msa.c
@@ -11,74 +11,73 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
-#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
-static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \
- const int16_t *dq_coeff_ptr, \
- int64_t *ssz) { \
- int64_t err = 0; \
- uint32_t loop_cnt; \
- v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
- v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
- v2i64 sq_coeff_r, sq_coeff_l; \
- v2i64 err0, err_dup0, err1, err_dup1; \
- \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \
- sq_coeff_r, sq_coeff_l); \
- DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- \
- for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- } \
- \
- err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
- err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
- sq_coeff_r += err_dup0; \
- sq_coeff_l += err_dup1; \
- *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
- *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
- \
- err_dup0 = __msa_splati_d(err0, 1); \
- err_dup1 = __msa_splati_d(err1, 1); \
- err0 += err_dup0; \
- err1 += err_dup1; \
- err = __msa_copy_s_d(err0, 0); \
- err += __msa_copy_s_d(err1, 0); \
- \
- return err; \
-}
+#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
+ static int64_t block_error_##BSize##size_msa( \
+ const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
+ int64_t err = 0; \
+ uint32_t loop_cnt; \
+ v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
+ v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
+ v2i64 sq_coeff_r, sq_coeff_l; \
+ v2i64 err0, err_dup0, err1, err_dup1; \
+ \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
+ sq_coeff_l); \
+ DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ \
+ for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ } \
+ \
+ err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
+ err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
+ sq_coeff_r += err_dup0; \
+ sq_coeff_l += err_dup1; \
+ *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
+ *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
+ \
+ err_dup0 = __msa_splati_d(err0, 1); \
+ err_dup1 = __msa_splati_d(err1, 1); \
+ err0 += err_dup0; \
+ err1 += err_dup1; \
+ err = __msa_copy_s_d(err0, 0); \
+ err += __msa_copy_s_d(err1, 0); \
+ \
+ return err; \
+ }
BLOCK_ERROR_BLOCKSIZE_MSA(16);
BLOCK_ERROR_BLOCKSIZE_MSA(64);
@@ -86,25 +85,17 @@ BLOCK_ERROR_BLOCKSIZE_MSA(256);
BLOCK_ERROR_BLOCKSIZE_MSA(1024);
int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
- const tran_low_t *dq_coeff_ptr,
- intptr_t blk_size, int64_t *ssz) {
+ const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
+ int64_t *ssz) {
int64_t err;
const int16_t *coeff = (const int16_t *)coeff_ptr;
const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
switch (blk_size) {
- case 16:
- err = block_error_16size_msa(coeff, dq_coeff, ssz);
- break;
- case 64:
- err = block_error_64size_msa(coeff, dq_coeff, ssz);
- break;
- case 256:
- err = block_error_256size_msa(coeff, dq_coeff, ssz);
- break;
- case 1024:
- err = block_error_1024size_msa(coeff, dq_coeff, ssz);
- break;
+ case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
+ case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
+ case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
+ case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
default:
err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
break;
diff --git a/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c b/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c
index 6dabb5890..0831e5914 100644
--- a/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c
+++ b/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c
@@ -159,8 +159,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
- TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
- r0, r1, r2, r3, r4, r5, r6, r7);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
@@ -169,8 +169,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
- TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
- r8, r9, r10, r11, r12, r13, r14, r15);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
@@ -181,8 +181,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
input += 128;
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
- TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
- r0, r1, r2, r3, r4, r5, r6, r7);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
@@ -191,8 +191,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
- TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
- r8, r9, r10, r11, r12, r13, r14, r15);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
@@ -339,24 +339,24 @@ static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
/* load input data */
- LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
- l4, l12, l5, l13, l6, l14, l7, l15);
- TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
- r0, r1, r2, r3, r4, r5, r6, r7);
- TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
- r8, r9, r10, r11, r12, r13, r14, r15);
+ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
+ l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
out += 16 * 8;
/* load input data */
input += 128;
- LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
- l4, l12, l5, l13, l6, l14, l7, l15);
- TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
- r0, r1, r2, r3, r4, r5, r6, r7);
- TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
- r8, r9, r10, r11, r12, r13, r14, r15);
+ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
+ l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
}
@@ -371,10 +371,10 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7);
temp = intermediate + 8;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
- in8, in9, in10, in11, in12, in13, in14, in15);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
+ in10, in11, in12, in13, in14, in15);
FDCT_POSTPROC_2V_NEG_H(in0, in1);
FDCT_POSTPROC_2V_NEG_H(in2, in3);
FDCT_POSTPROC_2V_NEG_H(in4, in5);
@@ -383,29 +383,28 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
FDCT_POSTPROC_2V_NEG_H(in10, in11);
FDCT_POSTPROC_2V_NEG_H(in12, in13);
FDCT_POSTPROC_2V_NEG_H(in14, in15);
- BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
- in8, in9, in10, in11, in12, in13, in14, in15,
- tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
- in8, in9, in10, in11, in12, in13, in14, in15);
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11,
+ in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6,
+ tmp7, in8, in9, in10, in11, in12, in13, in14, in15);
temp = intermediate;
ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16);
- FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
- tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1,
+ tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
temp = intermediate;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
- FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15,
- in0, in1, in2, in3, in4, in5, in6, in7);
- TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3,
- tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3);
+ FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0,
+ tmp1, in1, tmp2, in2, tmp3, in3);
ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16);
- TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7,
- tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7);
+ TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4,
+ tmp5, in5, tmp6, in6, tmp7, in7);
out = output + 8;
ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16);
}
-void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
- int32_t stride, int32_t tx_type) {
+void vp9_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride,
+ int32_t tx_type) {
DECLARE_ALIGNED(32, int16_t, tmp[256]);
DECLARE_ALIGNED(32, int16_t, trans_buf[256]);
DECLARE_ALIGNED(32, int16_t, tmp_buf[128]);
@@ -413,35 +412,31 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
int16_t *ptmpbuf = &tmp_buf[0];
int16_t *trans = &trans_buf[0];
const int32_t const_arr[29 * 4] = {
- 52707308, 52707308, 52707308, 52707308,
- -1072430300, -1072430300, -1072430300, -1072430300,
- 795618043, 795618043, 795618043, 795618043,
- -721080468, -721080468, -721080468, -721080468,
- 459094491, 459094491, 459094491, 459094491,
- -970646691, -970646691, -970646691, -970646691,
- 1010963856, 1010963856, 1010963856, 1010963856,
- -361743294, -361743294, -361743294, -361743294,
- 209469125, 209469125, 209469125, 209469125,
- -1053094788, -1053094788, -1053094788, -1053094788,
- 1053160324, 1053160324, 1053160324, 1053160324,
- 639644520, 639644520, 639644520, 639644520,
- -862444000, -862444000, -862444000, -862444000,
- 1062144356, 1062144356, 1062144356, 1062144356,
- -157532337, -157532337, -157532337, -157532337,
- 260914709, 260914709, 260914709, 260914709,
- -1041559667, -1041559667, -1041559667, -1041559667,
- 920985831, 920985831, 920985831, 920985831,
- -551995675, -551995675, -551995675, -551995675,
- 596522295, 596522295, 596522295, 596522295,
- 892853362, 892853362, 892853362, 892853362,
- -892787826, -892787826, -892787826, -892787826,
- 410925857, 410925857, 410925857, 410925857,
- -992012162, -992012162, -992012162, -992012162,
- 992077698, 992077698, 992077698, 992077698,
- 759246145, 759246145, 759246145, 759246145,
- -759180609, -759180609, -759180609, -759180609,
- -759222975, -759222975, -759222975, -759222975,
- 759288511, 759288511, 759288511, 759288511 };
+ 52707308, 52707308, 52707308, 52707308, -1072430300,
+ -1072430300, -1072430300, -1072430300, 795618043, 795618043,
+ 795618043, 795618043, -721080468, -721080468, -721080468,
+ -721080468, 459094491, 459094491, 459094491, 459094491,
+ -970646691, -970646691, -970646691, -970646691, 1010963856,
+ 1010963856, 1010963856, 1010963856, -361743294, -361743294,
+ -361743294, -361743294, 209469125, 209469125, 209469125,
+ 209469125, -1053094788, -1053094788, -1053094788, -1053094788,
+ 1053160324, 1053160324, 1053160324, 1053160324, 639644520,
+ 639644520, 639644520, 639644520, -862444000, -862444000,
+ -862444000, -862444000, 1062144356, 1062144356, 1062144356,
+ 1062144356, -157532337, -157532337, -157532337, -157532337,
+ 260914709, 260914709, 260914709, 260914709, -1041559667,
+ -1041559667, -1041559667, -1041559667, 920985831, 920985831,
+ 920985831, 920985831, -551995675, -551995675, -551995675,
+ -551995675, 596522295, 596522295, 596522295, 596522295,
+ 892853362, 892853362, 892853362, 892853362, -892787826,
+ -892787826, -892787826, -892787826, 410925857, 410925857,
+ 410925857, 410925857, -992012162, -992012162, -992012162,
+ -992012162, 992077698, 992077698, 992077698, 992077698,
+ 759246145, 759246145, 759246145, 759246145, -759180609,
+ -759180609, -759180609, -759180609, -759222975, -759222975,
+ -759222975, -759222975, 759288511, 759288511, 759288511,
+ 759288511
+ };
switch (tx_type) {
case DCT_DCT:
@@ -500,8 +495,6 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
fadst16_transpose_msa(tmp, output);
break;
- default:
- assert(0);
- break;
+ default: assert(0); break;
}
}
diff --git a/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c b/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c
index 574016f15..fa36f09ab 100644
--- a/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c
+++ b/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c
@@ -86,9 +86,7 @@ void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
break;
- default:
- assert(0);
- break;
+ default: assert(0); break;
}
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
diff --git a/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c b/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c
index 7c3c635f8..604db853c 100644
--- a/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c
+++ b/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c
@@ -23,44 +23,42 @@ void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
switch (tx_type) {
case DCT_DCT:
- VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
break;
case ADST_DCT:
- VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
break;
case DCT_ADST:
- VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
break;
case ADST_ADST:
- VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
- break;
- default:
- assert(0);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
break;
+ default: assert(0); break;
}
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
- in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7);
ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
}
diff --git a/vp9/encoder/mips/msa/vp9_fdct_msa.h b/vp9/encoder/mips/msa/vp9_fdct_msa.h
index d7d40cb72..794bec70b 100644
--- a/vp9/encoder/mips/msa/vp9_fdct_msa.h
+++ b/vp9/encoder/mips/msa/vp9_fdct_msa.h
@@ -15,103 +15,102 @@
#include "vpx_dsp/mips/txfm_macros_msa.h"
#include "vpx_ports/mem.h"
-#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \
- out0, out1, out2, out3, out4, out5, out6, out7) { \
- v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
- v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
- v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
- cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
- v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
- cospi_24_64, -cospi_24_64, 0, 0 }; \
- \
- SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
- cnst2_m = -cnst0_m; \
- ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
- SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
- cnst4_m = -cnst2_m; \
- ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
- \
- ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
- ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
- DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
- cnst1_m, cnst2_m, cnst3_m, in7, in0, \
- in4, in3); \
- \
- SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
- cnst2_m = -cnst0_m; \
- ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
- SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
- cnst4_m = -cnst2_m; \
- ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
+ out3, out4, out5, out6, out7) \
+ { \
+ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
+ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
+ v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
+ cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
+ v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
+ cospi_24_64, -cospi_24_64, 0, 0 }; \
+ \
+ SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
+ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
+ cnst2_m, cnst3_m, in7, in0, in4, in3); \
+ \
+ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ \
+ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
+ cnst2_m, cnst3_m, in5, in2, in6, in1); \
+ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
+ out7 = -s0_m; \
+ out0 = s1_m; \
+ \
+ SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
+ \
+ ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
+ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ cnst1_m = cnst0_m; \
+ \
+ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \
+ cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \
+ \
+ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
+ out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
+ out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
+ out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
+ \
+ out1 = -out1; \
+ out3 = -out3; \
+ out5 = -out5; \
+ }
+
+#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
+ v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
\
- ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
- ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ UNPCK_R_SH_SW(in0, in0_r_m); \
+ UNPCK_R_SH_SW(in1, in1_r_m); \
+ UNPCK_R_SH_SW(in2, in2_r_m); \
+ UNPCK_R_SH_SW(in3, in3_r_m); \
\
- DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
- cnst1_m, cnst2_m, cnst3_m, in5, in2, \
- in6, in1); \
- BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
- out7 = -s0_m; \
- out0 = s1_m; \
+ constant_m = __msa_fill_w(sinpi_4_9); \
+ MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
\
- SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
+ constant_m = __msa_fill_w(sinpi_1_9); \
+ s0_m += in0_r_m * constant_m; \
+ s1_m -= in1_r_m * constant_m; \
\
- ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
- cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
- cnst1_m = cnst0_m; \
+ constant_m = __msa_fill_w(sinpi_2_9); \
+ s0_m += in1_r_m * constant_m; \
+ s1_m += in3_r_m * constant_m; \
\
- ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
- ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
- DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
- cnst2_m, cnst3_m, cnst1_m, out1, out6, \
- s0_m, s1_m); \
+ s2_m = in0_r_m + in1_r_m - in3_r_m; \
\
- SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
- cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ constant_m = __msa_fill_w(sinpi_3_9); \
+ MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
\
- ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
- ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
- out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
- out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
- out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
- out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
+ in0_r_m = s0_m + s3_m; \
+ s2_m = s1_m - s3_m; \
+ s3_m = s1_m - s0_m + s3_m; \
\
- out1 = -out1; \
- out3 = -out3; \
- out5 = -out5; \
-}
-
-#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) { \
- v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
- v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
- \
- UNPCK_R_SH_SW(in0, in0_r_m); \
- UNPCK_R_SH_SW(in1, in1_r_m); \
- UNPCK_R_SH_SW(in2, in2_r_m); \
- UNPCK_R_SH_SW(in3, in3_r_m); \
- \
- constant_m = __msa_fill_w(sinpi_4_9); \
- MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
- \
- constant_m = __msa_fill_w(sinpi_1_9); \
- s0_m += in0_r_m * constant_m; \
- s1_m -= in1_r_m * constant_m; \
- \
- constant_m = __msa_fill_w(sinpi_2_9); \
- s0_m += in1_r_m * constant_m; \
- s1_m += in3_r_m * constant_m; \
- \
- s2_m = in0_r_m + in1_r_m - in3_r_m; \
- \
- constant_m = __msa_fill_w(sinpi_3_9); \
- MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
- \
- in0_r_m = s0_m + s3_m; \
- s2_m = s1_m - s3_m; \
- s3_m = s1_m - s0_m + s3_m; \
- \
- SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
- PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, \
- s3_m, s3_m, out0, out1, out2, out3); \
-}
-#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */
+ SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
+ PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \
+ out0, out1, out2, out3); \
+ }
+#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */
diff --git a/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c b/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c
index 363aabb7c..23f7ebace 100644
--- a/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c
+++ b/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c
@@ -11,12 +11,9 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
-static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
- uint32_t stride,
- uint8_t *frm2_ptr,
- int32_t filt_sth,
- int32_t filt_wgt,
- uint32_t *acc,
+static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
+ uint8_t *frm2_ptr, int32_t filt_sth,
+ int32_t filt_wgt, uint32_t *acc,
uint16_t *cnt) {
uint32_t row;
uint64_t f0, f1, f2, f3;
@@ -54,10 +51,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
UNPCK_SH_SW(diff0, diff0_r, diff0_l);
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
- diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+ mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -65,8 +62,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+ mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -85,8 +82,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+ mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@@ -101,10 +98,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
UNPCK_SH_SW(diff0, diff0_r, diff0_l);
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
- diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+ mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -112,8 +109,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+ mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -131,8 +128,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+ mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@@ -141,13 +138,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
}
}
-static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
- uint32_t stride,
+static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
uint8_t *frm2_ptr,
- int32_t filt_sth,
- int32_t filt_wgt,
- uint32_t *acc,
- uint16_t *cnt) {
+ int32_t filt_sth, int32_t filt_wgt,
+ uint32_t *acc, uint16_t *cnt) {
uint32_t row;
v16i8 frm1, frm2, frm3, frm4;
v16u8 frm_r, frm_l;
@@ -183,8 +177,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+ mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -192,8 +186,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+ mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -212,8 +206,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+ mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@@ -230,8 +224,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+ mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -239,8 +233,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+ mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -259,8 +253,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
- mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+ mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
ST_SW2(mod2_w, mod3_w, acc, 4);
@@ -277,11 +271,11 @@ void vp9_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
int32_t filt_wgt, uint32_t *accu,
uint16_t *cnt) {
if (8 == (blk_w * blk_h)) {
- temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr,
- strength, filt_wgt, accu, cnt);
+ temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
+ filt_wgt, accu, cnt);
} else if (16 == (blk_w * blk_h)) {
- temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr,
- strength, filt_wgt, accu, cnt);
+ temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
+ filt_wgt, accu, cnt);
} else {
vp9_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
strength, filt_wgt, accu, cnt);