From e0cc52db3fc9b09c99d7bbee35153cf82964a860 Mon Sep 17 00:00:00 2001 From: clang-format Date: Tue, 26 Jul 2016 20:43:23 -0700 Subject: vp9/encoder: apply clang-format Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2 --- vp9/encoder/mips/msa/vp9_error_msa.c | 155 ++++++++++----------- vp9/encoder/mips/msa/vp9_fdct16x16_msa.c | 133 +++++++++--------- vp9/encoder/mips/msa/vp9_fdct4x4_msa.c | 4 +- vp9/encoder/mips/msa/vp9_fdct8x8_msa.c | 56 ++++---- vp9/encoder/mips/msa/vp9_fdct_msa.h | 183 ++++++++++++------------- vp9/encoder/mips/msa/vp9_temporal_filter_msa.c | 78 +++++------ 6 files changed, 291 insertions(+), 318 deletions(-) (limited to 'vp9/encoder/mips') diff --git a/vp9/encoder/mips/msa/vp9_error_msa.c b/vp9/encoder/mips/msa/vp9_error_msa.c index 1dc70bd82..188d04d8f 100644 --- a/vp9/encoder/mips/msa/vp9_error_msa.c +++ b/vp9/encoder/mips/msa/vp9_error_msa.c @@ -11,74 +11,73 @@ #include "./vp9_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" -#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ -static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \ - const int16_t *dq_coeff_ptr, \ - int64_t *ssz) { \ - int64_t err = 0; \ - uint32_t loop_cnt; \ - v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ - v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ - v2i64 sq_coeff_r, sq_coeff_l; \ - v2i64 err0, err_dup0, err1, err_dup1; \ - \ - coeff = LD_SH(coeff_ptr); \ - dq_coeff = LD_SH(dq_coeff_ptr); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \ - sq_coeff_r, sq_coeff_l); \ - DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ - \ - coeff = LD_SH(coeff_ptr + 8); \ - dq_coeff = LD_SH(dq_coeff_ptr + 8); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ - DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ - \ - coeff_ptr += 16; \ - dq_coeff_ptr += 16; \ - \ - for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ - coeff = LD_SH(coeff_ptr); \ - dq_coeff = LD_SH(dq_coeff_ptr); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ - DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ - \ - coeff = LD_SH(coeff_ptr + 8); \ - dq_coeff = LD_SH(dq_coeff_ptr + 8); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ - DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ - \ - coeff_ptr += 16; \ - dq_coeff_ptr += 16; \ - } \ - \ - err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ - err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ - sq_coeff_r += err_dup0; \ - sq_coeff_l += err_dup1; \ - *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ - *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ - \ - err_dup0 = __msa_splati_d(err0, 1); \ - err_dup1 = __msa_splati_d(err1, 1); \ - err0 += err_dup0; \ - err1 += err_dup1; \ - err = __msa_copy_s_d(err0, 0); \ - err += __msa_copy_s_d(err1, 0); \ - \ - return err; \ -} +#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ + static int64_t block_error_##BSize##size_msa( \ + const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \ + int64_t err = 0; \ + uint32_t loop_cnt; \ + v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ + v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ + v2i64 sq_coeff_r, sq_coeff_l; \ + v2i64 err0, err_dup0, err1, err_dup1; \ + \ + coeff = LD_SH(coeff_ptr); \ + dq_coeff = LD_SH(dq_coeff_ptr); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \ + sq_coeff_l); \ + DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ + \ + coeff = LD_SH(coeff_ptr + 8); \ + dq_coeff = LD_SH(dq_coeff_ptr + 8); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ + DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ + \ + coeff_ptr += 16; \ + dq_coeff_ptr += 16; \ + \ + for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ + coeff = LD_SH(coeff_ptr); \ + dq_coeff = LD_SH(dq_coeff_ptr); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ + DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ + \ + coeff = LD_SH(coeff_ptr + 8); \ + dq_coeff = LD_SH(dq_coeff_ptr + 8); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ + DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ + \ + coeff_ptr += 16; \ + dq_coeff_ptr += 16; \ + } \ + \ + err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ + err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ + sq_coeff_r += err_dup0; \ + sq_coeff_l += err_dup1; \ + *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ + *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ + \ + err_dup0 = __msa_splati_d(err0, 1); \ + err_dup1 = __msa_splati_d(err1, 1); \ + err0 += err_dup0; \ + err1 += err_dup1; \ + err = __msa_copy_s_d(err0, 0); \ + err += __msa_copy_s_d(err1, 0); \ + \ + return err; \ + } BLOCK_ERROR_BLOCKSIZE_MSA(16); BLOCK_ERROR_BLOCKSIZE_MSA(64); @@ -86,25 +85,17 @@ BLOCK_ERROR_BLOCKSIZE_MSA(256); BLOCK_ERROR_BLOCKSIZE_MSA(1024); int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr, - const tran_low_t *dq_coeff_ptr, - intptr_t blk_size, int64_t *ssz) { + const tran_low_t *dq_coeff_ptr, intptr_t blk_size, + int64_t *ssz) { int64_t err; const int16_t *coeff = (const int16_t *)coeff_ptr; const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr; switch (blk_size) { - case 16: - err = block_error_16size_msa(coeff, dq_coeff, ssz); - break; - case 64: - err = block_error_64size_msa(coeff, dq_coeff, ssz); - break; - case 256: - err = block_error_256size_msa(coeff, dq_coeff, ssz); - break; - case 1024: - err = block_error_1024size_msa(coeff, dq_coeff, ssz); - break; + case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break; + case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break; + case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break; + case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break; default: err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz); break; diff --git a/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c b/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c index 6dabb5890..0831e5914 100644 --- a/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c +++ b/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c @@ -159,8 +159,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) { /* load input data */ LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7); - TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, - r0, r1, r2, r3, r4, r5, r6, r7); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, + r7); FDCT_POSTPROC_2V_NEG_H(r0, r1); FDCT_POSTPROC_2V_NEG_H(r2, r3); FDCT_POSTPROC_2V_NEG_H(r4, r5); @@ -169,8 +169,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) { out += 64; LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15); - TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, - r8, r9, r10, r11, r12, r13, r14, r15); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, + r12, r13, r14, r15); FDCT_POSTPROC_2V_NEG_H(r8, r9); FDCT_POSTPROC_2V_NEG_H(r10, r11); FDCT_POSTPROC_2V_NEG_H(r12, r13); @@ -181,8 +181,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) { /* load input data */ input += 128; LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7); - TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, - r0, r1, r2, r3, r4, r5, r6, r7); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, + r7); FDCT_POSTPROC_2V_NEG_H(r0, r1); FDCT_POSTPROC_2V_NEG_H(r2, r3); FDCT_POSTPROC_2V_NEG_H(r4, r5); @@ -191,8 +191,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) { out += 64; LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15); - TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, - r8, r9, r10, r11, r12, r13, r14, r15); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, + r12, r13, r14, r15); FDCT_POSTPROC_2V_NEG_H(r8, r9); FDCT_POSTPROC_2V_NEG_H(r10, r11); FDCT_POSTPROC_2V_NEG_H(r12, r13); @@ -339,24 +339,24 @@ static void fadst16_transpose_msa(int16_t *input, int16_t *out) { v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; /* load input data */ - LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, - l4, l12, l5, l13, l6, l14, l7, l15); - TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, - r0, r1, r2, r3, r4, r5, r6, r7); - TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, - r8, r9, r10, r11, r12, r13, r14, r15); + LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, + l7, l15); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, + r7); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, + r12, r13, r14, r15); ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8); ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8); out += 16 * 8; /* load input data */ input += 128; - LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, - l4, l12, l5, l13, l6, l14, l7, l15); - TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, - r0, r1, r2, r3, r4, r5, r6, r7); - TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, - r8, r9, r10, r11, r12, r13, r14, r15); + LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, + l7, l15); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, + r7); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, + r12, r13, r14, r15); ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8); ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8); } @@ -371,10 +371,10 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) { LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7); temp = intermediate + 8; LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, - in8, in9, in10, in11, in12, in13, in14, in15); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, + in10, in11, in12, in13, in14, in15); FDCT_POSTPROC_2V_NEG_H(in0, in1); FDCT_POSTPROC_2V_NEG_H(in2, in3); FDCT_POSTPROC_2V_NEG_H(in4, in5); @@ -383,29 +383,28 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) { FDCT_POSTPROC_2V_NEG_H(in10, in11); FDCT_POSTPROC_2V_NEG_H(in12, in13); FDCT_POSTPROC_2V_NEG_H(in14, in15); - BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, - in8, in9, in10, in11, in12, in13, in14, in15, - tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, - in8, in9, in10, in11, in12, in13, in14, in15); + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, + in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, + tmp7, in8, in9, in10, in11, in12, in13, in14, in15); temp = intermediate; ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16); - FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, - tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1, + tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); temp = intermediate; LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); - FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, - in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, - tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3); + FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3, + in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0, + tmp1, in1, tmp2, in2, tmp3, in3); ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16); - TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, - tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7); + TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4, + tmp5, in5, tmp6, in6, tmp7, in7); out = output + 8; ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16); } -void vp9_fht16x16_msa(const int16_t *input, int16_t *output, - int32_t stride, int32_t tx_type) { +void vp9_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride, + int32_t tx_type) { DECLARE_ALIGNED(32, int16_t, tmp[256]); DECLARE_ALIGNED(32, int16_t, trans_buf[256]); DECLARE_ALIGNED(32, int16_t, tmp_buf[128]); @@ -413,35 +412,31 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output, int16_t *ptmpbuf = &tmp_buf[0]; int16_t *trans = &trans_buf[0]; const int32_t const_arr[29 * 4] = { - 52707308, 52707308, 52707308, 52707308, - -1072430300, -1072430300, -1072430300, -1072430300, - 795618043, 795618043, 795618043, 795618043, - -721080468, -721080468, -721080468, -721080468, - 459094491, 459094491, 459094491, 459094491, - -970646691, -970646691, -970646691, -970646691, - 1010963856, 1010963856, 1010963856, 1010963856, - -361743294, -361743294, -361743294, -361743294, - 209469125, 209469125, 209469125, 209469125, - -1053094788, -1053094788, -1053094788, -1053094788, - 1053160324, 1053160324, 1053160324, 1053160324, - 639644520, 639644520, 639644520, 639644520, - -862444000, -862444000, -862444000, -862444000, - 1062144356, 1062144356, 1062144356, 1062144356, - -157532337, -157532337, -157532337, -157532337, - 260914709, 260914709, 260914709, 260914709, - -1041559667, -1041559667, -1041559667, -1041559667, - 920985831, 920985831, 920985831, 920985831, - -551995675, -551995675, -551995675, -551995675, - 596522295, 596522295, 596522295, 596522295, - 892853362, 892853362, 892853362, 892853362, - -892787826, -892787826, -892787826, -892787826, - 410925857, 410925857, 410925857, 410925857, - -992012162, -992012162, -992012162, -992012162, - 992077698, 992077698, 992077698, 992077698, - 759246145, 759246145, 759246145, 759246145, - -759180609, -759180609, -759180609, -759180609, - -759222975, -759222975, -759222975, -759222975, - 759288511, 759288511, 759288511, 759288511 }; + 52707308, 52707308, 52707308, 52707308, -1072430300, + -1072430300, -1072430300, -1072430300, 795618043, 795618043, + 795618043, 795618043, -721080468, -721080468, -721080468, + -721080468, 459094491, 459094491, 459094491, 459094491, + -970646691, -970646691, -970646691, -970646691, 1010963856, + 1010963856, 1010963856, 1010963856, -361743294, -361743294, + -361743294, -361743294, 209469125, 209469125, 209469125, + 209469125, -1053094788, -1053094788, -1053094788, -1053094788, + 1053160324, 1053160324, 1053160324, 1053160324, 639644520, + 639644520, 639644520, 639644520, -862444000, -862444000, + -862444000, -862444000, 1062144356, 1062144356, 1062144356, + 1062144356, -157532337, -157532337, -157532337, -157532337, + 260914709, 260914709, 260914709, 260914709, -1041559667, + -1041559667, -1041559667, -1041559667, 920985831, 920985831, + 920985831, 920985831, -551995675, -551995675, -551995675, + -551995675, 596522295, 596522295, 596522295, 596522295, + 892853362, 892853362, 892853362, 892853362, -892787826, + -892787826, -892787826, -892787826, 410925857, 410925857, + 410925857, 410925857, -992012162, -992012162, -992012162, + -992012162, 992077698, 992077698, 992077698, 992077698, + 759246145, 759246145, 759246145, 759246145, -759180609, + -759180609, -759180609, -759180609, -759222975, -759222975, + -759222975, -759222975, 759288511, 759288511, 759288511, + 759288511 + }; switch (tx_type) { case DCT_DCT: @@ -500,8 +495,6 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output, fadst16_transpose_msa(tmp, output); break; - default: - assert(0); - break; + default: assert(0); break; } } diff --git a/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c b/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c index 574016f15..fa36f09ab 100644 --- a/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c +++ b/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c @@ -86,9 +86,7 @@ void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride, TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); break; - default: - assert(0); - break; + default: assert(0); break; } TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); diff --git a/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c b/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c index 7c3c635f8..604db853c 100644 --- a/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c +++ b/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c @@ -23,44 +23,42 @@ void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride, switch (tx_type) { case DCT_DCT: - VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); break; case ADST_DCT: - VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); break; case DCT_ADST: - VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); break; case ADST_ADST: - VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - break; - default: - assert(0); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); break; + default: assert(0); break; } - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7); ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8); } diff --git a/vp9/encoder/mips/msa/vp9_fdct_msa.h b/vp9/encoder/mips/msa/vp9_fdct_msa.h index d7d40cb72..794bec70b 100644 --- a/vp9/encoder/mips/msa/vp9_fdct_msa.h +++ b/vp9/encoder/mips/msa/vp9_fdct_msa.h @@ -15,103 +15,102 @@ #include "vpx_dsp/mips/txfm_macros_msa.h" #include "vpx_ports/mem.h" -#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) { \ - v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \ - v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \ - v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \ - cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \ - v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \ - cospi_24_64, -cospi_24_64, 0, 0 }; \ - \ - SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \ - cnst2_m = -cnst0_m; \ - ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ - SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \ - cnst4_m = -cnst2_m; \ - ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ - \ - ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ - ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ - DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ - cnst1_m, cnst2_m, cnst3_m, in7, in0, \ - in4, in3); \ - \ - SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ - cnst2_m = -cnst0_m; \ - ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ - SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \ - cnst4_m = -cnst2_m; \ - ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ +#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ + out3, out4, out5, out6, out7) \ + { \ + v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \ + v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \ + v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \ + cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \ + v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \ + cospi_24_64, -cospi_24_64, 0, 0 }; \ + \ + SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \ + cnst2_m = -cnst0_m; \ + ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ + SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \ + cnst4_m = -cnst2_m; \ + ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ + \ + ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ + ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ + DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \ + cnst2_m, cnst3_m, in7, in0, in4, in3); \ + \ + SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ + cnst2_m = -cnst0_m; \ + ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ + SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \ + cnst4_m = -cnst2_m; \ + ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ + \ + ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ + ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + \ + DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \ + cnst2_m, cnst3_m, in5, in2, in6, in1); \ + BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ + out7 = -s0_m; \ + out0 = s1_m; \ + \ + SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \ + \ + ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \ + cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + cnst1_m = cnst0_m; \ + \ + ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ + ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \ + cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \ + \ + SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + \ + ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ + ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ + out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ + out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ + out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ + \ + out1 = -out1; \ + out3 = -out3; \ + out5 = -out5; \ + } + +#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \ + { \ + v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \ + v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \ \ - ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ - ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + UNPCK_R_SH_SW(in0, in0_r_m); \ + UNPCK_R_SH_SW(in1, in1_r_m); \ + UNPCK_R_SH_SW(in2, in2_r_m); \ + UNPCK_R_SH_SW(in3, in3_r_m); \ \ - DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ - cnst1_m, cnst2_m, cnst3_m, in5, in2, \ - in6, in1); \ - BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ - out7 = -s0_m; \ - out0 = s1_m; \ + constant_m = __msa_fill_w(sinpi_4_9); \ + MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \ \ - SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \ + constant_m = __msa_fill_w(sinpi_1_9); \ + s0_m += in0_r_m * constant_m; \ + s1_m -= in1_r_m * constant_m; \ \ - ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \ - cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ - cnst1_m = cnst0_m; \ + constant_m = __msa_fill_w(sinpi_2_9); \ + s0_m += in1_r_m * constant_m; \ + s1_m += in3_r_m * constant_m; \ \ - ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ - ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ - DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ - cnst2_m, cnst3_m, cnst1_m, out1, out6, \ - s0_m, s1_m); \ + s2_m = in0_r_m + in1_r_m - in3_r_m; \ \ - SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ - cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + constant_m = __msa_fill_w(sinpi_3_9); \ + MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \ \ - ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ - ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ - out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ - out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ - out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ - out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ + in0_r_m = s0_m + s3_m; \ + s2_m = s1_m - s3_m; \ + s3_m = s1_m - s0_m + s3_m; \ \ - out1 = -out1; \ - out3 = -out3; \ - out5 = -out5; \ -} - -#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) { \ - v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \ - v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \ - \ - UNPCK_R_SH_SW(in0, in0_r_m); \ - UNPCK_R_SH_SW(in1, in1_r_m); \ - UNPCK_R_SH_SW(in2, in2_r_m); \ - UNPCK_R_SH_SW(in3, in3_r_m); \ - \ - constant_m = __msa_fill_w(sinpi_4_9); \ - MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \ - \ - constant_m = __msa_fill_w(sinpi_1_9); \ - s0_m += in0_r_m * constant_m; \ - s1_m -= in1_r_m * constant_m; \ - \ - constant_m = __msa_fill_w(sinpi_2_9); \ - s0_m += in1_r_m * constant_m; \ - s1_m += in3_r_m * constant_m; \ - \ - s2_m = in0_r_m + in1_r_m - in3_r_m; \ - \ - constant_m = __msa_fill_w(sinpi_3_9); \ - MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \ - \ - in0_r_m = s0_m + s3_m; \ - s2_m = s1_m - s3_m; \ - s3_m = s1_m - s0_m + s3_m; \ - \ - SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \ - PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, \ - s3_m, s3_m, out0, out1, out2, out3); \ -} -#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */ + SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \ + PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \ + out0, out1, out2, out3); \ + } +#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */ diff --git a/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c b/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c index 363aabb7c..23f7ebace 100644 --- a/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c +++ b/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c @@ -11,12 +11,9 @@ #include "./vp9_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" -static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, - uint32_t stride, - uint8_t *frm2_ptr, - int32_t filt_sth, - int32_t filt_wgt, - uint32_t *acc, +static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride, + uint8_t *frm2_ptr, int32_t filt_sth, + int32_t filt_wgt, uint32_t *acc, uint16_t *cnt) { uint32_t row; uint64_t f0, f1, f2, f3; @@ -54,10 +51,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); UNPCK_SH_SW(diff0, diff0_r, diff0_l); UNPCK_SH_SW(diff1, diff1_r, diff1_l); - MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, - diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, + MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, + mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < cnst16); @@ -65,8 +62,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, diff1_r = (mod2_w < cnst16); diff1_l = (mod3_w < cnst16); - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, - mod0_w, mod1_w, mod2_w, mod3_w); + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, + mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; @@ -85,8 +82,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, - mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, + mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); acc += 8; @@ -101,10 +98,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); UNPCK_SH_SW(diff0, diff0_r, diff0_l); UNPCK_SH_SW(diff1, diff1_r, diff1_l); - MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, - diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, + MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, + mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < cnst16); @@ -112,8 +109,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, diff1_r = (mod2_w < cnst16); diff1_l = (mod3_w < cnst16); - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, - mod0_w, mod1_w, mod2_w, mod3_w); + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, + mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; @@ -131,8 +128,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, - mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, + mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); acc += 8; @@ -141,13 +138,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, } } -static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, - uint32_t stride, +static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride, uint8_t *frm2_ptr, - int32_t filt_sth, - int32_t filt_wgt, - uint32_t *acc, - uint16_t *cnt) { + int32_t filt_sth, int32_t filt_wgt, + uint32_t *acc, uint16_t *cnt) { uint32_t row; v16i8 frm1, frm2, frm3, frm4; v16u8 frm_r, frm_l; @@ -183,8 +177,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, UNPCK_SH_SW(diff1, diff1_r, diff1_l); MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, - mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, + mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < cnst16); @@ -192,8 +186,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, diff1_r = (mod2_w < cnst16); diff1_l = (mod3_w < cnst16); - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, - mod0_w, mod1_w, mod2_w, mod3_w); + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, + mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; @@ -212,8 +206,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, - mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, + mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); acc += 8; @@ -230,8 +224,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, UNPCK_SH_SW(diff1, diff1_r, diff1_l); MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, - mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, + mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < cnst16); @@ -239,8 +233,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, diff1_r = (mod2_w < cnst16); diff1_l = (mod3_w < cnst16); - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, - mod0_w, mod1_w, mod2_w, mod3_w); + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, + mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; @@ -259,8 +253,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, - mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, + mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); acc += 8; ST_SW2(mod2_w, mod3_w, acc, 4); @@ -277,11 +271,11 @@ void vp9_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride, int32_t filt_wgt, uint32_t *accu, uint16_t *cnt) { if (8 == (blk_w * blk_h)) { - temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, - strength, filt_wgt, accu, cnt); + temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength, + filt_wgt, accu, cnt); } else if (16 == (blk_w * blk_h)) { - temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, - strength, filt_wgt, accu, cnt); + temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength, + filt_wgt, accu, cnt); } else { vp9_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h, strength, filt_wgt, accu, cnt); -- cgit v1.2.3