diff options
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/loongarch/idct_lsx.c | 39 | ||||
-rw-r--r-- | vp8/common/loongarch/loopfilter_filters_lsx.c | 16 | ||||
-rw-r--r-- | vp8/common/loongarch/sixtap_filter_lsx.c | 322 |
3 files changed, 187 insertions, 190 deletions
diff --git a/vp8/common/loongarch/idct_lsx.c b/vp8/common/loongarch/idct_lsx.c index 679019ff6..eee871eec 100644 --- a/vp8/common/loongarch/idct_lsx.c +++ b/vp8/common/loongarch/idct_lsx.c @@ -16,47 +16,44 @@ static const int32_t cospi8sqrt2minus1 = 20091; static const int32_t sinpi8sqrt2 = 35468; #define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ + do { \ __m128i tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ \ DUP2_ARG2(__lsx_vilvl_h, in1, in0, in3, in2, tmp0_m, tmp1_m); \ DUP2_ARG2(__lsx_vilvh_h, in1, in0, in3, in2, tmp2_m, tmp3_m); \ DUP2_ARG2(__lsx_vilvl_w, tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \ DUP2_ARG2(__lsx_vilvh_w, tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \ - } + } while (0) #define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ + do { \ __m128i s4_m, s5_m, s6_m, s7_m; \ \ TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \ DUP2_ARG2(__lsx_vilvl_d, s6_m, s4_m, s7_m, s5_m, out0, out2); \ out1 = __lsx_vilvh_d(s6_m, s4_m); \ out3 = __lsx_vilvh_d(s7_m, s5_m); \ - } + } while (0) -#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \ - ({ \ - __m128i out_m; \ +#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in0, in1) \ + do { \ __m128i zero_m = __lsx_vldi(0); \ __m128i tmp1_m, tmp2_m; \ __m128i sinpi8_sqrt2_m = __lsx_vreplgr2vr_w(sinpi8sqrt2); \ \ - tmp1_m = __lsx_vilvl_h(in, zero_m); \ - tmp2_m = __lsx_vilvh_h(in, zero_m); \ + tmp1_m = __lsx_vilvl_h(in0, zero_m); \ + tmp2_m = __lsx_vilvh_h(in0, zero_m); \ tmp1_m = __lsx_vsrai_w(tmp1_m, 16); \ tmp2_m = __lsx_vsrai_w(tmp2_m, 16); \ tmp1_m = __lsx_vmul_w(tmp1_m, sinpi8_sqrt2_m); \ tmp1_m = __lsx_vsrai_w(tmp1_m, 16); \ tmp2_m = __lsx_vmul_w(tmp2_m, sinpi8_sqrt2_m); \ tmp2_m = __lsx_vsrai_w(tmp2_m, 16); \ - out_m = __lsx_vpickev_h(tmp2_m, tmp1_m); \ - \ - out_m; \ - }) + in1 = __lsx_vpickev_h(tmp2_m, tmp1_m); \ + } while (0) #define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ + do { \ __m128i a1_m, b1_m, c1_m, d1_m; \ __m128i c_tmp1_m, c_tmp2_m; \ __m128i d_tmp1_m, d_tmp2_m; \ @@ -65,7 +62,7 @@ static const int32_t sinpi8sqrt2 = 35468; const_cospi8sqrt2minus1_m = __lsx_vreplgr2vr_h(cospi8sqrt2minus1); \ a1_m = __lsx_vadd_h(in0, in2); \ b1_m = __lsx_vsub_h(in0, in2); \ - c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \ + EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1, c_tmp1_m); \ \ c_tmp2_m = __lsx_vmuh_h(in3, const_cospi8sqrt2minus1_m); \ c_tmp2_m = __lsx_vslli_h(c_tmp2_m, 1); \ @@ -77,13 +74,13 @@ static const int32_t sinpi8sqrt2 = 35468; d_tmp1_m = __lsx_vslli_h(d_tmp1_m, 1); \ d_tmp1_m = __lsx_vsrai_h(d_tmp1_m, 1); \ d_tmp1_m = __lsx_vadd_h(in1, d_tmp1_m); \ - d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \ + EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3, d_tmp2_m); \ d1_m = __lsx_vadd_h(d_tmp1_m, d_tmp2_m); \ LSX_BUTTERFLY_4_H(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \ - } + } while (0) #define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ + do { \ __m128i a1_m, b1_m, c1_m, d1_m; \ __m128i c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \ __m128i const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \ @@ -105,13 +102,13 @@ static const int32_t sinpi8sqrt2 = 35468; d_tmp2_m = __lsx_vsrai_w(d_tmp2_m, 16); \ d1_m = __lsx_vadd_w(d_tmp1_m, d_tmp2_m); \ LSX_BUTTERFLY_4_W(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \ - } + } while (0) #define UNPCK_SH_SW(in, out0, out1) \ - { \ + do { \ out0 = __lsx_vsllwil_w_h(in, 0); \ out1 = __lsx_vexth_w_h(in); \ - } + } while (0) static void idct4x4_addconst_lsx(int16_t in_dc, uint8_t *pred, int32_t pred_stride, uint8_t *dest, diff --git a/vp8/common/loongarch/loopfilter_filters_lsx.c b/vp8/common/loongarch/loopfilter_filters_lsx.c index a3ac76d25..f743ec0c5 100644 --- a/vp8/common/loongarch/loopfilter_filters_lsx.c +++ b/vp8/common/loongarch/loopfilter_filters_lsx.c @@ -14,7 +14,7 @@ #include "vpx_util/loongson_intrinsics.h" #define VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) \ - { \ + do { \ __m128i p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2; \ const __m128i cnst4b = __lsx_vldi(4); \ const __m128i cnst3b = __lsx_vldi(3); \ @@ -46,10 +46,10 @@ q1 = __lsx_vxori_b(q1_m, 0x80); \ p1_m = __lsx_vsadd_b(p1_m, filt); \ p1 = __lsx_vxori_b(p1_m, 0x80); \ - } + } while (0) #define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \ - { \ + do { \ __m128i p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \ __m128i u, filt, t1, t2, filt_sign, q0_sub_p0; \ __m128i filt_r, filt_l; \ @@ -113,12 +113,12 @@ p0_m = __lsx_vsadd_b(p0_m, u); \ q0 = __lsx_vxori_b(q0_m, 0x80); \ p0 = __lsx_vxori_b(p0_m, 0x80); \ - } + } while (0) #define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \ limit_in, b_limit_in, thresh_in, hev_out, mask_out, \ flat_out) \ - { \ + do { \ __m128i p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \ __m128i p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \ \ @@ -143,13 +143,13 @@ mask_out = __lsx_vmax_bu(q2_asub_q1_m, mask_out); \ mask_out = __lsx_vslt_bu(limit_in, mask_out); \ mask_out = __lsx_vxori_b(mask_out, 0xff); \ - } + } while (0) #define VP8_ST6x1_B(in0, in0_idx, in1, in1_idx, pdst, stride) \ - { \ + do { \ __lsx_vstelm_w(in0, pdst, 0, in0_idx); \ __lsx_vstelm_h(in1, pdst + stride, 0, in1_idx); \ - } + } while (0) static void loop_filter_horizontal_4_dual_lsx(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, diff --git a/vp8/common/loongarch/sixtap_filter_lsx.c b/vp8/common/loongarch/sixtap_filter_lsx.c index a23ed16d2..cd7ba5474 100644 --- a/vp8/common/loongarch/sixtap_filter_lsx.c +++ b/vp8/common/loongarch/sixtap_filter_lsx.c @@ -33,37 +33,61 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 }; -#define DPADD_H3(in0, in1, in2, coeff0, coeff1, coeff2) \ - ({ \ - __m128i out0_m; \ - \ - out0_m = __lsx_vdp2_h_b(in0, coeff0); \ - out0_m = __lsx_vdp2add_h_b(out0_m, in1, coeff1); \ - out0_m = __lsx_vdp2add_h_b(out0_m, in2, coeff2); \ - \ - out0_m; \ - }) - -#define HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_h0, filt_h1, \ - filt_h2) \ - ({ \ - __m128i vec0_m, vec1_m, vec2_m; \ - __m128i hz_out_m; \ - \ - DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m, \ - vec1_m); \ - vec2_m = __lsx_vshuf_b(src1, src0, mask2); \ - hz_out_m = DPADD_H3(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2); \ - \ - hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT); \ - hz_out_m = __lsx_vsat_h(hz_out_m, 7); \ - \ - hz_out_m; \ - }) +static INLINE __m128i dpadd_h3(__m128i in0, __m128i in1, __m128i in2, + __m128i coeff0, __m128i coeff1, __m128i coeff2) { + __m128i out0_m; + + out0_m = __lsx_vdp2_h_b(in0, coeff0); + out0_m = __lsx_vdp2add_h_b(out0_m, in1, coeff1); + out0_m = __lsx_vdp2add_h_b(out0_m, in2, coeff2); + + return out0_m; +} + +static INLINE __m128i horiz_6tap_filt(__m128i src0, __m128i src1, __m128i mask0, + __m128i mask1, __m128i mask2, + __m128i filt_h0, __m128i filt_h1, + __m128i filt_h2) { + __m128i vec0_m, vec1_m, vec2_m; + __m128i hz_out_m; + + DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m, + vec1_m); + vec2_m = __lsx_vshuf_b(src1, src0, mask2); + hz_out_m = dpadd_h3(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2); + hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT); + hz_out_m = __lsx_vsat_h(hz_out_m, 7); + + return hz_out_m; +} + +static INLINE __m128i filt_4tap_dpadd_h(__m128i vec0, __m128i vec1, + __m128i filt0, __m128i filt1) { + __m128i tmp_m; + + tmp_m = __lsx_vdp2_h_b(vec0, filt0); + tmp_m = __lsx_vdp2add_h_b(tmp_m, vec1, filt1); + + return tmp_m; +} + +static INLINE __m128i horiz_4tap_filt(__m128i src0, __m128i src1, __m128i mask0, + __m128i mask1, __m128i filt_h0, + __m128i filt_h1) { + __m128i vec0_m, vec1_m, hz_out_m; + + DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m, + vec1_m); + hz_out_m = filt_4tap_dpadd_h(vec0_m, vec1_m, filt_h0, filt_h1); + hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT); + hz_out_m = __lsx_vsat_h(hz_out_m, 7); + + return hz_out_m; +} #define HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, filt0, filt1, filt2, out0, out1) \ - { \ + do { \ __m128i vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m; \ \ DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src3, src2, mask0, vec0_m, \ @@ -77,12 +101,12 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { vec5_m); \ DUP2_ARG3(__lsx_vdp2add_h_b, out0, vec4_m, filt2, out1, vec5_m, filt2, \ out0, out1); \ - } + } while (0) #define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, filt0, filt1, filt2, out0, out1, \ out2, out3) \ - ({ \ + do { \ __m128i vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ \ DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src1, src1, mask0, vec0_m, \ @@ -105,35 +129,11 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { DUP4_ARG3(__lsx_vdp2add_h_b, out0, vec4_m, filt2, out1, vec5_m, filt2, \ out2, vec6_m, filt2, out3, vec7_m, filt2, out0, out1, out2, \ out3); \ - }) - -#define FILT_4TAP_DPADD_H(vec0, vec1, filt0, filt1) \ - ({ \ - __m128i tmp0; \ - \ - tmp0 = __lsx_vdp2_h_b(vec0, filt0); \ - tmp0 = __lsx_vdp2add_h_b(tmp0, vec1, filt1); \ - \ - tmp0; \ - }) - -#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \ - ({ \ - __m128i vec0_m, vec1_m; \ - __m128i hz_out_m; \ - \ - DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m, \ - vec1_m); \ - hz_out_m = FILT_4TAP_DPADD_H(vec0_m, vec1_m, filt_h0, filt_h1); \ - hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT); \ - hz_out_m = __lsx_vsat_h(hz_out_m, 7); \ - \ - hz_out_m; \ - }) + } while (0) #define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ filt0, filt1, out0, out1) \ - { \ + do { \ __m128i vec0_m, vec1_m, vec2_m, vec3_m; \ \ DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src3, src2, mask0, vec0_m, \ @@ -143,11 +143,11 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { vec3_m); \ DUP2_ARG3(__lsx_vdp2add_h_b, out0, vec2_m, filt1, out1, vec3_m, filt1, \ out0, out1); \ - } + } while (0) #define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ filt0, filt1, out0, out1, out2, out3) \ - ({ \ + do { \ __m128i vec0_m, vec1_m, vec2_m, vec3_m; \ \ DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src1, src1, mask0, vec0_m, \ @@ -163,7 +163,7 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { DUP4_ARG3(__lsx_vdp2add_h_b, out0, vec0_m, filt1, out1, vec1_m, filt1, \ out2, vec2_m, filt1, out3, vec3_m, filt1, out0, out1, out2, \ out3); \ - }) + } while (0) static inline void common_hz_6t_4x4_lsx(uint8_t *RESTRICT src, int32_t src_stride, @@ -424,8 +424,8 @@ static void common_vt_6t_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP2_ARG2(__lsx_vilvl_d, src65_r, src54_r, src87_r, src76_r, src6554, src8776); DUP2_ARG2(__lsx_vxori_b, src6554, 128, src8776, 128, src6554, src8776); - out0 = DPADD_H3(src2110, src4332, src6554, filt0, filt1, filt2); - out1 = DPADD_H3(src4332, src6554, src8776, filt0, filt1, filt2); + out0 = dpadd_h3(src2110, src4332, src6554, filt0, filt1, filt2); + out1 = dpadd_h3(src4332, src6554, src8776, filt0, filt1, filt2); out0 = __lsx_vssrarni_b_h(out1, out0, VP8_FILTER_SHIFT); out0 = __lsx_vxori_b(out0, 128); @@ -487,10 +487,10 @@ static void common_vt_6t_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP4_ARG2(__lsx_vilvl_b, src7, src4, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); - out0_r = DPADD_H3(src10_r, src32_r, src76_r, filt0, filt1, filt2); - out1_r = DPADD_H3(src21_r, src43_r, src87_r, filt0, filt1, filt2); - out2_r = DPADD_H3(src32_r, src76_r, src98_r, filt0, filt1, filt2); - out3_r = DPADD_H3(src43_r, src87_r, src109_r, filt0, filt1, filt2); + out0_r = dpadd_h3(src10_r, src32_r, src76_r, filt0, filt1, filt2); + out1_r = dpadd_h3(src21_r, src43_r, src87_r, filt0, filt1, filt2); + out2_r = dpadd_h3(src32_r, src76_r, src98_r, filt0, filt1, filt2); + out3_r = dpadd_h3(src43_r, src87_r, src109_r, filt0, filt1, filt2); DUP2_ARG3(__lsx_vssrarni_b_h, out1_r, out0_r, VP8_FILTER_SHIFT, out3_r, out2_r, VP8_FILTER_SHIFT, tmp0, tmp1); DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1); @@ -555,14 +555,14 @@ static void common_vt_6t_16w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src54_r, src65_r, src76_r, src87_r); DUP4_ARG2(__lsx_vilvh_b, src5, src4, src6, src5, src7, src6, src8, src7, src54_l, src65_l, src76_l, src87_l); - out0_r = DPADD_H3(src10_r, src32_r, src54_r, filt0, filt1, filt2); - out1_r = DPADD_H3(src21_r, src43_r, src65_r, filt0, filt1, filt2); - out2_r = DPADD_H3(src32_r, src54_r, src76_r, filt0, filt1, filt2); - out3_r = DPADD_H3(src43_r, src65_r, src87_r, filt0, filt1, filt2); - out0_l = DPADD_H3(src10_l, src32_l, src54_l, filt0, filt1, filt2); - out1_l = DPADD_H3(src21_l, src43_l, src65_l, filt0, filt1, filt2); - out2_l = DPADD_H3(src32_l, src54_l, src76_l, filt0, filt1, filt2); - out3_l = DPADD_H3(src43_l, src65_l, src87_l, filt0, filt1, filt2); + out0_r = dpadd_h3(src10_r, src32_r, src54_r, filt0, filt1, filt2); + out1_r = dpadd_h3(src21_r, src43_r, src65_r, filt0, filt1, filt2); + out2_r = dpadd_h3(src32_r, src54_r, src76_r, filt0, filt1, filt2); + out3_r = dpadd_h3(src43_r, src65_r, src87_r, filt0, filt1, filt2); + out0_l = dpadd_h3(src10_l, src32_l, src54_l, filt0, filt1, filt2); + out1_l = dpadd_h3(src21_l, src43_l, src65_l, filt0, filt1, filt2); + out2_l = dpadd_h3(src32_l, src54_l, src76_l, filt0, filt1, filt2); + out3_l = dpadd_h3(src43_l, src65_l, src87_l, filt0, filt1, filt2); DUP4_ARG3(__lsx_vssrarni_b_h, out0_l, out0_r, VP8_FILTER_SHIFT, out1_l, out1_r, VP8_FILTER_SHIFT, out2_l, out2_r, VP8_FILTER_SHIFT, out3_l, out3_r, VP8_FILTER_SHIFT, tmp0, tmp1, tmp2, tmp3); @@ -621,12 +621,12 @@ static void common_hv_6ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src1, src2, src3); src4 = __lsx_vxori_b(src4, 128); - hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out0 = horiz_6tap_filt(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out2 = HORIZ_6TAP_FILT(src2, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out2 = horiz_6tap_filt(src2, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out1 = __lsx_vshuf_b(hz_out2, hz_out0, shuff); - hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out3 = horiz_6tap_filt(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out3, hz_out2, out0, out1); @@ -636,7 +636,7 @@ static void common_hv_6ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src += src_stride_x2; DUP2_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src5, src6); - hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0, + hz_out5 = horiz_6tap_filt(src5, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff); @@ -645,15 +645,15 @@ static void common_hv_6ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src += src_stride_x2; DUP2_ARG2(__lsx_vxori_b, src7, 128, src8, 128, src7, src8); - hz_out7 = HORIZ_6TAP_FILT(src7, src8, mask0, mask1, mask2, filt_hz0, + hz_out7 = horiz_6tap_filt(src7, src8, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out6 = __lsx_vshuf_b(hz_out7, hz_out5, shuff); out2 = __lsx_vpackev_b(hz_out5, hz_out4); - tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); + tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); out3 = __lsx_vpackev_b(hz_out7, hz_out6); - tmp1 = DPADD_H3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2); + tmp1 = dpadd_h3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2); tmp0 = __lsx_vssrarni_b_h(tmp1, tmp0, 7); tmp0 = __lsx_vxori_b(tmp0, 128); @@ -710,15 +710,15 @@ static void common_hv_6ht_6vt_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src1, src2, src3); src4 = __lsx_vxori_b(src4, 128); - hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out0 = horiz_6tap_filt(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out1 = horiz_6tap_filt(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out2 = horiz_6tap_filt(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out3 = horiz_6tap_filt(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out4 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out4 = horiz_6tap_filt(src4, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); filt = __lsx_vld(filter_vert, 0); DUP2_ARG2(__lsx_vreplvei_h, filt, 0, filt, 1, filt_vt0, filt_vt1); @@ -734,25 +734,25 @@ static void common_hv_6ht_6vt_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP4_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src7, 128, src8, 128, src5, src6, src7, src8); - hz_out5 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0, + hz_out5 = horiz_6tap_filt(src5, src5, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out2 = __lsx_vpackev_b(hz_out5, hz_out4); - tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); + tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); - hz_out6 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0, + hz_out6 = horiz_6tap_filt(src6, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out5 = __lsx_vpackev_b(hz_out6, hz_out5); - tmp1 = DPADD_H3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2); + tmp1 = dpadd_h3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2); - hz_out7 = HORIZ_6TAP_FILT(src7, src7, mask0, mask1, mask2, filt_hz0, + hz_out7 = horiz_6tap_filt(src7, src7, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out7 = __lsx_vpackev_b(hz_out7, hz_out6); - tmp2 = DPADD_H3(out1, out2, out7, filt_vt0, filt_vt1, filt_vt2); + tmp2 = dpadd_h3(out1, out2, out7, filt_vt0, filt_vt1, filt_vt2); - hz_out8 = HORIZ_6TAP_FILT(src8, src8, mask0, mask1, mask2, filt_hz0, + hz_out8 = horiz_6tap_filt(src8, src8, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out6 = __lsx_vpackev_b(hz_out8, hz_out7); - tmp3 = DPADD_H3(out4, out5, out6, filt_vt0, filt_vt1, filt_vt2); + tmp3 = dpadd_h3(out4, out5, out6, filt_vt0, filt_vt1, filt_vt2); DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, VP8_FILTER_SHIFT, tmp3, tmp2, VP8_FILTER_SHIFT, vec0, vec1); @@ -997,14 +997,14 @@ static void common_vt_4t_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r); src4332 = __lsx_vilvl_d(src43_r, src32_r); src4332 = __lsx_vxori_b(src4332, 128); - out0 = FILT_4TAP_DPADD_H(src2110, src4332, filt0, filt1); + out0 = filt_4tap_dpadd_h(src2110, src4332, filt0, filt1); src2 = __lsx_vld(src, 0); src += src_stride; DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src54_r, src65_r); src2110 = __lsx_vilvl_d(src65_r, src54_r); src2110 = __lsx_vxori_b(src2110, 128); - out1 = FILT_4TAP_DPADD_H(src4332, src2110, filt0, filt1); + out1 = filt_4tap_dpadd_h(src4332, src2110, filt0, filt1); out0 = __lsx_vssrarni_b_h(out1, out0, VP8_FILTER_SHIFT); out0 = __lsx_vxori_b(out0, 128); @@ -1055,10 +1055,10 @@ static void common_vt_4t_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src8, src9, src10); DUP4_ARG2(__lsx_vilvl_b, src7, src2, src8, src7, src9, src8, src10, src9, src72_r, src87_r, src98_r, src109_r); - out0_r = FILT_4TAP_DPADD_H(src10_r, src72_r, filt0, filt1); - out1_r = FILT_4TAP_DPADD_H(src21_r, src87_r, filt0, filt1); - out2_r = FILT_4TAP_DPADD_H(src72_r, src98_r, filt0, filt1); - out3_r = FILT_4TAP_DPADD_H(src87_r, src109_r, filt0, filt1); + out0_r = filt_4tap_dpadd_h(src10_r, src72_r, filt0, filt1); + out1_r = filt_4tap_dpadd_h(src21_r, src87_r, filt0, filt1); + out2_r = filt_4tap_dpadd_h(src72_r, src98_r, filt0, filt1); + out3_r = filt_4tap_dpadd_h(src87_r, src109_r, filt0, filt1); DUP2_ARG3(__lsx_vssrarni_b_h, out1_r, out0_r, VP8_FILTER_SHIFT, out3_r, out2_r, VP8_FILTER_SHIFT, tmp0, tmp1); DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1); @@ -1114,14 +1114,14 @@ static void common_vt_4t_16w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src32_r, src43_r, src54_r, src65_r); DUP4_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src5, src4, src6, src5, src32_l, src43_l, src54_l, src65_l); - out0_r = FILT_4TAP_DPADD_H(src10_r, src32_r, filt0, filt1); - out1_r = FILT_4TAP_DPADD_H(src21_r, src43_r, filt0, filt1); - out2_r = FILT_4TAP_DPADD_H(src32_r, src54_r, filt0, filt1); - out3_r = FILT_4TAP_DPADD_H(src43_r, src65_r, filt0, filt1); - out0_l = FILT_4TAP_DPADD_H(src10_l, src32_l, filt0, filt1); - out1_l = FILT_4TAP_DPADD_H(src21_l, src43_l, filt0, filt1); - out2_l = FILT_4TAP_DPADD_H(src32_l, src54_l, filt0, filt1); - out3_l = FILT_4TAP_DPADD_H(src43_l, src65_l, filt0, filt1); + out0_r = filt_4tap_dpadd_h(src10_r, src32_r, filt0, filt1); + out1_r = filt_4tap_dpadd_h(src21_r, src43_r, filt0, filt1); + out2_r = filt_4tap_dpadd_h(src32_r, src54_r, filt0, filt1); + out3_r = filt_4tap_dpadd_h(src43_r, src65_r, filt0, filt1); + out0_l = filt_4tap_dpadd_h(src10_l, src32_l, filt0, filt1); + out1_l = filt_4tap_dpadd_h(src21_l, src43_l, filt0, filt1); + out2_l = filt_4tap_dpadd_h(src32_l, src54_l, filt0, filt1); + out3_l = filt_4tap_dpadd_h(src43_l, src65_l, filt0, filt1); DUP4_ARG3(__lsx_vssrarni_b_h, out0_l, out0_r, VP8_FILTER_SHIFT, out1_l, out1_r, VP8_FILTER_SHIFT, out2_l, out2_r, VP8_FILTER_SHIFT, out3_l, out3_r, VP8_FILTER_SHIFT, tmp0, tmp1, tmp2, tmp3); @@ -1168,8 +1168,8 @@ static void common_hv_4ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1); src2 = __lsx_vxori_b(src2, 128); - hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1); - hz_out1 = HORIZ_4TAP_FILT(src1, src2, mask0, mask1, filt_hz0, filt_hz1); + hz_out0 = horiz_4tap_filt(src0, src1, mask0, mask1, filt_hz0, filt_hz1); + hz_out1 = horiz_4tap_filt(src1, src2, mask0, mask1, filt_hz0, filt_hz1); vec0 = __lsx_vpackev_b(hz_out1, hz_out0); DUP2_ARG2(__lsx_vldrepl_h, filter_vert, 0, filter_vert, 2, filt_vt0, @@ -1182,16 +1182,16 @@ static void common_hv_4ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src += src_stride_x4; DUP2_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src3, src4); - hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1); + hz_out3 = horiz_4tap_filt(src3, src4, mask0, mask1, filt_hz0, filt_hz1); hz_out2 = __lsx_vshuf_b(hz_out3, hz_out1, shuff); vec1 = __lsx_vpackev_b(hz_out3, hz_out2); - tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1); + tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1); DUP2_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src5, src6); - hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1); + hz_out5 = horiz_4tap_filt(src5, src6, mask0, mask1, filt_hz0, filt_hz1); hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff); vec2 = __lsx_vpackev_b(hz_out5, hz_out4); - tmp1 = FILT_4TAP_DPADD_H(vec1, vec2, filt_vt0, filt_vt1); + tmp1 = filt_4tap_dpadd_h(vec1, vec2, filt_vt0, filt_vt1); tmp0 = __lsx_vssrarni_b_h(tmp1, tmp0, 7); tmp0 = __lsx_vxori_b(tmp0, 128); @@ -1239,9 +1239,9 @@ static inline void common_hv_4ht_4vt_8w_lsx( DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1); src2 = __lsx_vxori_b(src2, 128); - hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1); - hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1); - hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1); + hz_out0 = horiz_4tap_filt(src0, src0, mask0, mask1, filt_hz0, filt_hz1); + hz_out1 = horiz_4tap_filt(src1, src1, mask0, mask1, filt_hz0, filt_hz1); + hz_out2 = horiz_4tap_filt(src2, src2, mask0, mask1, filt_hz0, filt_hz1); DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out2, hz_out1, vec0, vec2); filt = __lsx_vld(filter_vert, 0); @@ -1254,21 +1254,21 @@ static inline void common_hv_4ht_4vt_8w_lsx( DUP4_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src5, 128, src6, 128, src3, src4, src5, src6); - hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1); + hz_out3 = horiz_4tap_filt(src3, src3, mask0, mask1, filt_hz0, filt_hz1); vec1 = __lsx_vpackev_b(hz_out3, hz_out2); - tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1); + tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1); - hz_out0 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1); + hz_out0 = horiz_4tap_filt(src4, src4, mask0, mask1, filt_hz0, filt_hz1); vec3 = __lsx_vpackev_b(hz_out0, hz_out3); - tmp1 = FILT_4TAP_DPADD_H(vec2, vec3, filt_vt0, filt_vt1); + tmp1 = filt_4tap_dpadd_h(vec2, vec3, filt_vt0, filt_vt1); - hz_out1 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1); + hz_out1 = horiz_4tap_filt(src5, src5, mask0, mask1, filt_hz0, filt_hz1); vec4 = __lsx_vpackev_b(hz_out1, hz_out0); - tmp2 = FILT_4TAP_DPADD_H(vec1, vec4, filt_vt0, filt_vt1); + tmp2 = filt_4tap_dpadd_h(vec1, vec4, filt_vt0, filt_vt1); - hz_out2 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1); + hz_out2 = horiz_4tap_filt(src6, src6, mask0, mask1, filt_hz0, filt_hz1); DUP2_ARG2(__lsx_vpackev_b, hz_out0, hz_out3, hz_out2, hz_out1, vec0, vec1); - tmp3 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1); + tmp3 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1); DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, out0, out1); DUP2_ARG2(__lsx_vxori_b, out0, 128, out1, 128, out0, out1); @@ -1324,9 +1324,9 @@ static void common_hv_6ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1); src2 = __lsx_vxori_b(src2, 128); - hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out0 = horiz_6tap_filt(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out1 = HORIZ_6TAP_FILT(src1, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out1 = horiz_6tap_filt(src1, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec0 = __lsx_vpackev_b(hz_out1, hz_out0); @@ -1341,17 +1341,17 @@ static void common_hv_6ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP4_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src5, 128, src6, 128, src3, src4, src5, src6); - hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0, + hz_out3 = horiz_6tap_filt(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out2 = __lsx_vshuf_b(hz_out3, hz_out1, shuff); vec1 = __lsx_vpackev_b(hz_out3, hz_out2); - tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1); + tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1); - hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0, + hz_out5 = horiz_6tap_filt(src5, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff); vec2 = __lsx_vpackev_b(hz_out5, hz_out4); - tmp1 = FILT_4TAP_DPADD_H(vec1, vec2, filt_vt0, filt_vt1); + tmp1 = filt_4tap_dpadd_h(vec1, vec2, filt_vt0, filt_vt1); DUP2_ARG3(__lsx_vssrarni_b_h, tmp0, tmp0, 7, tmp1, tmp1, 7, tmp0, tmp1); DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1); @@ -1402,11 +1402,11 @@ static inline void common_hv_6ht_4vt_8w_lsx( DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1); src2 = __lsx_vxori_b(src2, 128); - hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out0 = horiz_6tap_filt(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out1 = horiz_6tap_filt(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); - hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, + hz_out2 = horiz_6tap_filt(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out2, hz_out1, vec0, vec2); @@ -1420,25 +1420,25 @@ static inline void common_hv_6ht_4vt_8w_lsx( DUP4_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src5, 128, src6, 128, src3, src4, src5, src6); - hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0, + hz_out3 = horiz_6tap_filt(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec1 = __lsx_vpackev_b(hz_out3, hz_out2); - tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1); + tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1); - hz_out0 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0, + hz_out0 = horiz_6tap_filt(src4, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec3 = __lsx_vpackev_b(hz_out0, hz_out3); - tmp1 = FILT_4TAP_DPADD_H(vec2, vec3, filt_vt0, filt_vt1); + tmp1 = filt_4tap_dpadd_h(vec2, vec3, filt_vt0, filt_vt1); - hz_out1 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0, + hz_out1 = horiz_6tap_filt(src5, src5, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec0 = __lsx_vpackev_b(hz_out1, hz_out0); - tmp2 = FILT_4TAP_DPADD_H(vec1, vec0, filt_vt0, filt_vt1); + tmp2 = filt_4tap_dpadd_h(vec1, vec0, filt_vt0, filt_vt1); - hz_out2 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0, + hz_out2 = horiz_6tap_filt(src6, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); DUP2_ARG2(__lsx_vpackev_b, hz_out0, hz_out3, hz_out2, hz_out1, vec1, vec2); - tmp3 = FILT_4TAP_DPADD_H(vec1, vec2, filt_vt0, filt_vt1); + tmp3 = filt_4tap_dpadd_h(vec1, vec2, filt_vt0, filt_vt1); DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, out0, out1); DUP2_ARG2(__lsx_vxori_b, out0, 128, out1, 128, out0, out1); @@ -1492,9 +1492,9 @@ static void common_hv_4ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0, src1, src2, src3); src4 = __lsx_vxori_b(src4, 128); - hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1); - hz_out2 = HORIZ_4TAP_FILT(src2, src3, mask0, mask1, filt_hz0, filt_hz1); - hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1); + hz_out0 = horiz_4tap_filt(src0, src1, mask0, mask1, filt_hz0, filt_hz1); + hz_out2 = horiz_4tap_filt(src2, src3, mask0, mask1, filt_hz0, filt_hz1); + hz_out3 = horiz_4tap_filt(src3, src4, mask0, mask1, filt_hz0, filt_hz1); hz_out1 = __lsx_vshuf_b(hz_out2, hz_out0, shuff); DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out3, hz_out2, out0, out1); @@ -1510,15 +1510,15 @@ static void common_hv_4ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride, src6, src7, src8); src += src_stride_x4; - hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1); + hz_out5 = horiz_4tap_filt(src5, src6, mask0, mask1, filt_hz0, filt_hz1); hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff); out2 = __lsx_vpackev_b(hz_out5, hz_out4); - tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); + tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); - hz_out7 = HORIZ_4TAP_FILT(src7, src8, mask0, mask1, filt_hz0, filt_hz1); + hz_out7 = horiz_4tap_filt(src7, src8, mask0, mask1, filt_hz0, filt_hz1); hz_out6 = __lsx_vshuf_b(hz_out7, hz_out5, shuff); out3 = __lsx_vpackev_b(hz_out7, hz_out6); - tmp1 = DPADD_H3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2); + tmp1 = dpadd_h3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2); tmp0 = __lsx_vssrarni_b_h(tmp1, tmp0, 7); tmp0 = __lsx_vxori_b(tmp0, 128); @@ -1571,11 +1571,11 @@ static inline void common_hv_4ht_6vt_8w_lsx( DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0, src1, src2, src3); src4 = __lsx_vxori_b(src4, 128); - hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1); - hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1); - hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1); - hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1); - hz_out4 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1); + hz_out0 = horiz_4tap_filt(src0, src0, mask0, mask1, filt_hz0, filt_hz1); + hz_out1 = horiz_4tap_filt(src1, src1, mask0, mask1, filt_hz0, filt_hz1); + hz_out2 = horiz_4tap_filt(src2, src2, mask0, mask1, filt_hz0, filt_hz1); + hz_out3 = horiz_4tap_filt(src3, src3, mask0, mask1, filt_hz0, filt_hz1); + hz_out4 = horiz_4tap_filt(src4, src4, mask0, mask1, filt_hz0, filt_hz1); DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out3, hz_out2, out0, out1); DUP2_ARG2(__lsx_vpackev_b, hz_out2, hz_out1, hz_out4, hz_out3, out3, out4); @@ -1590,21 +1590,21 @@ static inline void common_hv_4ht_6vt_8w_lsx( DUP4_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src7, 128, src8, 128, src5, src6, src7, src8); - hz_out5 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1); + hz_out5 = horiz_4tap_filt(src5, src5, mask0, mask1, filt_hz0, filt_hz1); out2 = __lsx_vpackev_b(hz_out5, hz_out4); - tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); + tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); - hz_out6 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1); + hz_out6 = horiz_4tap_filt(src6, src6, mask0, mask1, filt_hz0, filt_hz1); out5 = __lsx_vpackev_b(hz_out6, hz_out5); - tmp1 = DPADD_H3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2); + tmp1 = dpadd_h3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2); - hz_out7 = HORIZ_4TAP_FILT(src7, src7, mask0, mask1, filt_hz0, filt_hz1); + hz_out7 = horiz_4tap_filt(src7, src7, mask0, mask1, filt_hz0, filt_hz1); out6 = __lsx_vpackev_b(hz_out7, hz_out6); - tmp2 = DPADD_H3(out1, out2, out6, filt_vt0, filt_vt1, filt_vt2); + tmp2 = dpadd_h3(out1, out2, out6, filt_vt0, filt_vt1, filt_vt2); - hz_out8 = HORIZ_4TAP_FILT(src8, src8, mask0, mask1, filt_hz0, filt_hz1); + hz_out8 = horiz_4tap_filt(src8, src8, mask0, mask1, filt_hz0, filt_hz1); out7 = __lsx_vpackev_b(hz_out8, hz_out7); - tmp3 = DPADD_H3(out4, out5, out7, filt_vt0, filt_vt1, filt_vt2); + tmp3 = dpadd_h3(out4, out5, out7, filt_vt0, filt_vt1, filt_vt2); DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, vec0, vec1); DUP2_ARG2(__lsx_vxori_b, vec0, 128, vec1, 128, vec0, vec1); __lsx_vstelm_d(vec0, dst, 0, 0); |