summaryrefslogtreecommitdiff
path: root/vp8/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common')
-rw-r--r--vp8/common/loongarch/idct_lsx.c39
-rw-r--r--vp8/common/loongarch/loopfilter_filters_lsx.c16
-rw-r--r--vp8/common/loongarch/sixtap_filter_lsx.c322
3 files changed, 187 insertions, 190 deletions
diff --git a/vp8/common/loongarch/idct_lsx.c b/vp8/common/loongarch/idct_lsx.c
index 679019ff6..eee871eec 100644
--- a/vp8/common/loongarch/idct_lsx.c
+++ b/vp8/common/loongarch/idct_lsx.c
@@ -16,47 +16,44 @@ static const int32_t cospi8sqrt2minus1 = 20091;
static const int32_t sinpi8sqrt2 = 35468;
#define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
+ do { \
__m128i tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
\
DUP2_ARG2(__lsx_vilvl_h, in1, in0, in3, in2, tmp0_m, tmp1_m); \
DUP2_ARG2(__lsx_vilvh_h, in1, in0, in3, in2, tmp2_m, tmp3_m); \
DUP2_ARG2(__lsx_vilvl_w, tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \
DUP2_ARG2(__lsx_vilvh_w, tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \
- }
+ } while (0)
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
+ do { \
__m128i s4_m, s5_m, s6_m, s7_m; \
\
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
DUP2_ARG2(__lsx_vilvl_d, s6_m, s4_m, s7_m, s5_m, out0, out2); \
out1 = __lsx_vilvh_d(s6_m, s4_m); \
out3 = __lsx_vilvh_d(s7_m, s5_m); \
- }
+ } while (0)
-#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
- ({ \
- __m128i out_m; \
+#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in0, in1) \
+ do { \
__m128i zero_m = __lsx_vldi(0); \
__m128i tmp1_m, tmp2_m; \
__m128i sinpi8_sqrt2_m = __lsx_vreplgr2vr_w(sinpi8sqrt2); \
\
- tmp1_m = __lsx_vilvl_h(in, zero_m); \
- tmp2_m = __lsx_vilvh_h(in, zero_m); \
+ tmp1_m = __lsx_vilvl_h(in0, zero_m); \
+ tmp2_m = __lsx_vilvh_h(in0, zero_m); \
tmp1_m = __lsx_vsrai_w(tmp1_m, 16); \
tmp2_m = __lsx_vsrai_w(tmp2_m, 16); \
tmp1_m = __lsx_vmul_w(tmp1_m, sinpi8_sqrt2_m); \
tmp1_m = __lsx_vsrai_w(tmp1_m, 16); \
tmp2_m = __lsx_vmul_w(tmp2_m, sinpi8_sqrt2_m); \
tmp2_m = __lsx_vsrai_w(tmp2_m, 16); \
- out_m = __lsx_vpickev_h(tmp2_m, tmp1_m); \
- \
- out_m; \
- })
+ in1 = __lsx_vpickev_h(tmp2_m, tmp1_m); \
+ } while (0)
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
+ do { \
__m128i a1_m, b1_m, c1_m, d1_m; \
__m128i c_tmp1_m, c_tmp2_m; \
__m128i d_tmp1_m, d_tmp2_m; \
@@ -65,7 +62,7 @@ static const int32_t sinpi8sqrt2 = 35468;
const_cospi8sqrt2minus1_m = __lsx_vreplgr2vr_h(cospi8sqrt2minus1); \
a1_m = __lsx_vadd_h(in0, in2); \
b1_m = __lsx_vsub_h(in0, in2); \
- c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
+ EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1, c_tmp1_m); \
\
c_tmp2_m = __lsx_vmuh_h(in3, const_cospi8sqrt2minus1_m); \
c_tmp2_m = __lsx_vslli_h(c_tmp2_m, 1); \
@@ -77,13 +74,13 @@ static const int32_t sinpi8sqrt2 = 35468;
d_tmp1_m = __lsx_vslli_h(d_tmp1_m, 1); \
d_tmp1_m = __lsx_vsrai_h(d_tmp1_m, 1); \
d_tmp1_m = __lsx_vadd_h(in1, d_tmp1_m); \
- d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
+ EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3, d_tmp2_m); \
d1_m = __lsx_vadd_h(d_tmp1_m, d_tmp2_m); \
LSX_BUTTERFLY_4_H(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
- }
+ } while (0)
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
+ do { \
__m128i a1_m, b1_m, c1_m, d1_m; \
__m128i c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
__m128i const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
@@ -105,13 +102,13 @@ static const int32_t sinpi8sqrt2 = 35468;
d_tmp2_m = __lsx_vsrai_w(d_tmp2_m, 16); \
d1_m = __lsx_vadd_w(d_tmp1_m, d_tmp2_m); \
LSX_BUTTERFLY_4_W(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
- }
+ } while (0)
#define UNPCK_SH_SW(in, out0, out1) \
- { \
+ do { \
out0 = __lsx_vsllwil_w_h(in, 0); \
out1 = __lsx_vexth_w_h(in); \
- }
+ } while (0)
static void idct4x4_addconst_lsx(int16_t in_dc, uint8_t *pred,
int32_t pred_stride, uint8_t *dest,
diff --git a/vp8/common/loongarch/loopfilter_filters_lsx.c b/vp8/common/loongarch/loopfilter_filters_lsx.c
index a3ac76d25..f743ec0c5 100644
--- a/vp8/common/loongarch/loopfilter_filters_lsx.c
+++ b/vp8/common/loongarch/loopfilter_filters_lsx.c
@@ -14,7 +14,7 @@
#include "vpx_util/loongson_intrinsics.h"
#define VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) \
- { \
+ do { \
__m128i p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2; \
const __m128i cnst4b = __lsx_vldi(4); \
const __m128i cnst3b = __lsx_vldi(3); \
@@ -46,10 +46,10 @@
q1 = __lsx_vxori_b(q1_m, 0x80); \
p1_m = __lsx_vsadd_b(p1_m, filt); \
p1 = __lsx_vxori_b(p1_m, 0x80); \
- }
+ } while (0)
#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
- { \
+ do { \
__m128i p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
__m128i u, filt, t1, t2, filt_sign, q0_sub_p0; \
__m128i filt_r, filt_l; \
@@ -113,12 +113,12 @@
p0_m = __lsx_vsadd_b(p0_m, u); \
q0 = __lsx_vxori_b(q0_m, 0x80); \
p0 = __lsx_vxori_b(p0_m, 0x80); \
- }
+ } while (0)
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
limit_in, b_limit_in, thresh_in, hev_out, mask_out, \
flat_out) \
- { \
+ do { \
__m128i p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
__m128i p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
\
@@ -143,13 +143,13 @@
mask_out = __lsx_vmax_bu(q2_asub_q1_m, mask_out); \
mask_out = __lsx_vslt_bu(limit_in, mask_out); \
mask_out = __lsx_vxori_b(mask_out, 0xff); \
- }
+ } while (0)
#define VP8_ST6x1_B(in0, in0_idx, in1, in1_idx, pdst, stride) \
- { \
+ do { \
__lsx_vstelm_w(in0, pdst, 0, in0_idx); \
__lsx_vstelm_h(in1, pdst + stride, 0, in1_idx); \
- }
+ } while (0)
static void loop_filter_horizontal_4_dual_lsx(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
diff --git a/vp8/common/loongarch/sixtap_filter_lsx.c b/vp8/common/loongarch/sixtap_filter_lsx.c
index a23ed16d2..cd7ba5474 100644
--- a/vp8/common/loongarch/sixtap_filter_lsx.c
+++ b/vp8/common/loongarch/sixtap_filter_lsx.c
@@ -33,37 +33,61 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = {
8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
};
-#define DPADD_H3(in0, in1, in2, coeff0, coeff1, coeff2) \
- ({ \
- __m128i out0_m; \
- \
- out0_m = __lsx_vdp2_h_b(in0, coeff0); \
- out0_m = __lsx_vdp2add_h_b(out0_m, in1, coeff1); \
- out0_m = __lsx_vdp2add_h_b(out0_m, in2, coeff2); \
- \
- out0_m; \
- })
-
-#define HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_h0, filt_h1, \
- filt_h2) \
- ({ \
- __m128i vec0_m, vec1_m, vec2_m; \
- __m128i hz_out_m; \
- \
- DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m, \
- vec1_m); \
- vec2_m = __lsx_vshuf_b(src1, src0, mask2); \
- hz_out_m = DPADD_H3(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2); \
- \
- hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT); \
- hz_out_m = __lsx_vsat_h(hz_out_m, 7); \
- \
- hz_out_m; \
- })
+static INLINE __m128i dpadd_h3(__m128i in0, __m128i in1, __m128i in2,
+ __m128i coeff0, __m128i coeff1, __m128i coeff2) {
+ __m128i out0_m;
+
+ out0_m = __lsx_vdp2_h_b(in0, coeff0);
+ out0_m = __lsx_vdp2add_h_b(out0_m, in1, coeff1);
+ out0_m = __lsx_vdp2add_h_b(out0_m, in2, coeff2);
+
+ return out0_m;
+}
+
+static INLINE __m128i horiz_6tap_filt(__m128i src0, __m128i src1, __m128i mask0,
+ __m128i mask1, __m128i mask2,
+ __m128i filt_h0, __m128i filt_h1,
+ __m128i filt_h2) {
+ __m128i vec0_m, vec1_m, vec2_m;
+ __m128i hz_out_m;
+
+ DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m,
+ vec1_m);
+ vec2_m = __lsx_vshuf_b(src1, src0, mask2);
+ hz_out_m = dpadd_h3(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2);
+ hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT);
+ hz_out_m = __lsx_vsat_h(hz_out_m, 7);
+
+ return hz_out_m;
+}
+
+static INLINE __m128i filt_4tap_dpadd_h(__m128i vec0, __m128i vec1,
+ __m128i filt0, __m128i filt1) {
+ __m128i tmp_m;
+
+ tmp_m = __lsx_vdp2_h_b(vec0, filt0);
+ tmp_m = __lsx_vdp2add_h_b(tmp_m, vec1, filt1);
+
+ return tmp_m;
+}
+
+static INLINE __m128i horiz_4tap_filt(__m128i src0, __m128i src1, __m128i mask0,
+ __m128i mask1, __m128i filt_h0,
+ __m128i filt_h1) {
+ __m128i vec0_m, vec1_m, hz_out_m;
+
+ DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m,
+ vec1_m);
+ hz_out_m = filt_4tap_dpadd_h(vec0_m, vec1_m, filt_h0, filt_h1);
+ hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT);
+ hz_out_m = __lsx_vsat_h(hz_out_m, 7);
+
+ return hz_out_m;
+}
#define HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
mask2, filt0, filt1, filt2, out0, out1) \
- { \
+ do { \
__m128i vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m; \
\
DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src3, src2, mask0, vec0_m, \
@@ -77,12 +101,12 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = {
vec5_m); \
DUP2_ARG3(__lsx_vdp2add_h_b, out0, vec4_m, filt2, out1, vec5_m, filt2, \
out0, out1); \
- }
+ } while (0)
#define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
mask2, filt0, filt1, filt2, out0, out1, \
out2, out3) \
- ({ \
+ do { \
__m128i vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
\
DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src1, src1, mask0, vec0_m, \
@@ -105,35 +129,11 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = {
DUP4_ARG3(__lsx_vdp2add_h_b, out0, vec4_m, filt2, out1, vec5_m, filt2, \
out2, vec6_m, filt2, out3, vec7_m, filt2, out0, out1, out2, \
out3); \
- })
-
-#define FILT_4TAP_DPADD_H(vec0, vec1, filt0, filt1) \
- ({ \
- __m128i tmp0; \
- \
- tmp0 = __lsx_vdp2_h_b(vec0, filt0); \
- tmp0 = __lsx_vdp2add_h_b(tmp0, vec1, filt1); \
- \
- tmp0; \
- })
-
-#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \
- ({ \
- __m128i vec0_m, vec1_m; \
- __m128i hz_out_m; \
- \
- DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, vec0_m, \
- vec1_m); \
- hz_out_m = FILT_4TAP_DPADD_H(vec0_m, vec1_m, filt_h0, filt_h1); \
- hz_out_m = __lsx_vsrari_h(hz_out_m, VP8_FILTER_SHIFT); \
- hz_out_m = __lsx_vsat_h(hz_out_m, 7); \
- \
- hz_out_m; \
- })
+ } while (0)
#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
filt0, filt1, out0, out1) \
- { \
+ do { \
__m128i vec0_m, vec1_m, vec2_m, vec3_m; \
\
DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask0, src3, src2, mask0, vec0_m, \
@@ -143,11 +143,11 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = {
vec3_m); \
DUP2_ARG3(__lsx_vdp2add_h_b, out0, vec2_m, filt1, out1, vec3_m, filt1, \
out0, out1); \
- }
+ } while (0)
#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
filt0, filt1, out0, out1, out2, out3) \
- ({ \
+ do { \
__m128i vec0_m, vec1_m, vec2_m, vec3_m; \
\
DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src1, src1, mask0, vec0_m, \
@@ -163,7 +163,7 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = {
DUP4_ARG3(__lsx_vdp2add_h_b, out0, vec0_m, filt1, out1, vec1_m, filt1, \
out2, vec2_m, filt1, out3, vec3_m, filt1, out0, out1, out2, \
out3); \
- })
+ } while (0)
static inline void common_hz_6t_4x4_lsx(uint8_t *RESTRICT src,
int32_t src_stride,
@@ -424,8 +424,8 @@ static void common_vt_6t_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP2_ARG2(__lsx_vilvl_d, src65_r, src54_r, src87_r, src76_r, src6554,
src8776);
DUP2_ARG2(__lsx_vxori_b, src6554, 128, src8776, 128, src6554, src8776);
- out0 = DPADD_H3(src2110, src4332, src6554, filt0, filt1, filt2);
- out1 = DPADD_H3(src4332, src6554, src8776, filt0, filt1, filt2);
+ out0 = dpadd_h3(src2110, src4332, src6554, filt0, filt1, filt2);
+ out1 = dpadd_h3(src4332, src6554, src8776, filt0, filt1, filt2);
out0 = __lsx_vssrarni_b_h(out1, out0, VP8_FILTER_SHIFT);
out0 = __lsx_vxori_b(out0, 128);
@@ -487,10 +487,10 @@ static void common_vt_6t_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP4_ARG2(__lsx_vilvl_b, src7, src4, src8, src7, src9, src8, src10, src9,
src76_r, src87_r, src98_r, src109_r);
- out0_r = DPADD_H3(src10_r, src32_r, src76_r, filt0, filt1, filt2);
- out1_r = DPADD_H3(src21_r, src43_r, src87_r, filt0, filt1, filt2);
- out2_r = DPADD_H3(src32_r, src76_r, src98_r, filt0, filt1, filt2);
- out3_r = DPADD_H3(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+ out0_r = dpadd_h3(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+ out1_r = dpadd_h3(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+ out2_r = dpadd_h3(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+ out3_r = dpadd_h3(src43_r, src87_r, src109_r, filt0, filt1, filt2);
DUP2_ARG3(__lsx_vssrarni_b_h, out1_r, out0_r, VP8_FILTER_SHIFT, out3_r,
out2_r, VP8_FILTER_SHIFT, tmp0, tmp1);
DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
@@ -555,14 +555,14 @@ static void common_vt_6t_16w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src54_r, src65_r, src76_r, src87_r);
DUP4_ARG2(__lsx_vilvh_b, src5, src4, src6, src5, src7, src6, src8, src7,
src54_l, src65_l, src76_l, src87_l);
- out0_r = DPADD_H3(src10_r, src32_r, src54_r, filt0, filt1, filt2);
- out1_r = DPADD_H3(src21_r, src43_r, src65_r, filt0, filt1, filt2);
- out2_r = DPADD_H3(src32_r, src54_r, src76_r, filt0, filt1, filt2);
- out3_r = DPADD_H3(src43_r, src65_r, src87_r, filt0, filt1, filt2);
- out0_l = DPADD_H3(src10_l, src32_l, src54_l, filt0, filt1, filt2);
- out1_l = DPADD_H3(src21_l, src43_l, src65_l, filt0, filt1, filt2);
- out2_l = DPADD_H3(src32_l, src54_l, src76_l, filt0, filt1, filt2);
- out3_l = DPADD_H3(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+ out0_r = dpadd_h3(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+ out1_r = dpadd_h3(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+ out2_r = dpadd_h3(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+ out3_r = dpadd_h3(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+ out0_l = dpadd_h3(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+ out1_l = dpadd_h3(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+ out2_l = dpadd_h3(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+ out3_l = dpadd_h3(src43_l, src65_l, src87_l, filt0, filt1, filt2);
DUP4_ARG3(__lsx_vssrarni_b_h, out0_l, out0_r, VP8_FILTER_SHIFT, out1_l,
out1_r, VP8_FILTER_SHIFT, out2_l, out2_r, VP8_FILTER_SHIFT,
out3_l, out3_r, VP8_FILTER_SHIFT, tmp0, tmp1, tmp2, tmp3);
@@ -621,12 +621,12 @@ static void common_hv_6ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src1, src2, src3);
src4 = __lsx_vxori_b(src4, 128);
- hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out0 = horiz_6tap_filt(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out2 = HORIZ_6TAP_FILT(src2, src3, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out2 = horiz_6tap_filt(src2, src3, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
hz_out1 = __lsx_vshuf_b(hz_out2, hz_out0, shuff);
- hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out3 = horiz_6tap_filt(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out3, hz_out2, out0, out1);
@@ -636,7 +636,7 @@ static void common_hv_6ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src += src_stride_x2;
DUP2_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src5, src6);
- hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0,
+ hz_out5 = horiz_6tap_filt(src5, src6, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff);
@@ -645,15 +645,15 @@ static void common_hv_6ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src += src_stride_x2;
DUP2_ARG2(__lsx_vxori_b, src7, 128, src8, 128, src7, src8);
- hz_out7 = HORIZ_6TAP_FILT(src7, src8, mask0, mask1, mask2, filt_hz0,
+ hz_out7 = horiz_6tap_filt(src7, src8, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
hz_out6 = __lsx_vshuf_b(hz_out7, hz_out5, shuff);
out2 = __lsx_vpackev_b(hz_out5, hz_out4);
- tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+ tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
out3 = __lsx_vpackev_b(hz_out7, hz_out6);
- tmp1 = DPADD_H3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2);
+ tmp1 = dpadd_h3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2);
tmp0 = __lsx_vssrarni_b_h(tmp1, tmp0, 7);
tmp0 = __lsx_vxori_b(tmp0, 128);
@@ -710,15 +710,15 @@ static void common_hv_6ht_6vt_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src1, src2, src3);
src4 = __lsx_vxori_b(src4, 128);
- hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out0 = horiz_6tap_filt(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out1 = horiz_6tap_filt(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out2 = horiz_6tap_filt(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out3 = horiz_6tap_filt(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out4 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out4 = horiz_6tap_filt(src4, src4, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
filt = __lsx_vld(filter_vert, 0);
DUP2_ARG2(__lsx_vreplvei_h, filt, 0, filt, 1, filt_vt0, filt_vt1);
@@ -734,25 +734,25 @@ static void common_hv_6ht_6vt_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP4_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src7, 128, src8, 128, src5,
src6, src7, src8);
- hz_out5 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0,
+ hz_out5 = horiz_6tap_filt(src5, src5, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
out2 = __lsx_vpackev_b(hz_out5, hz_out4);
- tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+ tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
- hz_out6 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0,
+ hz_out6 = horiz_6tap_filt(src6, src6, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
out5 = __lsx_vpackev_b(hz_out6, hz_out5);
- tmp1 = DPADD_H3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2);
+ tmp1 = dpadd_h3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2);
- hz_out7 = HORIZ_6TAP_FILT(src7, src7, mask0, mask1, mask2, filt_hz0,
+ hz_out7 = horiz_6tap_filt(src7, src7, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
out7 = __lsx_vpackev_b(hz_out7, hz_out6);
- tmp2 = DPADD_H3(out1, out2, out7, filt_vt0, filt_vt1, filt_vt2);
+ tmp2 = dpadd_h3(out1, out2, out7, filt_vt0, filt_vt1, filt_vt2);
- hz_out8 = HORIZ_6TAP_FILT(src8, src8, mask0, mask1, mask2, filt_hz0,
+ hz_out8 = horiz_6tap_filt(src8, src8, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
out6 = __lsx_vpackev_b(hz_out8, hz_out7);
- tmp3 = DPADD_H3(out4, out5, out6, filt_vt0, filt_vt1, filt_vt2);
+ tmp3 = dpadd_h3(out4, out5, out6, filt_vt0, filt_vt1, filt_vt2);
DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, VP8_FILTER_SHIFT, tmp3, tmp2,
VP8_FILTER_SHIFT, vec0, vec1);
@@ -997,14 +997,14 @@ static void common_vt_4t_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
src4332 = __lsx_vilvl_d(src43_r, src32_r);
src4332 = __lsx_vxori_b(src4332, 128);
- out0 = FILT_4TAP_DPADD_H(src2110, src4332, filt0, filt1);
+ out0 = filt_4tap_dpadd_h(src2110, src4332, filt0, filt1);
src2 = __lsx_vld(src, 0);
src += src_stride;
DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src54_r, src65_r);
src2110 = __lsx_vilvl_d(src65_r, src54_r);
src2110 = __lsx_vxori_b(src2110, 128);
- out1 = FILT_4TAP_DPADD_H(src4332, src2110, filt0, filt1);
+ out1 = filt_4tap_dpadd_h(src4332, src2110, filt0, filt1);
out0 = __lsx_vssrarni_b_h(out1, out0, VP8_FILTER_SHIFT);
out0 = __lsx_vxori_b(out0, 128);
@@ -1055,10 +1055,10 @@ static void common_vt_4t_8w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src8, src9, src10);
DUP4_ARG2(__lsx_vilvl_b, src7, src2, src8, src7, src9, src8, src10, src9,
src72_r, src87_r, src98_r, src109_r);
- out0_r = FILT_4TAP_DPADD_H(src10_r, src72_r, filt0, filt1);
- out1_r = FILT_4TAP_DPADD_H(src21_r, src87_r, filt0, filt1);
- out2_r = FILT_4TAP_DPADD_H(src72_r, src98_r, filt0, filt1);
- out3_r = FILT_4TAP_DPADD_H(src87_r, src109_r, filt0, filt1);
+ out0_r = filt_4tap_dpadd_h(src10_r, src72_r, filt0, filt1);
+ out1_r = filt_4tap_dpadd_h(src21_r, src87_r, filt0, filt1);
+ out2_r = filt_4tap_dpadd_h(src72_r, src98_r, filt0, filt1);
+ out3_r = filt_4tap_dpadd_h(src87_r, src109_r, filt0, filt1);
DUP2_ARG3(__lsx_vssrarni_b_h, out1_r, out0_r, VP8_FILTER_SHIFT, out3_r,
out2_r, VP8_FILTER_SHIFT, tmp0, tmp1);
DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
@@ -1114,14 +1114,14 @@ static void common_vt_4t_16w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src32_r, src43_r, src54_r, src65_r);
DUP4_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src5, src4, src6, src5,
src32_l, src43_l, src54_l, src65_l);
- out0_r = FILT_4TAP_DPADD_H(src10_r, src32_r, filt0, filt1);
- out1_r = FILT_4TAP_DPADD_H(src21_r, src43_r, filt0, filt1);
- out2_r = FILT_4TAP_DPADD_H(src32_r, src54_r, filt0, filt1);
- out3_r = FILT_4TAP_DPADD_H(src43_r, src65_r, filt0, filt1);
- out0_l = FILT_4TAP_DPADD_H(src10_l, src32_l, filt0, filt1);
- out1_l = FILT_4TAP_DPADD_H(src21_l, src43_l, filt0, filt1);
- out2_l = FILT_4TAP_DPADD_H(src32_l, src54_l, filt0, filt1);
- out3_l = FILT_4TAP_DPADD_H(src43_l, src65_l, filt0, filt1);
+ out0_r = filt_4tap_dpadd_h(src10_r, src32_r, filt0, filt1);
+ out1_r = filt_4tap_dpadd_h(src21_r, src43_r, filt0, filt1);
+ out2_r = filt_4tap_dpadd_h(src32_r, src54_r, filt0, filt1);
+ out3_r = filt_4tap_dpadd_h(src43_r, src65_r, filt0, filt1);
+ out0_l = filt_4tap_dpadd_h(src10_l, src32_l, filt0, filt1);
+ out1_l = filt_4tap_dpadd_h(src21_l, src43_l, filt0, filt1);
+ out2_l = filt_4tap_dpadd_h(src32_l, src54_l, filt0, filt1);
+ out3_l = filt_4tap_dpadd_h(src43_l, src65_l, filt0, filt1);
DUP4_ARG3(__lsx_vssrarni_b_h, out0_l, out0_r, VP8_FILTER_SHIFT, out1_l,
out1_r, VP8_FILTER_SHIFT, out2_l, out2_r, VP8_FILTER_SHIFT,
out3_l, out3_r, VP8_FILTER_SHIFT, tmp0, tmp1, tmp2, tmp3);
@@ -1168,8 +1168,8 @@ static void common_hv_4ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1);
src2 = __lsx_vxori_b(src2, 128);
- hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1);
- hz_out1 = HORIZ_4TAP_FILT(src1, src2, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out0 = horiz_4tap_filt(src0, src1, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out1 = horiz_4tap_filt(src1, src2, mask0, mask1, filt_hz0, filt_hz1);
vec0 = __lsx_vpackev_b(hz_out1, hz_out0);
DUP2_ARG2(__lsx_vldrepl_h, filter_vert, 0, filter_vert, 2, filt_vt0,
@@ -1182,16 +1182,16 @@ static void common_hv_4ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src += src_stride_x4;
DUP2_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src3, src4);
- hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out3 = horiz_4tap_filt(src3, src4, mask0, mask1, filt_hz0, filt_hz1);
hz_out2 = __lsx_vshuf_b(hz_out3, hz_out1, shuff);
vec1 = __lsx_vpackev_b(hz_out3, hz_out2);
- tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1);
+ tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1);
DUP2_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src5, src6);
- hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out5 = horiz_4tap_filt(src5, src6, mask0, mask1, filt_hz0, filt_hz1);
hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff);
vec2 = __lsx_vpackev_b(hz_out5, hz_out4);
- tmp1 = FILT_4TAP_DPADD_H(vec1, vec2, filt_vt0, filt_vt1);
+ tmp1 = filt_4tap_dpadd_h(vec1, vec2, filt_vt0, filt_vt1);
tmp0 = __lsx_vssrarni_b_h(tmp1, tmp0, 7);
tmp0 = __lsx_vxori_b(tmp0, 128);
@@ -1239,9 +1239,9 @@ static inline void common_hv_4ht_4vt_8w_lsx(
DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1);
src2 = __lsx_vxori_b(src2, 128);
- hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1);
- hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1);
- hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out0 = horiz_4tap_filt(src0, src0, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out1 = horiz_4tap_filt(src1, src1, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out2 = horiz_4tap_filt(src2, src2, mask0, mask1, filt_hz0, filt_hz1);
DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out2, hz_out1, vec0, vec2);
filt = __lsx_vld(filter_vert, 0);
@@ -1254,21 +1254,21 @@ static inline void common_hv_4ht_4vt_8w_lsx(
DUP4_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src5, 128, src6, 128, src3,
src4, src5, src6);
- hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out3 = horiz_4tap_filt(src3, src3, mask0, mask1, filt_hz0, filt_hz1);
vec1 = __lsx_vpackev_b(hz_out3, hz_out2);
- tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1);
+ tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1);
- hz_out0 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out0 = horiz_4tap_filt(src4, src4, mask0, mask1, filt_hz0, filt_hz1);
vec3 = __lsx_vpackev_b(hz_out0, hz_out3);
- tmp1 = FILT_4TAP_DPADD_H(vec2, vec3, filt_vt0, filt_vt1);
+ tmp1 = filt_4tap_dpadd_h(vec2, vec3, filt_vt0, filt_vt1);
- hz_out1 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out1 = horiz_4tap_filt(src5, src5, mask0, mask1, filt_hz0, filt_hz1);
vec4 = __lsx_vpackev_b(hz_out1, hz_out0);
- tmp2 = FILT_4TAP_DPADD_H(vec1, vec4, filt_vt0, filt_vt1);
+ tmp2 = filt_4tap_dpadd_h(vec1, vec4, filt_vt0, filt_vt1);
- hz_out2 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out2 = horiz_4tap_filt(src6, src6, mask0, mask1, filt_hz0, filt_hz1);
DUP2_ARG2(__lsx_vpackev_b, hz_out0, hz_out3, hz_out2, hz_out1, vec0, vec1);
- tmp3 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1);
+ tmp3 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1);
DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, out0, out1);
DUP2_ARG2(__lsx_vxori_b, out0, 128, out1, 128, out0, out1);
@@ -1324,9 +1324,9 @@ static void common_hv_6ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1);
src2 = __lsx_vxori_b(src2, 128);
- hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out0 = horiz_6tap_filt(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out1 = HORIZ_6TAP_FILT(src1, src2, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out1 = horiz_6tap_filt(src1, src2, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
vec0 = __lsx_vpackev_b(hz_out1, hz_out0);
@@ -1341,17 +1341,17 @@ static void common_hv_6ht_4vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP4_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src5, 128, src6, 128, src3,
src4, src5, src6);
- hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0,
+ hz_out3 = horiz_6tap_filt(src3, src4, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
hz_out2 = __lsx_vshuf_b(hz_out3, hz_out1, shuff);
vec1 = __lsx_vpackev_b(hz_out3, hz_out2);
- tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1);
+ tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1);
- hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0,
+ hz_out5 = horiz_6tap_filt(src5, src6, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff);
vec2 = __lsx_vpackev_b(hz_out5, hz_out4);
- tmp1 = FILT_4TAP_DPADD_H(vec1, vec2, filt_vt0, filt_vt1);
+ tmp1 = filt_4tap_dpadd_h(vec1, vec2, filt_vt0, filt_vt1);
DUP2_ARG3(__lsx_vssrarni_b_h, tmp0, tmp0, 7, tmp1, tmp1, 7, tmp0, tmp1);
DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
@@ -1402,11 +1402,11 @@ static inline void common_hv_6ht_4vt_8w_lsx(
DUP2_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src0, src1);
src2 = __lsx_vxori_b(src2, 128);
- hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out0 = horiz_6tap_filt(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out1 = horiz_6tap_filt(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
- hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1,
+ hz_out2 = horiz_6tap_filt(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1,
filt_hz2);
DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out2, hz_out1, vec0, vec2);
@@ -1420,25 +1420,25 @@ static inline void common_hv_6ht_4vt_8w_lsx(
DUP4_ARG2(__lsx_vxori_b, src3, 128, src4, 128, src5, 128, src6, 128, src3,
src4, src5, src6);
- hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0,
+ hz_out3 = horiz_6tap_filt(src3, src3, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
vec1 = __lsx_vpackev_b(hz_out3, hz_out2);
- tmp0 = FILT_4TAP_DPADD_H(vec0, vec1, filt_vt0, filt_vt1);
+ tmp0 = filt_4tap_dpadd_h(vec0, vec1, filt_vt0, filt_vt1);
- hz_out0 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0,
+ hz_out0 = horiz_6tap_filt(src4, src4, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
vec3 = __lsx_vpackev_b(hz_out0, hz_out3);
- tmp1 = FILT_4TAP_DPADD_H(vec2, vec3, filt_vt0, filt_vt1);
+ tmp1 = filt_4tap_dpadd_h(vec2, vec3, filt_vt0, filt_vt1);
- hz_out1 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0,
+ hz_out1 = horiz_6tap_filt(src5, src5, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
vec0 = __lsx_vpackev_b(hz_out1, hz_out0);
- tmp2 = FILT_4TAP_DPADD_H(vec1, vec0, filt_vt0, filt_vt1);
+ tmp2 = filt_4tap_dpadd_h(vec1, vec0, filt_vt0, filt_vt1);
- hz_out2 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0,
+ hz_out2 = horiz_6tap_filt(src6, src6, mask0, mask1, mask2, filt_hz0,
filt_hz1, filt_hz2);
DUP2_ARG2(__lsx_vpackev_b, hz_out0, hz_out3, hz_out2, hz_out1, vec1, vec2);
- tmp3 = FILT_4TAP_DPADD_H(vec1, vec2, filt_vt0, filt_vt1);
+ tmp3 = filt_4tap_dpadd_h(vec1, vec2, filt_vt0, filt_vt1);
DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, out0, out1);
DUP2_ARG2(__lsx_vxori_b, out0, 128, out1, 128, out0, out1);
@@ -1492,9 +1492,9 @@ static void common_hv_4ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0,
src1, src2, src3);
src4 = __lsx_vxori_b(src4, 128);
- hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1);
- hz_out2 = HORIZ_4TAP_FILT(src2, src3, mask0, mask1, filt_hz0, filt_hz1);
- hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out0 = horiz_4tap_filt(src0, src1, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out2 = horiz_4tap_filt(src2, src3, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out3 = horiz_4tap_filt(src3, src4, mask0, mask1, filt_hz0, filt_hz1);
hz_out1 = __lsx_vshuf_b(hz_out2, hz_out0, shuff);
DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out3, hz_out2, out0, out1);
@@ -1510,15 +1510,15 @@ static void common_hv_4ht_6vt_4w_lsx(uint8_t *RESTRICT src, int32_t src_stride,
src6, src7, src8);
src += src_stride_x4;
- hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out5 = horiz_4tap_filt(src5, src6, mask0, mask1, filt_hz0, filt_hz1);
hz_out4 = __lsx_vshuf_b(hz_out5, hz_out3, shuff);
out2 = __lsx_vpackev_b(hz_out5, hz_out4);
- tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+ tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
- hz_out7 = HORIZ_4TAP_FILT(src7, src8, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out7 = horiz_4tap_filt(src7, src8, mask0, mask1, filt_hz0, filt_hz1);
hz_out6 = __lsx_vshuf_b(hz_out7, hz_out5, shuff);
out3 = __lsx_vpackev_b(hz_out7, hz_out6);
- tmp1 = DPADD_H3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2);
+ tmp1 = dpadd_h3(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2);
tmp0 = __lsx_vssrarni_b_h(tmp1, tmp0, 7);
tmp0 = __lsx_vxori_b(tmp0, 128);
@@ -1571,11 +1571,11 @@ static inline void common_hv_4ht_6vt_8w_lsx(
DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0,
src1, src2, src3);
src4 = __lsx_vxori_b(src4, 128);
- hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1);
- hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1);
- hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1);
- hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1);
- hz_out4 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out0 = horiz_4tap_filt(src0, src0, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out1 = horiz_4tap_filt(src1, src1, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out2 = horiz_4tap_filt(src2, src2, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out3 = horiz_4tap_filt(src3, src3, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out4 = horiz_4tap_filt(src4, src4, mask0, mask1, filt_hz0, filt_hz1);
DUP2_ARG2(__lsx_vpackev_b, hz_out1, hz_out0, hz_out3, hz_out2, out0, out1);
DUP2_ARG2(__lsx_vpackev_b, hz_out2, hz_out1, hz_out4, hz_out3, out3, out4);
@@ -1590,21 +1590,21 @@ static inline void common_hv_4ht_6vt_8w_lsx(
DUP4_ARG2(__lsx_vxori_b, src5, 128, src6, 128, src7, 128, src8, 128, src5,
src6, src7, src8);
- hz_out5 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out5 = horiz_4tap_filt(src5, src5, mask0, mask1, filt_hz0, filt_hz1);
out2 = __lsx_vpackev_b(hz_out5, hz_out4);
- tmp0 = DPADD_H3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+ tmp0 = dpadd_h3(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
- hz_out6 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out6 = horiz_4tap_filt(src6, src6, mask0, mask1, filt_hz0, filt_hz1);
out5 = __lsx_vpackev_b(hz_out6, hz_out5);
- tmp1 = DPADD_H3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2);
+ tmp1 = dpadd_h3(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2);
- hz_out7 = HORIZ_4TAP_FILT(src7, src7, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out7 = horiz_4tap_filt(src7, src7, mask0, mask1, filt_hz0, filt_hz1);
out6 = __lsx_vpackev_b(hz_out7, hz_out6);
- tmp2 = DPADD_H3(out1, out2, out6, filt_vt0, filt_vt1, filt_vt2);
+ tmp2 = dpadd_h3(out1, out2, out6, filt_vt0, filt_vt1, filt_vt2);
- hz_out8 = HORIZ_4TAP_FILT(src8, src8, mask0, mask1, filt_hz0, filt_hz1);
+ hz_out8 = horiz_4tap_filt(src8, src8, mask0, mask1, filt_hz0, filt_hz1);
out7 = __lsx_vpackev_b(hz_out8, hz_out7);
- tmp3 = DPADD_H3(out4, out5, out7, filt_vt0, filt_vt1, filt_vt2);
+ tmp3 = dpadd_h3(out4, out5, out7, filt_vt0, filt_vt1, filt_vt2);
DUP2_ARG3(__lsx_vssrarni_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, vec0, vec1);
DUP2_ARG2(__lsx_vxori_b, vec0, 128, vec1, 128, vec0, vec1);
__lsx_vstelm_d(vec0, dst, 0, 0);