summary | refs | log | tree | commit | diff
path: root/vpx_dsp/x86
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_dsp/x86')
-rw-r--r-- vpx_dsp/x86/highbd_idct16x16_add_sse4.c | 6
-rw-r--r-- vpx_dsp/x86/highbd_inv_txfm_sse4.h | 1
-rw-r--r-- vpx_dsp/x86/inv_txfm_sse2.c | 6
-rw-r--r-- vpx_dsp/x86/inv_txfm_sse2.h | 1
4 files changed, 8 insertions, 6 deletions
diff --git a/vpx_dsp/x86/highbd_idct16x16_add_sse4.c b/vpx_dsp/x86/highbd_idct16x16_add_sse4.c
index de097c66a..7898ee12c 100644
--- a/vpx_dsp/x86/highbd_idct16x16_add_sse4.c
+++ b/vpx_dsp/x86/highbd_idct16x16_add_sse4.c
@@ -53,7 +53,7 @@ static INLINE void highbd_idct16_4col_stage6(const __m128i *const in,
out[15] = in[15];
}
-static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) {
+void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/) {
__m128i step1[16], step2[16];
// stage 2
@@ -233,7 +233,7 @@ void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input,
in = all[i];
highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
- highbd_idct16_4col(in);
+ vpx_highbd_idct16_4col_sse4_1(in);
input += 4 * 16;
}
@@ -243,7 +243,7 @@ void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input,
transpose_32bit_4x4(all[1] + i, out + 4);
transpose_32bit_4x4(all[2] + i, out + 8);
transpose_32bit_4x4(all[3] + i, out + 12);
- highbd_idct16_4col(out);
+ vpx_highbd_idct16_4col_sse4_1(out);
for (j = 0; j < 16; ++j) {
highbd_write_buffer_4(dest + j * stride, out[j], bd);
diff --git a/vpx_dsp/x86/highbd_inv_txfm_sse4.h b/vpx_dsp/x86/highbd_inv_txfm_sse4.h
index e8ec335e3..5a7fd1d39 100644
--- a/vpx_dsp/x86/highbd_inv_txfm_sse4.h
+++ b/vpx_dsp/x86/highbd_inv_txfm_sse4.h
@@ -107,5 +107,6 @@ static INLINE void highbd_idct4_sse4_1(__m128i *const io) {
}
void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io);
+void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/);
#endif // VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_
diff --git a/vpx_dsp/x86/inv_txfm_sse2.c b/vpx_dsp/x86/inv_txfm_sse2.c
index e17d2627d..4b02da966 100644
--- a/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/vpx_dsp/x86/inv_txfm_sse2.c
@@ -514,7 +514,7 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
}
}
-static void iadst16_8col(__m128i *const in) {
+void vpx_iadst16_8col_sse2(__m128i *const in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -874,8 +874,8 @@ void idct16_sse2(__m128i *const in0, __m128i *const in1) {
void iadst16_sse2(__m128i *const in0, __m128i *const in1) {
transpose_16bit_16x16(in0, in1);
- iadst16_8col(in0);
- iadst16_8col(in1);
+ vpx_iadst16_8col_sse2(in0);
+ vpx_iadst16_8col_sse2(in1);
}
// Group the coefficient calculation into smaller functions to prevent stack
diff --git a/vpx_dsp/x86/inv_txfm_sse2.h b/vpx_dsp/x86/inv_txfm_sse2.h
index 4cb1f0789..d573f66c9 100644
--- a/vpx_dsp/x86/inv_txfm_sse2.h
+++ b/vpx_dsp/x86/inv_txfm_sse2.h
@@ -701,6 +701,7 @@ void vpx_idct8_sse2(__m128i *const in);
void idct16_sse2(__m128i *const in0, __m128i *const in1);
void iadst4_sse2(__m128i *const in);
void iadst8_sse2(__m128i *const in);
+void vpx_iadst16_8col_sse2(__m128i *const in);
void iadst16_sse2(__m128i *const in0, __m128i *const in1);
void idct32_1024_8x32(const __m128i *const in, __m128i *const out);
void idct32_34_8x32_sse2(const __m128i *const in, __m128i *const out);