diff options
author | Linfeng Zhang <linfengz@google.com> | 2017-08-03 17:50:03 -0700 |
---|---|---|
committer | Linfeng Zhang <linfengz@google.com> | 2017-08-04 15:33:37 -0700 |
commit | d670678f26da66a0a903a41bc789b6f996eb6c33 (patch) | |
tree | fa54b2ee01858a9a033a8a129cd18eb8d9934cfb | |
parent | fa829e0e5aa3e43d14ecea5f327b6a2e27818cf8 (diff) | |
download | libvpx-d670678f26da66a0a903a41bc789b6f996eb6c33.tar libvpx-d670678f26da66a0a903a41bc789b6f996eb6c33.tar.gz libvpx-d670678f26da66a0a903a41bc789b6f996eb6c33.tar.bz2 libvpx-d670678f26da66a0a903a41bc789b6f996eb6c33.zip |
Rename highbd_multiplication_and_add_xx() to highbd_butterfly_xx() in the x86 idct code (both the _sse2 and _sse4_1 variants).
Change-Id: I5159499a73a5c1b680516f6ca9c3d84f00c35083
-rw-r--r-- | vpx_dsp/x86/highbd_idct16x16_add_sse2.c | 52 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_idct16x16_add_sse4.c | 56 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_idct4x4_add_sse2.c | 4 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_idct4x4_add_sse4.c | 4 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_idct8x8_add_sse2.c | 12 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_idct8x8_add_sse4.c | 12 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_inv_txfm_sse2.h | 7 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_inv_txfm_sse4.h | 7 |
8 files changed, 76 insertions, 78 deletions
diff --git a/vpx_dsp/x86/highbd_idct16x16_add_sse2.c b/vpx_dsp/x86/highbd_idct16x16_add_sse2.c index ac586c5d1..8192f09ef 100644 --- a/vpx_dsp/x86/highbd_idct16x16_add_sse2.c +++ b/vpx_dsp/x86/highbd_idct16x16_add_sse2.c @@ -75,20 +75,20 @@ static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) { __m128i temp1[4], temp2, sign[2]; // stage 2 - highbd_multiplication_and_add_sse2(io[1], io[15], (int)cospi_30_64, - (int)cospi_2_64, &step2[8], &step2[15]); - highbd_multiplication_and_add_sse2(io[9], io[7], (int)cospi_14_64, - (int)cospi_18_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse2(io[5], io[11], (int)cospi_22_64, - (int)cospi_10_64, &step2[10], &step2[13]); - highbd_multiplication_and_add_sse2(io[13], io[3], (int)cospi_6_64, - (int)cospi_26_64, &step2[11], &step2[12]); + highbd_butterfly_sse2(io[1], io[15], (int)cospi_30_64, (int)cospi_2_64, + &step2[8], &step2[15]); + highbd_butterfly_sse2(io[9], io[7], (int)cospi_14_64, (int)cospi_18_64, + &step2[9], &step2[14]); + highbd_butterfly_sse2(io[5], io[11], (int)cospi_22_64, (int)cospi_10_64, + &step2[10], &step2[13]); + highbd_butterfly_sse2(io[13], io[3], (int)cospi_6_64, (int)cospi_26_64, + &step2[11], &step2[12]); // stage 3 - highbd_multiplication_and_add_sse2(io[2], io[14], (int)cospi_28_64, - (int)cospi_4_64, &step1[4], &step1[7]); - highbd_multiplication_and_add_sse2(io[10], io[6], (int)cospi_12_64, - (int)cospi_20_64, &step1[5], &step1[6]); + highbd_butterfly_sse2(io[2], io[14], (int)cospi_28_64, (int)cospi_4_64, + &step1[4], &step1[7]); + highbd_butterfly_sse2(io[10], io[6], (int)cospi_12_64, (int)cospi_20_64, + &step1[5], &step1[6]); step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[10] = _mm_sub_epi32(step2[10], step2[11]); // step1[10] = -step1[10] @@ -105,12 +105,12 @@ static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) { temp2 = _mm_sub_epi32(io[0], io[8]); abs_extend_64bit_sse2(temp2, temp1, sign); step2[1] = 
multiplication_round_shift_sse2(temp1, sign, (int)cospi_16_64); - highbd_multiplication_and_add_sse2(io[4], io[12], (int)cospi_24_64, - (int)cospi_8_64, &step2[2], &step2[3]); - highbd_multiplication_and_add_sse2(step1[14], step1[9], (int)cospi_24_64, - (int)cospi_8_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse2(step1[10], step1[13], (int)cospi_8_64, - (int)cospi_24_64, &step2[13], &step2[10]); + highbd_butterfly_sse2(io[4], io[12], (int)cospi_24_64, (int)cospi_8_64, + &step2[2], &step2[3]); + highbd_butterfly_sse2(step1[14], step1[9], (int)cospi_24_64, (int)cospi_8_64, + &step2[9], &step2[14]); + highbd_butterfly_sse2(step1[10], step1[13], (int)cospi_8_64, (int)cospi_24_64, + &step2[13], &step2[10]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step1[4] = _mm_add_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); @@ -159,10 +159,10 @@ static INLINE void highbd_idct16x16_38_4col(__m128i *const io /*io[16]*/) { step2[1] = step2[0]; highbd_multiplication_sse2(io[4], (int)cospi_24_64, (int)cospi_8_64, &step2[2], &step2[3]); - highbd_multiplication_and_add_sse2(step1[14], step1[9], (int)cospi_24_64, - (int)cospi_8_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse2(step1[10], step1[13], (int)cospi_8_64, - (int)cospi_24_64, &step2[13], &step2[10]); + highbd_butterfly_sse2(step1[14], step1[9], (int)cospi_24_64, (int)cospi_8_64, + &step2[9], &step2[14]); + highbd_butterfly_sse2(step1[10], step1[13], (int)cospi_8_64, (int)cospi_24_64, + &step2[13], &step2[10]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step1[4] = _mm_add_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); @@ -207,10 +207,10 @@ static INLINE void highbd_idct16x16_10_4col(__m128i *const io /*io[16]*/) { step2[1] = step2[0]; step2[2] = _mm_setzero_si128(); step2[3] = _mm_setzero_si128(); - highbd_multiplication_and_add_sse2(step1[14], step1[9], (int)cospi_24_64, - (int)cospi_8_64, &step2[9], &step2[14]); - 
highbd_multiplication_and_add_sse2(step1[10], step1[13], (int)cospi_8_64, - (int)cospi_24_64, &step2[13], &step2[10]); + highbd_butterfly_sse2(step1[14], step1[9], (int)cospi_24_64, (int)cospi_8_64, + &step2[9], &step2[14]); + highbd_butterfly_sse2(step1[10], step1[13], (int)cospi_8_64, (int)cospi_24_64, + &step2[13], &step2[10]); step2[5] = step1[4]; step2[6] = step1[7]; step2[8] = step1[8]; diff --git a/vpx_dsp/x86/highbd_idct16x16_add_sse4.c b/vpx_dsp/x86/highbd_idct16x16_add_sse4.c index f25d8e5ee..8cc0d0836 100644 --- a/vpx_dsp/x86/highbd_idct16x16_add_sse4.c +++ b/vpx_dsp/x86/highbd_idct16x16_add_sse4.c @@ -76,21 +76,20 @@ static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) { __m128i temp1[4], temp2; // stage 2 - highbd_multiplication_and_add_sse4_1(io[1], io[15], (int)cospi_30_64, - (int)cospi_2_64, &step2[8], &step2[15]); - highbd_multiplication_and_add_sse4_1(io[9], io[7], (int)cospi_14_64, - (int)cospi_18_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse4_1(io[5], io[11], (int)cospi_22_64, - (int)cospi_10_64, &step2[10], - &step2[13]); - highbd_multiplication_and_add_sse4_1( - io[13], io[3], (int)cospi_6_64, (int)cospi_26_64, &step2[11], &step2[12]); + highbd_butterfly_sse4_1(io[1], io[15], (int)cospi_30_64, (int)cospi_2_64, + &step2[8], &step2[15]); + highbd_butterfly_sse4_1(io[9], io[7], (int)cospi_14_64, (int)cospi_18_64, + &step2[9], &step2[14]); + highbd_butterfly_sse4_1(io[5], io[11], (int)cospi_22_64, (int)cospi_10_64, + &step2[10], &step2[13]); + highbd_butterfly_sse4_1(io[13], io[3], (int)cospi_6_64, (int)cospi_26_64, + &step2[11], &step2[12]); // stage 3 - highbd_multiplication_and_add_sse4_1(io[2], io[14], (int)cospi_28_64, - (int)cospi_4_64, &step1[4], &step1[7]); - highbd_multiplication_and_add_sse4_1(io[10], io[6], (int)cospi_12_64, - (int)cospi_20_64, &step1[5], &step1[6]); + highbd_butterfly_sse4_1(io[2], io[14], (int)cospi_28_64, (int)cospi_4_64, + &step1[4], &step1[7]); + highbd_butterfly_sse4_1(io[10], 
io[6], (int)cospi_12_64, (int)cospi_20_64, + &step1[5], &step1[6]); step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[10] = _mm_sub_epi32(step2[10], step2[11]); // step1[10] = -step1[10] @@ -107,13 +106,12 @@ static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) { temp2 = _mm_sub_epi32(io[0], io[8]); extend_64bit(temp2, temp1); step2[1] = multiplication_round_shift_sse4_1(temp1, (int)cospi_16_64); - highbd_multiplication_and_add_sse4_1(io[4], io[12], (int)cospi_24_64, - (int)cospi_8_64, &step2[2], &step2[3]); - highbd_multiplication_and_add_sse4_1(step1[14], step1[9], (int)cospi_24_64, - (int)cospi_8_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse4_1(step1[10], step1[13], (int)cospi_8_64, - (int)cospi_24_64, &step2[13], - &step2[10]); + highbd_butterfly_sse4_1(io[4], io[12], (int)cospi_24_64, (int)cospi_8_64, + &step2[2], &step2[3]); + highbd_butterfly_sse4_1(step1[14], step1[9], (int)cospi_24_64, + (int)cospi_8_64, &step2[9], &step2[14]); + highbd_butterfly_sse4_1(step1[10], step1[13], (int)cospi_8_64, + (int)cospi_24_64, &step2[13], &step2[10]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step1[4] = _mm_add_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); @@ -162,11 +160,10 @@ static INLINE void highbd_idct16x16_38_4col(__m128i *const io /*io[16]*/) { step2[1] = step2[0]; highbd_multiplication_sse4_1(io[4], (int)cospi_24_64, (int)cospi_8_64, &step2[2], &step2[3]); - highbd_multiplication_and_add_sse4_1(step1[14], step1[9], (int)cospi_24_64, - (int)cospi_8_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse4_1(step1[10], step1[13], (int)cospi_8_64, - (int)cospi_24_64, &step2[13], - &step2[10]); + highbd_butterfly_sse4_1(step1[14], step1[9], (int)cospi_24_64, + (int)cospi_8_64, &step2[9], &step2[14]); + highbd_butterfly_sse4_1(step1[10], step1[13], (int)cospi_8_64, + (int)cospi_24_64, &step2[13], &step2[10]); step2[5] = _mm_sub_epi32(step1[4], 
step1[5]); step1[4] = _mm_add_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); @@ -211,11 +208,10 @@ static INLINE void highbd_idct16x16_10_4col(__m128i *const io /*io[16]*/) { step2[1] = step2[0]; step2[2] = _mm_setzero_si128(); step2[3] = _mm_setzero_si128(); - highbd_multiplication_and_add_sse4_1(step1[14], step1[9], (int)cospi_24_64, - (int)cospi_8_64, &step2[9], &step2[14]); - highbd_multiplication_and_add_sse4_1(step1[10], step1[13], (int)cospi_8_64, - (int)cospi_24_64, &step2[13], - &step2[10]); + highbd_butterfly_sse4_1(step1[14], step1[9], (int)cospi_24_64, + (int)cospi_8_64, &step2[9], &step2[14]); + highbd_butterfly_sse4_1(step1[10], step1[13], (int)cospi_8_64, + (int)cospi_24_64, &step2[13], &step2[10]); step2[5] = step1[4]; step2[6] = step1[7]; step2[8] = step1[8]; diff --git a/vpx_dsp/x86/highbd_idct4x4_add_sse2.c b/vpx_dsp/x86/highbd_idct4x4_add_sse2.c index f5ee7b74e..88cc40ac5 100644 --- a/vpx_dsp/x86/highbd_idct4x4_add_sse2.c +++ b/vpx_dsp/x86/highbd_idct4x4_add_sse2.c @@ -86,8 +86,8 @@ static INLINE void highbd_idct4_large_sse2(__m128i *const io) { temp[0] = _mm_sub_epi32(io[0], io[2]); // input[0] - input[2] abs_extend_64bit_sse2(temp[0], temp, sign); step[1] = multiplication_round_shift_sse2(temp, sign, (int)cospi_16_64); - highbd_multiplication_and_add_sse2(io[1], io[3], (int)cospi_24_64, - (int)cospi_8_64, &step[2], &step[3]); + highbd_butterfly_sse2(io[1], io[3], (int)cospi_24_64, (int)cospi_8_64, + &step[2], &step[3]); // stage 2 io[0] = _mm_add_epi32(step[0], step[3]); // step[0] + step[3] diff --git a/vpx_dsp/x86/highbd_idct4x4_add_sse4.c b/vpx_dsp/x86/highbd_idct4x4_add_sse4.c index e1934350b..015824fa9 100644 --- a/vpx_dsp/x86/highbd_idct4x4_add_sse4.c +++ b/vpx_dsp/x86/highbd_idct4x4_add_sse4.c @@ -28,8 +28,8 @@ static INLINE void highbd_idct4(__m128i *const io) { temp[0] = _mm_sub_epi32(io[0], io[2]); // input[0] - input[2] extend_64bit(temp[0], temp); step[1] = multiplication_round_shift_sse4_1(temp, 
(int)cospi_16_64); - highbd_multiplication_and_add_sse4_1(io[1], io[3], (int)cospi_24_64, - (int)cospi_8_64, &step[2], &step[3]); + highbd_butterfly_sse4_1(io[1], io[3], (int)cospi_24_64, (int)cospi_8_64, + &step[2], &step[3]); // stage 2 io[0] = _mm_add_epi32(step[0], step[3]); // step[0] + step[3] diff --git a/vpx_dsp/x86/highbd_idct8x8_add_sse2.c b/vpx_dsp/x86/highbd_idct8x8_add_sse2.c index c94c90111..ac76f5c0e 100644 --- a/vpx_dsp/x86/highbd_idct8x8_add_sse2.c +++ b/vpx_dsp/x86/highbd_idct8x8_add_sse2.c @@ -25,10 +25,10 @@ static void highbd_idct8x8_half1d(__m128i *const io) { step1[2] = io[4]; step1[1] = io[2]; step1[3] = io[6]; - highbd_multiplication_and_add_sse2(io[1], io[7], (int)cospi_28_64, - (int)cospi_4_64, &step1[4], &step1[7]); - highbd_multiplication_and_add_sse2(io[5], io[3], (int)cospi_12_64, - (int)cospi_20_64, &step1[5], &step1[6]); + highbd_butterfly_sse2(io[1], io[7], (int)cospi_28_64, (int)cospi_4_64, + &step1[4], &step1[7]); + highbd_butterfly_sse2(io[5], io[3], (int)cospi_12_64, (int)cospi_20_64, + &step1[5], &step1[6]); // stage 2 temp2[0] = _mm_add_epi32(step1[0], step1[2]); @@ -37,8 +37,8 @@ static void highbd_idct8x8_half1d(__m128i *const io) { temp2[0] = _mm_sub_epi32(step1[0], step1[2]); abs_extend_64bit_sse2(temp2[0], temp1, sign); step2[1] = multiplication_round_shift_sse2(temp1, sign, (int)cospi_16_64); - highbd_multiplication_and_add_sse2(step1[1], step1[3], (int)cospi_24_64, - (int)cospi_8_64, &step2[2], &step2[3]); + highbd_butterfly_sse2(step1[1], step1[3], (int)cospi_24_64, (int)cospi_8_64, + &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); diff --git a/vpx_dsp/x86/highbd_idct8x8_add_sse4.c b/vpx_dsp/x86/highbd_idct8x8_add_sse4.c index e918e215d..e901760cc 100644 --- a/vpx_dsp/x86/highbd_idct8x8_add_sse4.c +++ b/vpx_dsp/x86/highbd_idct8x8_add_sse4.c @@ -27,10 +27,10 @@ static void highbd_idct8x8_half1d(__m128i 
*const io) { step1[2] = io[4]; step1[1] = io[2]; step1[3] = io[6]; - highbd_multiplication_and_add_sse4_1(io[1], io[7], (int)cospi_28_64, - (int)cospi_4_64, &step1[4], &step1[7]); - highbd_multiplication_and_add_sse4_1(io[5], io[3], (int)cospi_12_64, - (int)cospi_20_64, &step1[5], &step1[6]); + highbd_butterfly_sse4_1(io[1], io[7], (int)cospi_28_64, (int)cospi_4_64, + &step1[4], &step1[7]); + highbd_butterfly_sse4_1(io[5], io[3], (int)cospi_12_64, (int)cospi_20_64, + &step1[5], &step1[6]); // stage 2 temp2[0] = _mm_add_epi32(step1[0], step1[2]); @@ -39,8 +39,8 @@ static void highbd_idct8x8_half1d(__m128i *const io) { temp2[0] = _mm_sub_epi32(step1[0], step1[2]); extend_64bit(temp2[0], temp1); step2[1] = multiplication_round_shift_sse4_1(temp1, (int)cospi_16_64); - highbd_multiplication_and_add_sse4_1(step1[1], step1[3], (int)cospi_24_64, - (int)cospi_8_64, &step2[2], &step2[3]); + highbd_butterfly_sse4_1(step1[1], step1[3], (int)cospi_24_64, (int)cospi_8_64, + &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); diff --git a/vpx_dsp/x86/highbd_inv_txfm_sse2.h b/vpx_dsp/x86/highbd_inv_txfm_sse2.h index ca771b5f7..d7f7a165e 100644 --- a/vpx_dsp/x86/highbd_inv_txfm_sse2.h +++ b/vpx_dsp/x86/highbd_inv_txfm_sse2.h @@ -110,9 +110,10 @@ static INLINE __m128i multiplication_neg_round_shift_sse2( } // Note: c0 and c1 must be non negative. 
-static INLINE void highbd_multiplication_and_add_sse2( - const __m128i in0, const __m128i in1, const int c0, const int c1, - __m128i *const out0, __m128i *const out1) { +static INLINE void highbd_butterfly_sse2(const __m128i in0, const __m128i in1, + const int c0, const int c1, + __m128i *const out0, + __m128i *const out1) { const __m128i pair_c0 = pair_set_epi32(c0 << 2, 0); const __m128i pair_c1 = pair_set_epi32(c1 << 2, 0); __m128i temp1[4], temp2[4], sign1[2], sign2[2]; diff --git a/vpx_dsp/x86/highbd_inv_txfm_sse4.h b/vpx_dsp/x86/highbd_inv_txfm_sse4.h index 17b87a913..24b3f1046 100644 --- a/vpx_dsp/x86/highbd_inv_txfm_sse4.h +++ b/vpx_dsp/x86/highbd_inv_txfm_sse4.h @@ -29,9 +29,10 @@ static INLINE __m128i multiplication_round_shift_sse4_1( return pack_4(t0, t1); } -static INLINE void highbd_multiplication_and_add_sse4_1( - const __m128i in0, const __m128i in1, const int c0, const int c1, - __m128i *const out0, __m128i *const out1) { +static INLINE void highbd_butterfly_sse4_1(const __m128i in0, const __m128i in1, + const int c0, const int c1, + __m128i *const out0, + __m128i *const out1) { const __m128i pair_c0 = pair_set_epi32(4 * c0, 0); const __m128i pair_c1 = pair_set_epi32(4 * c1, 0); __m128i temp1[4], temp2[4]; |