summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
author: Yunqing Wang <yunqingwang@google.com> 2013-09-24 08:40:56 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-09-24 08:40:56 -0700
commit: bacb5925ffa4dbc83a00986ae585b35848d0fcbd (patch)
tree: 5d3b8aea1602e26c174c8f34b680f5c19e1fdda3 /vp9
parent: db9235657741ce3fa5b9226f55d780304320ef8c (diff)
parent: 13c7715a75e91bcd855e0aa647ecd88d90152e63 (diff)
download: libvpx-bacb5925ffa4dbc83a00986ae585b35848d0fcbd.tar
libvpx-bacb5925ffa4dbc83a00986ae585b35848d0fcbd.tar.gz
libvpx-bacb5925ffa4dbc83a00986ae585b35848d0fcbd.tar.bz2
libvpx-bacb5925ffa4dbc83a00986ae585b35848d0fcbd.zip
Merge "Number of instructions in fdct4_1d_sse2 reduced by two."
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/encoder/x86/vp9_dct_sse2.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index eb271fef3..ad3d01da9 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -171,22 +171,21 @@ static INLINE void transpose_4x4(__m128i *res) {
void fdct4_1d_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[4], v[4];
- u[0] = _mm_add_epi16(in[0], in[3]);
- u[1] = _mm_add_epi16(in[1], in[2]);
- u[2] = _mm_sub_epi16(in[1], in[2]);
- u[3] = _mm_sub_epi16(in[0], in[3]);
+ u[0]=_mm_unpacklo_epi16(in[0], in[1]);
+ u[1]=_mm_unpacklo_epi16(in[3], in[2]);
+
+ v[0] = _mm_add_epi16(u[0], u[1]);
+ v[1] = _mm_sub_epi16(u[0], u[1]);
- v[0] = _mm_unpacklo_epi16(u[0], u[1]);
- v[1] = _mm_unpacklo_epi16(u[2], u[3]);
u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // 0
u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // 2
- u[2] = _mm_madd_epi16(v[1], k__cospi_p24_p08); // 1
- u[3] = _mm_madd_epi16(v[1], k__cospi_m08_p24); // 3
+ u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // 1
+ u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // 3
v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);