diff options
author | Jingning Han <jingning@google.com> | 2013-07-25 08:49:37 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2013-07-25 08:49:37 -0700 |
commit | 242157c756314827ad9244952c7253e8900b9626 (patch) | |
tree | a91eebba8d14421488c8543600d19c4ed1692f50 /vp9 | |
parent | a0e8b45fee3990283013a8c7685f4397da48c08c (diff) | |
parent | 384e37e32b95d7b30a529da4b10c41f15aa3bd80 (diff) | |
download | libvpx-242157c756314827ad9244952c7253e8900b9626.tar libvpx-242157c756314827ad9244952c7253e8900b9626.tar.gz libvpx-242157c756314827ad9244952c7253e8900b9626.tar.bz2 libvpx-242157c756314827ad9244952c7253e8900b9626.zip |
Merge "SSE2 inverse 4x4 2D-DCT with DC only"
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 2 | ||||
-rw-r--r-- | vp9/common/x86/vp9_idct_intrin_sse2.c | 17 |
2 files changed, 18 insertions, 1 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index f1789fb74..c357ef62a 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -292,7 +292,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon
 # dct
 #
 prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add
+specialize vp9_short_idct4x4_1_add sse2
 
 prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
 specialize vp9_short_idct4x4_add sse2
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index b4766df5b..a1e14b482 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -148,6 +148,23 @@ void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   RECON_AND_STORE4X4(dest, input3);
 }
 
+void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+  __m128i dc_value;
+  const __m128i zero = _mm_setzero_si128();
+  int a;
+
+  a = dct_const_round_shift(input[0] * cospi_16_64);
+  a = dct_const_round_shift(a * cospi_16_64);
+  a = ROUND_POWER_OF_TWO(a, 4);
+
+  dc_value = _mm_set1_epi16(a);
+
+  RECON_AND_STORE4X4(dest, dc_value);
+  RECON_AND_STORE4X4(dest, dc_value);
+  RECON_AND_STORE4X4(dest, dc_value);
+  RECON_AND_STORE4X4(dest, dc_value);
+}
+
 void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i c1 = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,