summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com>, 2013-07-25 08:49:37 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org>, 2013-07-25 08:49:37 -0700
commit: 242157c756314827ad9244952c7253e8900b9626 (patch)
tree: a91eebba8d14421488c8543600d19c4ed1692f50
parent: a0e8b45fee3990283013a8c7685f4397da48c08c (diff)
parent: 384e37e32b95d7b30a529da4b10c41f15aa3bd80 (diff)
download: libvpx-242157c756314827ad9244952c7253e8900b9626.tar
          libvpx-242157c756314827ad9244952c7253e8900b9626.tar.gz
          libvpx-242157c756314827ad9244952c7253e8900b9626.tar.bz2
          libvpx-242157c756314827ad9244952c7253e8900b9626.zip
Merge "SSE2 inverse 4x4 2D-DCT with DC only"
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh | 2
-rw-r--r-- vp9/common/x86/vp9_idct_intrin_sse2.c | 17
2 files changed, 18 insertions, 1 deletion
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index f1789fb74..c357ef62a 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -292,7 +292,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon
# dct
#
prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add
+specialize vp9_short_idct4x4_1_add sse2
prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_add sse2
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index b4766df5b..a1e14b482 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -148,6 +148,23 @@ void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE4X4(dest, input3);
}
+void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {  /*
+ * DC-only variant of the 4x4 inverse DCT + add: of the coefficient block,
+ * only input[0] is read. The DC term is scaled by cospi_16_64 twice, rounded,
+ * broadcast into an SSE2 register and handed to RECON_AND_STORE4X4 four times.
+ *
+ * input  - coefficient buffer; only element 0 is accessed here.
+ * dest   - destination buffer passed to RECON_AND_STORE4X4.
+ * stride - not referenced by name in this body; presumably consumed inside
+ *          RECON_AND_STORE4X4 as the row pitch of dest (macro is defined
+ *          outside this view - TODO confirm).
+ */
+ __m128i dc_value;
+ const __m128i zero = _mm_setzero_si128();  // not referenced by name below; presumably used inside RECON_AND_STORE4X4 - confirm before removing
+ int a;
+
+ a = dct_const_round_shift(input[0] * cospi_16_64);  // first scaling of the DC coefficient by cospi_16_64
+ a = dct_const_round_shift(a * cospi_16_64);  // second scaling (presumably one per 1-D pass of the 2-D transform - confirm)
+ a = ROUND_POWER_OF_TWO(a, 4);  // final rounding step with argument 4 (macro defined elsewhere)
+
+ dc_value = _mm_set1_epi16(a);  // replicate the low 16 bits of a into all eight 16-bit lanes
+
+ RECON_AND_STORE4X4(dest, dc_value);  // four identical invocations; presumably each adds dc_value to one
+ RECON_AND_STORE4X4(dest, dc_value);  // 4-pixel row of dest and then advances dest to the next row, so
+ RECON_AND_STORE4X4(dest, dc_value);  // the call order and count matter - NOTE(review): verify against
+ RECON_AND_STORE4X4(dest, dc_value);  // the macro definition earlier in this file
+}
+
void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
const __m128i zero = _mm_setzero_si128();
const __m128i c1 = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,