From 78136edcdc3f53bc63b58e76ec4b160a2da1a0e3 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Wed, 7 Aug 2013 14:45:37 -0700 Subject: SSE2 high precision 32x32 forward DCT Enable the SSE2 implementation of the high-precision 32x32 forward DCT. The intermediate stacks are 32 bits wide. The run time drops from 32126 cycles to 13442 cycles. Change-Id: Ib5ccafe3176c65bd6f2dbdef790bd47bbc880e56 --- vp9/common/vp9_blockd.h | 2 +- vp9/common/vp9_idct.h | 3 +++ vp9/common/vp9_rtcd_defs.sh | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'vp9/common') diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 244885ad2..25b8cf69d 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -143,7 +143,7 @@ typedef struct { unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char segment_id; // Segment id for current frame - // Flags used for prediction status of various bistream signals + // Flags used for prediction status of various bit-stream signals unsigned char seg_id_predicted; // Indicates if the mb is part of the image (1) vs border (0) diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 2d959f0ea..0c47da6bd 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -27,6 +27,9 @@ #define pair_set_epi16(a, b) \ _mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16)) +#define pair_set_epi32(a, b) \ + _mm_set_epi32(b, a, b, a) + // Constants: // for (int i = 1; i< 32; ++i) // printf("static const int cospi_%d_64 = %.0f;\n", i, diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 9e8714cb2..c5ae35806 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -740,7 +740,7 @@ prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int p specialize vp9_short_fdct8x4 sse2 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize 
vp9_short_fdct32x32 +specialize vp9_short_fdct32x32 sse2 prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct32x32_rd sse2 -- cgit v1.2.3