summaryrefslogtreecommitdiff
path: root/vp9/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/decoder')
-rw-r--r--vp9/decoder/vp9_idct_blk.c21
-rw-r--r--vp9/decoder/x86/vp9_dequantize_sse2.c59
2 files changed, 3 insertions, 77 deletions
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index faaee7378..bc943fa85 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -105,10 +105,6 @@ void vp9_add_residual_8x8_c(const int16_t *diff, uint8_t *dest, int stride) {
add_residual(diff, dest, stride, 8, 8);
}
-void vp9_add_residual_16x16_c(const int16_t *diff, uint8_t *dest, int stride) {
- add_residual(diff, dest, stride, 16, 16);
-}
-
static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
int width, int height) {
int r, c;
@@ -260,19 +256,14 @@ void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
if (tx_type == DCT_DCT) {
vp9_idct_add_16x16(input, dest, stride, eob);
} else {
- DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
-
if (eob > 0) {
- vp9_short_iht16x16(input, output, 16, tx_type);
+ vp9_short_iht16x16_add(input, dest, stride, tx_type);
vpx_memset(input, 0, 512);
- vp9_add_residual_16x16(output, dest, stride);
}
}
}
void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
- DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
-
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
if (eob) {
@@ -288,21 +279,15 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
vp9_add_constant_residual_16x16(out, dest, stride);
#if !CONFIG_SCATTERSCAN
} else if (eob <= 10) {
- // the idct halves ( >> 1) the pitch
- vp9_short_idct10_16x16(input, output, 32);
-
+ vp9_short_idct10_16x16_add(input, dest, stride);
input[0] = input[1] = input[2] = input[3] = 0;
input[16] = input[17] = input[18] = 0;
input[32] = input[33] = 0;
input[48] = 0;
-
- vp9_add_residual_16x16(output, dest, stride);
#endif
} else {
- // the idct halves ( >> 1) the pitch
- vp9_short_idct16x16(input, output, 16 << 1);
+ vp9_short_idct16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
- vp9_add_residual_16x16(output, dest, stride);
}
}
}
diff --git a/vp9/decoder/x86/vp9_dequantize_sse2.c b/vp9/decoder/x86/vp9_dequantize_sse2.c
index 38fd5aaa4..796fc123c 100644
--- a/vp9/decoder/x86/vp9_dequantize_sse2.c
+++ b/vp9/decoder/x86/vp9_dequantize_sse2.c
@@ -122,65 +122,6 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, uint8_t *dest, int stride) {
_mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
}
-void vp9_add_residual_16x16_sse2(const int16_t *diff, uint8_t *dest,
- int stride) {
- const int width = 16;
- int i = 4;
- const __m128i zero = _mm_setzero_si128();
-
- // Diff data
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i p0, p1, p2, p3, p4, p5, p6, p7;
-
- do {
- d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
- d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8));
- d2 = _mm_load_si128((const __m128i *)(diff + 1 * width));
- d3 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8));
- d4 = _mm_load_si128((const __m128i *)(diff + 2 * width));
- d5 = _mm_load_si128((const __m128i *)(diff + 2 * width + 8));
- d6 = _mm_load_si128((const __m128i *)(diff + 3 * width));
- d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8));
-
- // Prediction data.
- p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
- p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
- p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
- p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
-
- p0 = _mm_unpacklo_epi8(p1, zero);
- p1 = _mm_unpackhi_epi8(p1, zero);
- p2 = _mm_unpacklo_epi8(p3, zero);
- p3 = _mm_unpackhi_epi8(p3, zero);
- p4 = _mm_unpacklo_epi8(p5, zero);
- p5 = _mm_unpackhi_epi8(p5, zero);
- p6 = _mm_unpacklo_epi8(p7, zero);
- p7 = _mm_unpackhi_epi8(p7, zero);
-
- p0 = _mm_add_epi16(p0, d0);
- p1 = _mm_add_epi16(p1, d1);
- p2 = _mm_add_epi16(p2, d2);
- p3 = _mm_add_epi16(p3, d3);
- p4 = _mm_add_epi16(p4, d4);
- p5 = _mm_add_epi16(p5, d5);
- p6 = _mm_add_epi16(p6, d6);
- p7 = _mm_add_epi16(p7, d7);
-
- p0 = _mm_packus_epi16(p0, p1);
- p1 = _mm_packus_epi16(p2, p3);
- p2 = _mm_packus_epi16(p4, p5);
- p3 = _mm_packus_epi16(p6, p7);
-
- _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
- _mm_store_si128((__m128i *)(dest + 1 * stride), p1);
- _mm_store_si128((__m128i *)(dest + 2 * stride), p2);
- _mm_store_si128((__m128i *)(dest + 3 * stride), p3);
-
- diff += 4 * width;
- dest += 4 * stride;
- } while (--i);
-}
-
void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest,
int stride) {
uint8_t abs_diff;