From fdd1186f9775cec72bcaaf98739b4d5f2e08b0ac Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 20 Sep 2016 20:22:08 -0700
Subject: vpx_idct32x32_34_add_sse2: rm unneeded transposes

The performance impact of this change is neutral to mildly positive
across various x86-64 platforms.

Change-Id: I28fb5ae598fc1317b7a42c9a846ac5d57d104784
---
 vpx_dsp/x86/inv_txfm_sse2.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'vpx_dsp')

diff --git a/vpx_dsp/x86/inv_txfm_sse2.c b/vpx_dsp/x86/inv_txfm_sse2.c
index 330ae8d6a..cb56ad078 100644
--- a/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/vpx_dsp/x86/inv_txfm_sse2.c
@@ -3066,17 +3066,7 @@ void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
   in[6] = load_input_data(input + 192);
   in[7] = load_input_data(input + 224);
 
-  for (i = 8; i < 32; ++i) {
-    in[i] = _mm_setzero_si128();
-  }
-
   array_transpose_8x8(in, in);
-  // TODO(hkuang): Following transposes are unnecessary. But remove them will
-  // lead to performance drop on some devices.
-  array_transpose_8x8(in + 8, in + 8);
-  array_transpose_8x8(in + 16, in + 16);
-  array_transpose_8x8(in + 24, in + 24);
-
   IDCT32_34
 
   // 1_D: Store 32 intermediate results for each 8x32 block.
-- 
cgit v1.2.3