diff options
author | Jian Zhou <zhoujian@google.com> | 2015-12-14 17:56:01 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-12-14 17:56:01 +0000 |
commit | 2404e3290e51b776dc16c5c082bbd39e45a15b10 (patch) | |
tree | 190c88b677e6e8136c5b22c3f77d492364bfbaf1 | |
parent | e19b7df8d33c795e0daf4432315325ec445d21bd (diff) | |
parent | 88120481a4475c1b40f867b8d80edfd2a560a315 (diff) | |
download | libvpx-2404e3290e51b776dc16c5c082bbd39e45a15b10.tar libvpx-2404e3290e51b776dc16c5c082bbd39e45a15b10.tar.gz libvpx-2404e3290e51b776dc16c5c082bbd39e45a15b10.tar.bz2 libvpx-2404e3290e51b776dc16c5c082bbd39e45a15b10.zip |
Merge "Code clean of tm_predictor_32x32"
mode | file | lines changed |
---|---|---|
-rw-r--r-- | test/test_intra_pred_speed.cc | 9 |
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 |
-rw-r--r-- | vpx_dsp/x86/intrapred_sse2.asm | 43 |
3 files changed, 21 insertions, 33 deletions
```diff
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 4064ea645..3e65fecfb 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -337,21 +337,12 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
                 vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)
 
 #if HAVE_SSE2 && CONFIG_USE_X86INC
-#if ARCH_X86_64
 INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
                 vpx_dc_left_predictor_32x32_sse2,
                 vpx_dc_top_predictor_32x32_sse2,
                 vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
                 vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
                 NULL, vpx_tm_predictor_32x32_sse2)
-#else
-INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
-                vpx_dc_left_predictor_32x32_sse2,
-                vpx_dc_top_predictor_32x32_sse2,
-                vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
-                vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL)
-#endif // ARCH_X86_64
 #endif // HAVE_SSE2 && CONFIG_USE_X86INC
 
 #if HAVE_SSSE3 && CONFIG_USE_X86INC
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 4d36e2796..798dbf124 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -241,7 +241,7 @@ add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, con
 specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
 
 add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
+specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc";
 
 add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
diff --git a/vpx_dsp/x86/intrapred_sse2.asm b/vpx_dsp/x86/intrapred_sse2.asm
index 6f924a779..c24d53686 100644
--- a/vpx_dsp/x86/intrapred_sse2.asm
+++ b/vpx_dsp/x86/intrapred_sse2.asm
@@ -700,9 +700,8 @@ cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left
     jnz .loop
     REP_RET
 
-%if ARCH_X86_64
 INIT_XMM sse2
-cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
+cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left
     pxor m1, m1
     movd m2, [aboveq-1]
     mova m0, [aboveq]
@@ -723,31 +722,29 @@ cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
     psubw m5, m2
 .loop:
     movd m2, [leftq+lineq*2]
-    movd m6, [leftq+lineq*2+1]
+    pxor m1, m1
     punpcklbw m2, m1
-    punpcklbw m6, m1
+    pshuflw m7, m2, 0x55
     pshuflw m2, m2, 0x0
-    pshuflw m6, m6, 0x0
     punpcklqdq m2, m2
-    punpcklqdq m6, m6
-    paddw m7, m2, m0
-    paddw m8, m2, m3
-    paddw m9, m2, m4
-    paddw m2, m5
-    packuswb m7, m8
-    packuswb m9, m2
-    paddw m2, m6, m0
-    paddw m8, m6, m3
-    mova [dstq ], m7
-    paddw m7, m6, m4
-    paddw m6, m5
-    mova [dstq +16], m9
-    packuswb m2, m8
-    packuswb m7, m6
-    mova [dstq+strideq ], m2
-    mova [dstq+strideq+16], m7
+    punpcklqdq m7, m7
+    paddw m6, m2, m3
+    paddw m1, m2, m0
+    packuswb m1, m6
+    mova [dstq ], m1
+    paddw m6, m2, m5
+    paddw m1, m2, m4
+    packuswb m1, m6
+    mova [dstq+16 ], m1
+    paddw m6, m7, m3
+    paddw m1, m7, m0
+    packuswb m1, m6
+    mova [dstq+strideq ], m1
+    paddw m6, m7, m5
+    paddw m1, m7, m4
+    packuswb m1, m6
+    mova [dstq+strideq+16], m1
     lea dstq, [dstq+strideq*2]
     inc lineq
     jnz .loop
     REP_RET
-%endif
```