diff options
author | James Zern <jzern@google.com> | 2015-06-19 19:19:22 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2015-06-19 19:19:22 -0700 |
commit | 12c6688e31a307e63924ae75d9ffea6d303295b1 (patch) | |
tree | 6e359b11a6d380a17b235667e25760f124c99c66 | |
parent | 90c9ede8e653a10855c583510aa2b071cc2f02c2 (diff) | |
download | libvpx-12c6688e31a307e63924ae75d9ffea6d303295b1.tar libvpx-12c6688e31a307e63924ae75d9ffea6d303295b1.tar.gz libvpx-12c6688e31a307e63924ae75d9ffea6d303295b1.tar.bz2 libvpx-12c6688e31a307e63924ae75d9ffea6d303295b1.zip |
vp9_reconintra_neon: add d45 8x8
based on ssse3 implementation
~91% faster over 20M pixels
Change-Id: I6d743a53352c2d6de0efe7899d7996e8b0f7fa29
-rw-r--r-- | test/test_intra_pred_speed.cc | 4 | ||||
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon.c | 20 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 2 |
3 files changed, 23 insertions, 3 deletions
```diff
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 352cde25a..7b4c4350e 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -264,8 +264,8 @@ INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
 INTRA_PRED_TEST(NEON, TestIntraPred8, vp9_dc_predictor_8x8_neon,
                 vp9_dc_left_predictor_8x8_neon, vp9_dc_top_predictor_8x8_neon,
                 vp9_dc_128_predictor_8x8_neon, vp9_v_predictor_8x8_neon,
-                vp9_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                vp9_tm_predictor_8x8_neon)
+                vp9_h_predictor_8x8_neon, vp9_d45_predictor_8x8_neon, NULL,
+                NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_neon)
 #endif // HAVE_NEON
 
 
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c
index 13c46a57e..cfd5905ef 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -338,6 +338,26 @@ void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
   dst[3 * stride + 3] = above[7];
 }
 
+void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+                                const uint8_t *above, const uint8_t *left) {
+  static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 };
+  static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 };
+  const uint8x8_t sh_12345677 = vld1_u8(shuffle1);
+  const uint8x8_t sh_23456777 = vld1_u8(shuffle2);
+  const uint8x8_t A0 = vld1_u8(above); // top row
+  const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677);
+  const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777);
+  const uint8x8_t avg1 = vhadd_u8(A0, A2);
+  uint8x8_t row = vrhadd_u8(avg1, A1);
+  int i;
+  (void)left;
+  for (i = 0; i < 7; ++i) {
+    vst1_u8(dst + i * stride, row);
+    row = vtbl1_u8(row, sh_12345677);
+  }
+  vst1_u8(dst + i * stride, row);
+}
+
 // -----------------------------------------------------------------------------
 
 void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 27cd3d010..1f265fe0b 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -99,7 +99,7 @@ add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
 specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_8x8 neon/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
```