diff options
-rw-r--r-- | test/test_intra_pred_speed.cc | 4 | ||||
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon.c | 20 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 2 |
3 files changed, 23 insertions, 3 deletions
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index 352cde25a..7b4c4350e 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -264,8 +264,8 @@ INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL, INTRA_PRED_TEST(NEON, TestIntraPred8, vp9_dc_predictor_8x8_neon, vp9_dc_left_predictor_8x8_neon, vp9_dc_top_predictor_8x8_neon, vp9_dc_128_predictor_8x8_neon, vp9_v_predictor_8x8_neon, - vp9_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL, - vp9_tm_predictor_8x8_neon) + vp9_h_predictor_8x8_neon, vp9_d45_predictor_8x8_neon, NULL, + NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_neon) #endif // HAVE_NEON diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c index 13c46a57e..cfd5905ef 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.c +++ b/vp9/common/arm/neon/vp9_reconintra_neon.c @@ -338,6 +338,26 @@ void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, dst[3 * stride + 3] = above[7]; } +void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 }; + static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 }; + const uint8x8_t sh_12345677 = vld1_u8(shuffle1); + const uint8x8_t sh_23456777 = vld1_u8(shuffle2); + const uint8x8_t A0 = vld1_u8(above); // top row + const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677); + const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777); + const uint8x8_t avg1 = vhadd_u8(A0, A2); + uint8x8_t row = vrhadd_u8(avg1, A1); + int i; + (void)left; + for (i = 0; i < 7; ++i) { + vst1_u8(dst + i * stride, row); + row = vtbl1_u8(row, sh_12345677); + } + vst1_u8(dst + i * stride, row); +} + // ----------------------------------------------------------------------------- void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 27cd3d010..1f265fe0b 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -99,7 +99,7 @@ add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc"; +specialize qw/vp9_d45_predictor_8x8 neon/, "$ssse3_x86inc"; add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; |