summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: James Zern <jzern@google.com>, 2015-06-24 21:23:15 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com>, 2015-06-24 21:23:15 +0000
commit: d219f2b9d246305e788e906f17436cc3f2068d36 (patch)
tree: 4ae850c1831c38ea8b4560cbf969809d97e6c4ff
parent: 0bd61519c07b5b5d93874779998822bdd564fdb6 (diff)
parent: 9db1f24c47e543cc14eff3e8dc375cca1476b4d1 (diff)
download: libvpx-d219f2b9d246305e788e906f17436cc3f2068d36.tar
libvpx-d219f2b9d246305e788e906f17436cc3f2068d36.tar.gz
libvpx-d219f2b9d246305e788e906f17436cc3f2068d36.tar.bz2
libvpx-d219f2b9d246305e788e906f17436cc3f2068d36.zip
Merge "vp9_reconintra_neon: add d45 16x16"
-rw-r--r-- test/test_intra_pred_speed.cc 4
-rw-r--r-- vp9/common/arm/neon/vp9_reconintra_neon.c 17
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 2
3 files changed, 20 insertions, 3 deletions
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 7b4c4350e..46d4a2582 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -316,8 +316,8 @@ INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
vp9_dc_left_predictor_16x16_neon,
vp9_dc_top_predictor_16x16_neon,
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
- vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
- vp9_tm_predictor_16x16_neon)
+ vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
+ NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
#if HAVE_MSA
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c
index cfd5905ef..92706bf2c 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -358,6 +358,23 @@ void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
vst1_u8(dst + i * stride, row);
}
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,  // stores 16 rows of 16 bytes at dst, rows `stride` bytes apart
+ const uint8_t *above, const uint8_t *left) {  // reads above[0..15] only; left is unused
+ const uint8x16_t A0 = vld1q_u8(above); // top row: above[0..15]
+ const uint8x16_t above_right = vld1q_dup_u8(above + 15);  // above[15] broadcast to all 16 lanes
+ const uint8x16_t A1 = vextq_u8(A0, above_right, 1);  // above[1..15] followed by above[15]
+ const uint8x16_t A2 = vextq_u8(A0, above_right, 2);  // above[2..15] followed by above[15] twice
+ const uint8x16_t avg1 = vhaddq_u8(A0, A2);  // (A0 + A2) >> 1 per lane, computed without 8-bit overflow
+ uint8x16_t row = vrhaddq_u8(avg1, A1);  // (avg1 + A1 + 1) >> 1, which equals (A0 + 2 * A1 + A2 + 2) >> 2
+ int i;
+ (void)left;  // marks the parameter as intentionally unused
+ for (i = 0; i < 15; ++i) {  // rows 0..14
+ vst1q_u8(dst + i * stride, row);
+ row = vextq_u8(row, above_right, 1);  // next row: drop lane 0, shift down one lane, fill the top lane with above[15]
+ }
+ vst1q_u8(dst + i * stride, row);  // row 15 (i == 15 here); stored after the loop so no unused shift is computed
+}
+
// -----------------------------------------------------------------------------
void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index abc051d5c..64649bf28 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -138,7 +138,7 @@ add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";