Refactor convolve8 NEON functions

Change-Id: I4ac576875c91fee7cb150d298fae4a2c156d374c
author: Linfeng Zhang <linfengz@google.com> 2017-09-05 14:48:17 -0700
committer: Linfeng Zhang <linfengz@google.com> 2017-09-06 15:55:17 -0700
commit: 3ec20445b28ceccb0a32727f81ef2659596aaf33 (patch)
tree: 323c9e26d16a462fd7eb838f43411aa93a00b7cb /vpx_dsp/arm/vpx_convolve8_neon.h
parent: d5d2cbcc758d8b735d2aba25663914185d11cada (diff)
download: libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.tar
libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.tar.gz
libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.tar.bz2
libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.zip
1 files changed, 58 insertions, 0 deletions
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.h b/vpx_dsp/arm/vpx_convolve8_neon.h
new file mode 100644
index 000000000..a086d481f
--- /dev/null
+++ b/vpx_dsp/arm/vpx_convolve8_neon.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+// Loads eight rows of eight bytes each. Row k is read from s + k * p (p is the
+// distance in bytes between the starts of consecutive rows) and written
+// through the k-th output pointer: s0 receives row 0, ..., s7 receives row 7.
+static INLINE void load_u8_8x8(const uint8_t *s, const ptrdiff_t p,
+                               uint8x8_t *const s0, uint8x8_t *const s1,
+                               uint8x8_t *const s2, uint8x8_t *const s3,
+                               uint8x8_t *const s4, uint8x8_t *const s5,
+                               uint8x8_t *const s6, uint8x8_t *const s7) {
+  *s0 = vld1_u8(s + 0 * p);
+  *s1 = vld1_u8(s + 1 * p);
+  *s2 = vld1_u8(s + 2 * p);
+  *s3 = vld1_u8(s + 3 * p);
+  *s4 = vld1_u8(s + 4 * p);
+  *s5 = vld1_u8(s + 5 * p);
+  *s6 = vld1_u8(s + 6 * p);
+  *s7 = vld1_u8(s + 7 * p);
+}
+
+// Applies an 8-tap filter to eight 16-bit lanes at once and returns the
+// result narrowed to eight unsigned bytes.
+//
+// s0..s7 are the eight input vectors, one per tap. The multipliers for s0, s1,
+// s2, s5, s6 and s7 are read from lanes 0, 1, 2, 5, 6 and 7 of |filters|. The
+// multipliers for s3 and s4 are passed separately as full vectors in |filter3|
+// and |filter4| (presumably lanes 3 and 4 of |filters| broadcast to every
+// lane -- TODO confirm against the callers).
+static INLINE uint8x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1,
+                                    const int16x8_t s2, const int16x8_t s3,
+                                    const int16x8_t s4, const int16x8_t s5,
+                                    const int16x8_t s6, const int16x8_t s7,
+                                    const int16x8_t filters,
+                                    const int16x8_t filter3,
+                                    const int16x8_t filter4) {
+  const int16x4_t filters_lo = vget_low_s16(filters);   // filters lanes 0..3
+  const int16x4_t filters_hi = vget_high_s16(filters);  // filters lanes 4..7
+  int16x8_t sum;
+
+  // Taps 0, 1, 2, 5, 6 and 7 are accumulated with the plain (non-saturating)
+  // 16-bit multiply / multiply-accumulate intrinsics. Lanes 1..3 of filters_hi
+  // are lanes 5..7 of |filters|.
+  sum = vmulq_lane_s16(s0, filters_lo, 0);
+  sum = vmlaq_lane_s16(sum, s1, filters_lo, 1);
+  sum = vmlaq_lane_s16(sum, s2, filters_lo, 2);
+  sum = vmlaq_lane_s16(sum, s5, filters_hi, 1);
+  sum = vmlaq_lane_s16(sum, s6, filters_hi, 2);
+  sum = vmlaq_lane_s16(sum, s7, filters_hi, 3);
+  // The s3 and s4 products are added last, using saturating adds.
+  // NOTE(review): presumably these are the largest taps, so saturation is only
+  // needed for these two additions -- confirm against the filter tables.
+  sum = vqaddq_s16(sum, vmulq_s16(s3, filter3));
+  sum = vqaddq_s16(sum, vmulq_s16(s4, filter4));
+  // Rounding right shift by 7, saturated and narrowed to unsigned 8-bit.
+  return vqrshrun_n_s16(sum, 7);
+}
author	Linfeng Zhang <linfengz@google.com>	2017-09-05 14:48:17 -0700
committer	Linfeng Zhang <linfengz@google.com>	2017-09-06 15:55:17 -0700
commit	3ec20445b28ceccb0a32727f81ef2659596aaf33 (patch)
tree	323c9e26d16a462fd7eb838f43411aa93a00b7cb /vpx_dsp/arm/vpx_convolve8_neon.h
parent	d5d2cbcc758d8b735d2aba25663914185d11cada (diff)
download	libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.tar libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.tar.gz libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.tar.bz2 libvpx-3ec20445b28ceccb0a32727f81ef2659596aaf33.zip