diff options
author | James Zern <jzern@google.com> | 2022-09-01 18:47:50 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2022-09-01 18:47:50 -0700 |
commit | 281dfae8353940fe380c73384607ec11a5c53f43 (patch) | |
tree | 97c71966a24e14aa53f10b6147a0569e2d573e54 /vpx_dsp | |
parent | 028fc1b50f196cab1ec93816654fbefe64f20cf3 (diff) | |
download | libvpx-281dfae8353940fe380c73384607ec11a5c53f43.tar libvpx-281dfae8353940fe380c73384607ec11a5c53f43.tar.gz libvpx-281dfae8353940fe380c73384607ec11a5c53f43.tar.bz2 libvpx-281dfae8353940fe380c73384607ec11a5c53f43.zip |
neon,load_unaligned_*: use dup for lane 0
this produces better assembly with gcc (11.3.0-3); no change in assembly
using clang from the r24 android sdk (Android (8075178, based on
r437112b) clang version 14.0.1
(https://android.googlesource.com/toolchain/llvm-project
8671348b81b95fc603505dfc881b45103bee1731))
Change-Id: Ifec252d4f499f23be1cd94aa8516caf6b3fbbc11
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/arm/mem_neon.h | 8 | ||||
-rw-r--r-- | vpx_dsp/arm/sad4d_neon.c | 4 |
2 files changed, 6 insertions, 6 deletions
diff --git a/vpx_dsp/arm/mem_neon.h b/vpx_dsp/arm/mem_neon.h index 50aaa94fe..84aae161b 100644 --- a/vpx_dsp/arm/mem_neon.h +++ b/vpx_dsp/arm/mem_neon.h @@ -116,11 +116,11 @@ static INLINE void uint32_to_mem(uint8_t *buf, uint32_t a) { static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, ptrdiff_t stride) { uint32_t a; - uint32x2_t a_u32 = vdup_n_u32(0); + uint32x2_t a_u32; if (stride == 4) return vld1_u8(buf); memcpy(&a, buf, 4); buf += stride; - a_u32 = vset_lane_u32(a, a_u32, 0); + a_u32 = vdup_n_u32(a); memcpy(&a, buf, 4); a_u32 = vset_lane_u32(a, a_u32, 1); return vreinterpret_u8_u32(a_u32); @@ -143,11 +143,11 @@ static INLINE void store_unaligned_u8(uint8_t *buf, ptrdiff_t stride, static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, ptrdiff_t stride) { uint32_t a; - uint32x4_t a_u32 = vdupq_n_u32(0); + uint32x4_t a_u32; if (stride == 4) return vld1q_u8(buf); memcpy(&a, buf, 4); buf += stride; - a_u32 = vsetq_lane_u32(a, a_u32, 0); + a_u32 = vdupq_n_u32(a); memcpy(&a, buf, 4); buf += stride; a_u32 = vsetq_lane_u32(a, a_u32, 1); diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c index 03f716c3d..53866296c 100644 --- a/vpx_dsp/arm/sad4d_neon.c +++ b/vpx_dsp/arm/sad4d_neon.c @@ -20,9 +20,9 @@ static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0, const void *const buf1) { uint32_t a; - uint32x2_t aa = vdup_n_u32(0); + uint32x2_t aa; memcpy(&a, buf0, 4); - aa = vset_lane_u32(a, aa, 0); + aa = vdup_n_u32(a); memcpy(&a, buf1, 4); aa = vset_lane_u32(a, aa, 1); return vreinterpret_u8_u32(aa); |