 vpx_dsp/arm/sad_neon.c      | 32 ++++++++++++++++----------------
 vpx_dsp/arm/variance_neon.c | 12 ++++++------
 2 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/vpx_dsp/arm/sad_neon.c b/vpx_dsp/arm/sad_neon.c
index 4753aeaec..ad575d4aa 100644
--- a/vpx_dsp/arm/sad_neon.c
+++ b/vpx_dsp/arm/sad_neon.c
@@ -21,7 +21,7 @@ uint32_t vpx_sad4x4_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride) {
const uint8x16_t src_u8 = load_unaligned_u8q(src_ptr, src_stride);
const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride);
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
const uint8x16_t sad_u8 = vabdq_u8(src_u8, ref_u8);
const uint32x4_t dp = vdotq_u32(vdupq_n_u32(0), sad_u8, vdupq_n_u8(1));
return horizontal_add_uint32x4(dp);
@@ -39,7 +39,7 @@ uint32_t vpx_sad4x4_avg_neon(const uint8_t *src_ptr, int src_stride,
const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride);
const uint8x16_t second_pred_u8 = vld1q_u8(second_pred);
const uint8x16_t avg = vrhaddq_u8(ref_u8, second_pred_u8);
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
const uint8x16_t sad_u8 = vabdq_u8(src_u8, avg);
const uint32x4_t prod = vdotq_u32(vdupq_n_u32(0), sad_u8, vdupq_n_u8(1));
return horizontal_add_uint32x4(prod);
@@ -52,7 +52,7 @@ uint32_t vpx_sad4x4_avg_neon(const uint8_t *src_ptr, int src_stride,
uint32_t vpx_sad4x8_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride) {
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
uint32x4_t prod = vdupq_n_u32(0);
const uint8x16_t ones = vdupq_n_u8(1);
const uint8x16_t src1_u8 = load_unaligned_u8q(src_ptr, src_stride);
@@ -85,7 +85,7 @@ uint32_t vpx_sad4x8_neon(const uint8_t *src_ptr, int src_stride,
uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const uint8_t *second_pred) {
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
uint32x4_t prod = vdupq_n_u32(0);
const uint8x16_t ones = vdupq_n_u8(1);
const uint8x16_t src1_u8 = load_unaligned_u8q(src_ptr, src_stride);
@@ -122,7 +122,7 @@ uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride,
#endif
}
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
static INLINE uint32x2_t sad8x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -177,7 +177,7 @@ static INLINE uint32x2_t sad8x_avg(const uint8_t *src_ptr, int src_stride,
return horizontal_add_uint32x2(prod); \
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !defined(__ARM_FEATURE_DOTPROD)
static INLINE uint16x8_t sad8x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -228,13 +228,13 @@ static INLINE uint16x8_t sad8x_avg(const uint8_t *src_ptr, int src_stride,
sad8x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
return horizontal_add_uint16x8(abs); \
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // defined(__ARM_FEATURE_DOTPROD)
SAD8XN(4)
SAD8XN(8)
SAD8XN(16)
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
static INLINE uint32x4_t sad16x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -288,7 +288,7 @@ static INLINE uint32x4_t sad16x_avg(const uint8_t *src_ptr, int src_stride,
sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
return horizontal_add_uint32x4(prod); \
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !defined(__ARM_FEATURE_DOTPROD)
static INLINE uint16x8_t sad16x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -342,13 +342,13 @@ static INLINE uint16x8_t sad16x_avg(const uint8_t *src_ptr, int src_stride,
sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
return horizontal_add_uint16x8(abs); \
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // defined(__ARM_FEATURE_DOTPROD)
SAD16XN(8)
SAD16XN(16)
SAD16XN(32)
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
static INLINE uint32x4_t sad32x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -413,7 +413,7 @@ static INLINE uint32x4_t sad32x_avg(const uint8_t *src_ptr, int src_stride,
return horizontal_add_uint32x4(prod); \
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !defined(__ARM_FEATURE_DOTPROD)
static INLINE uint16x8_t sad32x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -477,13 +477,13 @@ static INLINE uint16x8_t sad32x_avg(const uint8_t *src_ptr, int src_stride,
sad32x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
return horizontal_add_uint16x8(abs); \
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // defined(__ARM_FEATURE_DOTPROD)
SAD32XN(16)
SAD32XN(32)
SAD32XN(64)
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if defined(__ARM_FEATURE_DOTPROD)
static INLINE uint32x4_t sad64x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -551,7 +551,7 @@ static INLINE uint32x4_t sad64x_avg(const uint8_t *src_ptr, int src_stride,
}
return prod;
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !defined(__ARM_FEATURE_DOTPROD)
static INLINE uint32x4_t sad64x(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int height) {
@@ -629,7 +629,7 @@ static INLINE uint32x4_t sad64x_avg(const uint8_t *src_ptr, int src_stride,
return vpadalq_u16(sum, abs_1);
}
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // defined(__ARM_FEATURE_DOTPROD)
#define SAD64XN(n) \
uint32_t vpx_sad64x##n##_neon(const uint8_t *src_ptr, int src_stride, \
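
Note (not part of the patch): the hunks above drop the __aarch64__ test because the compiler defines __ARM_FEATURE_DOTPROD only when the dot-product extension is actually enabled (e.g. -march=armv8.2-a+dotprod), in both AArch32 and AArch64 state, so the feature macro alone is a sufficient guard. Below is a minimal standalone sketch of the SAD idiom those blocks use; the function name and the explicit horizontal add are illustrative assumptions, whereas libvpx itself uses its own load and horizontal_add_uint32x4 helpers.

#include <arm_neon.h>
#include <stdint.h>

/* Sketch: SAD of one 16-byte row via the dot-product extension.
 * vabdq_u8 gives |src - ref| per byte; dotting that with a vector of
 * ones sums each group of four bytes into one 32-bit lane of dp. */
static uint32_t sad16_dotprod_sketch(const uint8_t *src, const uint8_t *ref) {
  const uint8x16_t d = vabdq_u8(vld1q_u8(src), vld1q_u8(ref));
  const uint32x4_t dp = vdotq_u32(vdupq_n_u32(0), d, vdupq_n_u8(1));
  /* Horizontal add of the four lanes; valid on AArch32 and AArch64. */
  const uint32x2_t s = vadd_u32(vget_low_u32(dp), vget_high_u32(dp));
  return vget_lane_u32(vpadd_u32(s, s), 0);
}
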
diff --git a/vpx_dsp/arm/variance_neon.c b/vpx_dsp/arm/variance_neon.c
index 1b5cbcc46..f9969ed5a 100644
--- a/vpx_dsp/arm/variance_neon.c
+++ b/vpx_dsp/arm/variance_neon.c
@@ -19,7 +19,7 @@
#include "vpx_dsp/arm/sum_neon.h"
#include "vpx_ports/mem.h"
-#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1)
+#if defined(__ARM_FEATURE_DOTPROD)
// Process a block of width 4 four rows at a time.
static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride,
@@ -111,7 +111,7 @@ static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride,
*sse = horizontal_add_uint32x2(vadd_u32(sse_lo_u32, sse_hi_u32));
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !defined(__ARM_FEATURE_DOTPROD)
// The variance helper functions use int16_t for sum. 8 values are accumulated
// and then added (at which point they expand up to int32_t). To avoid overflow,
@@ -254,7 +254,7 @@ static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride,
vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // defined(__ARM_FEATURE_DOTPROD)
void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
@@ -357,7 +357,7 @@ unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int src_stride,
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
}
-#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1)
+#if defined(__ARM_FEATURE_DOTPROD)
unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
@@ -421,7 +421,7 @@ unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
return vget_lane_u32(sse, 0);
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !defined(__ARM_FEATURE_DOTPROD)
unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
@@ -518,4 +518,4 @@ unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse));
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // defined(__ARM_FEATURE_DOTPROD)
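
Note (not part of the patch): a similarly illustrative sketch of the dot-product path guarded in variance_neon.c above, here for the MSE case only. Dotting the absolute-difference vector with itself accumulates (src - ref)^2 into 32-bit lanes; the function name and loop structure below are assumptions, not libvpx code.

#include <arm_neon.h>
#include <stdint.h>

/* Sketch: 16x16 sum of squared errors using the dot-product extension.
 * vabdq_u8 gives |src - ref| per byte; vdotq_u32(acc, d, d) adds the
 * squares of each group of four bytes into the matching 32-bit lane. */
static uint32_t sse16x16_dotprod_sketch(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride) {
  uint32x4_t sse = vdupq_n_u32(0);
  int i;
  for (i = 0; i < 16; ++i) {
    const uint8x16_t d = vabdq_u8(vld1q_u8(src), vld1q_u8(ref));
    sse = vdotq_u32(sse, d, d);
    src += src_stride;
    ref += ref_stride;
  }
  /* Horizontal add of the four lanes; valid on AArch32 and AArch64. */
  {
    const uint32x2_t s = vadd_u32(vget_low_u32(sse), vget_high_u32(sse));
    return vget_lane_u32(vpadd_u32(s, s), 0);
  }
}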