summary | refs | log | tree | commit | diff
path: root/vpx_dsp/arm/sad4d_neon.c
diff options
context:
space:
mode:
author: Johann <johann.koenig@duck.com> 2018-10-30 12:59:46 -0700
committer: Johann <johann.koenig@duck.com> 2018-11-01 12:14:14 -0700
commit: 96082749aaf639333c0889b3f402291f2eee69b7 (patch)
tree: dfa32078669b1a441d6485978c076006a2f95b42 /vpx_dsp/arm/sad4d_neon.c
parent: 4635b0fced6df4b371454b52cfd512c14eec1f76 (diff)
download: libvpx-96082749aaf639333c0889b3f402291f2eee69b7.tar
libvpx-96082749aaf639333c0889b3f402291f2eee69b7.tar.gz
libvpx-96082749aaf639333c0889b3f402291f2eee69b7.tar.bz2
libvpx-96082749aaf639333c0889b3f402291f2eee69b7.zip
clang-tidy: fix vpx_dsp parameters
BUG=webm:1444
Change-Id: Iee19be068afc6c81396c79218a89c469d2e66207
Diffstat (limited to 'vpx_dsp/arm/sad4d_neon.c')
-rw-r--r-- vpx_dsp/arm/sad4d_neon.c 162
1 files changed, 84 insertions, 78 deletions
diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c
index 535ec0f0d..06443c699 100644
--- a/vpx_dsp/arm/sad4d_neon.c
+++ b/vpx_dsp/arm/sad4d_neon.c
@@ -28,24 +28,25 @@ static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0,
return vreinterpret_u8_u32(aa);
}
-static INLINE void sad4x_4d(const uint8_t *const src, const int src_stride,
- const uint8_t *const ref[4], const int ref_stride,
- const int height, uint32_t *const res) {
+static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride,
+ const uint8_t *const ref_array[4],
+ const int ref_stride, const int height,
+ uint32_t *const res) {
int i;
uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) };
uint16x4_t a[2];
uint32x4_t r;
- assert(!((intptr_t)src % sizeof(uint32_t)));
+ assert(!((intptr_t)src_ptr % sizeof(uint32_t)));
assert(!(src_stride % sizeof(uint32_t)));
for (i = 0; i < height; ++i) {
const uint8x8_t s = vreinterpret_u8_u32(
- vld1_dup_u32((const uint32_t *)(src + i * src_stride)));
- const uint8x8_t ref01 = load_unaligned_2_buffers(ref[0] + i * ref_stride,
- ref[1] + i * ref_stride);
- const uint8x8_t ref23 = load_unaligned_2_buffers(ref[2] + i * ref_stride,
- ref[3] + i * ref_stride);
+ vld1_dup_u32((const uint32_t *)(src_ptr + i * src_stride)));
+ const uint8x8_t ref01 = load_unaligned_2_buffers(
+ ref_array[0] + i * ref_stride, ref_array[1] + i * ref_stride);
+ const uint8x8_t ref23 = load_unaligned_2_buffers(
+ ref_array[2] + i * ref_stride, ref_array[3] + i * ref_stride);
abs[0] = vabal_u8(abs[0], s, ref01);
abs[1] = vabal_u8(abs[1], s, ref23);
}
@@ -56,16 +57,16 @@ static INLINE void sad4x_4d(const uint8_t *const src, const int src_stride,
vst1q_u32(res, r);
}
-void vpx_sad4x4x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad4x4x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad4x_4d(src, src_stride, ref, ref_stride, 4, res);
+ sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, res);
}
-void vpx_sad4x8x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad4x8x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad4x_4d(src, src_stride, ref, ref_stride, 8, res);
+ sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, res);
}
////////////////////////////////////////////////////////////////////////////////
@@ -137,17 +138,18 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
vst1q_u32(res, vcombine_u32(d0, d1));
}
-static INLINE void sad8x_4d(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
int i, j;
- const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0) };
for (i = 0; i < height; ++i) {
- const uint8x8_t s = vld1_u8(src);
- src += src_stride;
+ const uint8x8_t s = vld1_u8(src_ptr);
+ src_ptr += src_stride;
for (j = 0; j < 4; ++j) {
const uint8x8_t b_u8 = vld1_u8(ref_loop[j]);
ref_loop[j] += ref_stride;
@@ -158,44 +160,45 @@ static INLINE void sad8x_4d(const uint8_t *src, int src_stride,
sad_512_pel_final_neon(sum, res);
}
-void vpx_sad8x4x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad8x4x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad8x_4d(src, src_stride, ref, ref_stride, res, 4);
+ sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 4);
}
-void vpx_sad8x8x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad8x_4d(src, src_stride, ref, ref_stride, res, 8);
+ sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8);
}
-void vpx_sad8x16x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad8x16x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad8x_4d(src, src_stride, ref, ref_stride, res, 16);
+ sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
}
////////////////////////////////////////////////////////////////////////////////
-static INLINE void sad16_neon(const uint8_t *ref, const uint8x16_t src,
+static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
uint16x8_t *const sum) {
- const uint8x16_t r = vld1q_u8(ref);
- *sum = vabal_u8(*sum, vget_low_u8(src), vget_low_u8(r));
- *sum = vabal_u8(*sum, vget_high_u8(src), vget_high_u8(r));
+ const uint8x16_t r = vld1q_u8(ref_ptr);
+ *sum = vabal_u8(*sum, vget_low_u8(src_ptr), vget_low_u8(r));
+ *sum = vabal_u8(*sum, vget_high_u8(src_ptr), vget_high_u8(r));
}
-static INLINE void sad16x_4d(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
int i, j;
- const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0) };
for (i = 0; i < height; ++i) {
- const uint8x16_t s = vld1q_u8(src);
- src += src_stride;
+ const uint8x16_t s = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
for (j = 0; j < 4; ++j) {
sad16_neon(ref_loop[j], s, &sum[j]);
ref_loop[j] += ref_stride;
@@ -205,50 +208,51 @@ static INLINE void sad16x_4d(const uint8_t *src, int src_stride,
sad_512_pel_final_neon(sum, res);
}
-void vpx_sad16x8x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad16x_4d(src, src_stride, ref, ref_stride, res, 8);
+ sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8);
}
-void vpx_sad16x16x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad16x_4d(src, src_stride, ref, ref_stride, res, 16);
+ sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
}
-void vpx_sad16x32x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
- sad16x_4d(src, src_stride, ref, ref_stride, res, 32);
+ sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);
}
////////////////////////////////////////////////////////////////////////////////
-static INLINE void sad32x_4d(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
const int height, uint16x8_t *const sum) {
int i;
- const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
sum[0] = sum[1] = sum[2] = sum[3] = vdupq_n_u16(0);
for (i = 0; i < height; ++i) {
uint8x16_t s;
- s = vld1q_u8(src + 0 * 16);
+ s = vld1q_u8(src_ptr + 0 * 16);
sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);
sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);
sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);
- s = vld1q_u8(src + 1 * 16);
+ s = vld1q_u8(src_ptr + 1 * 16);
sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);
sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);
sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);
- src += src_stride;
+ src_ptr += src_stride;
ref_loop[0] += ref_stride;
ref_loop[1] += ref_stride;
ref_loop[2] += ref_stride;
@@ -256,68 +260,69 @@ static INLINE void sad32x_4d(const uint8_t *src, int src_stride,
}
}
-void vpx_sad32x16x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint16x8_t sum[4];
- sad32x_4d(src, src_stride, ref, ref_stride, 16, sum);
+ sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 16, sum);
sad_512_pel_final_neon(sum, res);
}
-void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint16x8_t sum[4];
- sad32x_4d(src, src_stride, ref, ref_stride, 32, sum);
+ sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 32, sum);
sad_1024_pel_final_neon(sum, res);
}
-void vpx_sad32x64x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint16x8_t sum[4];
- sad32x_4d(src, src_stride, ref, ref_stride, 64, sum);
+ sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 64, sum);
sad_2048_pel_final_neon(sum, res);
}
////////////////////////////////////////////////////////////////////////////////
-void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
int i;
- const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0) };
for (i = 0; i < 32; ++i) {
uint8x16_t s;
- s = vld1q_u8(src + 0 * 16);
+ s = vld1q_u8(src_ptr + 0 * 16);
sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);
sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);
sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);
- s = vld1q_u8(src + 1 * 16);
+ s = vld1q_u8(src_ptr + 1 * 16);
sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);
sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);
sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);
- s = vld1q_u8(src + 2 * 16);
+ s = vld1q_u8(src_ptr + 2 * 16);
sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]);
sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]);
sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]);
- s = vld1q_u8(src + 3 * 16);
+ s = vld1q_u8(src_ptr + 3 * 16);
sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]);
sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]);
sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]);
- src += src_stride;
+ src_ptr += src_stride;
ref_loop[0] += ref_stride;
ref_loop[1] += ref_stride;
ref_loop[2] += ref_stride;
@@ -327,11 +332,12 @@ void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride,
sad_2048_pel_final_neon(sum, res);
}
-void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
+void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
int i;
- const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0), vdupq_n_u16(0) };
@@ -339,31 +345,31 @@ void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride,
for (i = 0; i < 64; ++i) {
uint8x16_t s;
- s = vld1q_u8(src + 0 * 16);
+ s = vld1q_u8(src_ptr + 0 * 16);
sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]);
sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]);
sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]);
- s = vld1q_u8(src + 1 * 16);
+ s = vld1q_u8(src_ptr + 1 * 16);
sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]);
sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]);
sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]);
- s = vld1q_u8(src + 2 * 16);
+ s = vld1q_u8(src_ptr + 2 * 16);
sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]);
sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]);
sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]);
sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]);
- s = vld1q_u8(src + 3 * 16);
+ s = vld1q_u8(src_ptr + 3 * 16);
sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]);
sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]);
sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]);
sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]);
- src += src_stride;
+ src_ptr += src_stride;
ref_loop[0] += ref_stride;
ref_loop[1] += ref_stride;
ref_loop[2] += ref_stride;