diff options
author | Johann <johann.koenig@duck.com> | 2018-10-30 12:59:46 -0700 |
---|---|---|
committer | Johann <johann.koenig@duck.com> | 2018-11-01 12:14:14 -0700 |
commit | 96082749aaf639333c0889b3f402291f2eee69b7 (patch) | |
tree | dfa32078669b1a441d6485978c076006a2f95b42 /vpx_dsp/arm/sad4d_neon.c | |
parent | 4635b0fced6df4b371454b52cfd512c14eec1f76 (diff) | |
download | libvpx-96082749aaf639333c0889b3f402291f2eee69b7.tar libvpx-96082749aaf639333c0889b3f402291f2eee69b7.tar.gz libvpx-96082749aaf639333c0889b3f402291f2eee69b7.tar.bz2 libvpx-96082749aaf639333c0889b3f402291f2eee69b7.zip |
clang-tidy: fix vpx_dsp parameters
BUG=webm:1444
Change-Id: Iee19be068afc6c81396c79218a89c469d2e66207
Diffstat (limited to 'vpx_dsp/arm/sad4d_neon.c')
-rw-r--r-- | vpx_dsp/arm/sad4d_neon.c | 162 |
1 file changed, 84 insertions, 78 deletions
diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c index 535ec0f0d..06443c699 100644 --- a/vpx_dsp/arm/sad4d_neon.c +++ b/vpx_dsp/arm/sad4d_neon.c @@ -28,24 +28,25 @@ static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0, return vreinterpret_u8_u32(aa); } -static INLINE void sad4x_4d(const uint8_t *const src, const int src_stride, - const uint8_t *const ref[4], const int ref_stride, - const int height, uint32_t *const res) { +static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride, + const uint8_t *const ref_array[4], + const int ref_stride, const int height, + uint32_t *const res) { int i; uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) }; uint16x4_t a[2]; uint32x4_t r; - assert(!((intptr_t)src % sizeof(uint32_t))); + assert(!((intptr_t)src_ptr % sizeof(uint32_t))); assert(!(src_stride % sizeof(uint32_t))); for (i = 0; i < height; ++i) { const uint8x8_t s = vreinterpret_u8_u32( - vld1_dup_u32((const uint32_t *)(src + i * src_stride))); - const uint8x8_t ref01 = load_unaligned_2_buffers(ref[0] + i * ref_stride, - ref[1] + i * ref_stride); - const uint8x8_t ref23 = load_unaligned_2_buffers(ref[2] + i * ref_stride, - ref[3] + i * ref_stride); + vld1_dup_u32((const uint32_t *)(src_ptr + i * src_stride))); + const uint8x8_t ref01 = load_unaligned_2_buffers( + ref_array[0] + i * ref_stride, ref_array[1] + i * ref_stride); + const uint8x8_t ref23 = load_unaligned_2_buffers( + ref_array[2] + i * ref_stride, ref_array[3] + i * ref_stride); abs[0] = vabal_u8(abs[0], s, ref01); abs[1] = vabal_u8(abs[1], s, ref23); } @@ -56,16 +57,16 @@ static INLINE void sad4x_4d(const uint8_t *const src, const int src_stride, vst1q_u32(res, r); } -void vpx_sad4x4x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad4x4x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad4x_4d(src, src_stride, ref, ref_stride, 4, 
res); + sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, res); } -void vpx_sad4x8x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad4x8x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad4x_4d(src, src_stride, ref, ref_stride, 8, res); + sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, res); } //////////////////////////////////////////////////////////////////////////////// @@ -137,17 +138,18 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/, vst1q_u32(res, vcombine_u32(d0, d1)); } -static INLINE void sad8x_4d(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res, const int height) { int i, j; - const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < height; ++i) { - const uint8x8_t s = vld1_u8(src); - src += src_stride; + const uint8x8_t s = vld1_u8(src_ptr); + src_ptr += src_stride; for (j = 0; j < 4; ++j) { const uint8x8_t b_u8 = vld1_u8(ref_loop[j]); ref_loop[j] += ref_stride; @@ -158,44 +160,45 @@ static INLINE void sad8x_4d(const uint8_t *src, int src_stride, sad_512_pel_final_neon(sum, res); } -void vpx_sad8x4x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad8x4x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad8x_4d(src, src_stride, ref, ref_stride, res, 4); + sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 4); } -void vpx_sad8x8x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const 
ref[4], int ref_stride, +void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad8x_4d(src, src_stride, ref, ref_stride, res, 8); + sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8); } -void vpx_sad8x16x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad8x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad8x_4d(src, src_stride, ref, ref_stride, res, 16); + sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); } //////////////////////////////////////////////////////////////////////////////// -static INLINE void sad16_neon(const uint8_t *ref, const uint8x16_t src, +static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr, uint16x8_t *const sum) { - const uint8x16_t r = vld1q_u8(ref); - *sum = vabal_u8(*sum, vget_low_u8(src), vget_low_u8(r)); - *sum = vabal_u8(*sum, vget_high_u8(src), vget_high_u8(r)); + const uint8x16_t r = vld1q_u8(ref_ptr); + *sum = vabal_u8(*sum, vget_low_u8(src_ptr), vget_low_u8(r)); + *sum = vabal_u8(*sum, vget_high_u8(src_ptr), vget_high_u8(r)); } -static INLINE void sad16x_4d(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res, const int height) { int i, j; - const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < height; ++i) { - const uint8x16_t s = vld1q_u8(src); - src += src_stride; + const uint8x16_t s = vld1q_u8(src_ptr); + src_ptr += src_stride; for (j = 0; j < 4; ++j) { sad16_neon(ref_loop[j], s, &sum[j]); ref_loop[j] += 
ref_stride; @@ -205,50 +208,51 @@ static INLINE void sad16x_4d(const uint8_t *src, int src_stride, sad_512_pel_final_neon(sum, res); } -void vpx_sad16x8x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad16x_4d(src, src_stride, ref, ref_stride, res, 8); + sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8); } -void vpx_sad16x16x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad16x_4d(src, src_stride, ref, ref_stride, res, 16); + sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); } -void vpx_sad16x32x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad16x_4d(src, src_stride, ref, ref_stride, res, 32); + sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32); } //////////////////////////////////////////////////////////////////////////////// -static INLINE void sad32x_4d(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, const int height, uint16x8_t *const sum) { int i; - const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; sum[0] = sum[1] = sum[2] = sum[3] = vdupq_n_u16(0); for (i = 0; i < height; ++i) { uint8x16_t s; - s = vld1q_u8(src + 0 * 16); + s = vld1q_u8(src_ptr + 0 * 16); sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); 
sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); - s = vld1q_u8(src + 1 * 16); + s = vld1q_u8(src_ptr + 1 * 16); sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); - src += src_stride; + src_ptr += src_stride; ref_loop[0] += ref_stride; ref_loop[1] += ref_stride; ref_loop[2] += ref_stride; @@ -256,68 +260,69 @@ static INLINE void sad32x_4d(const uint8_t *src, int src_stride, } } -void vpx_sad32x16x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { uint16x8_t sum[4]; - sad32x_4d(src, src_stride, ref, ref_stride, 16, sum); + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 16, sum); sad_512_pel_final_neon(sum, res); } -void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { uint16x8_t sum[4]; - sad32x_4d(src, src_stride, ref, ref_stride, 32, sum); + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 32, sum); sad_1024_pel_final_neon(sum, res); } -void vpx_sad32x64x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { uint16x8_t sum[4]; - sad32x_4d(src, src_stride, ref, ref_stride, 64, sum); + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 64, sum); sad_2048_pel_final_neon(sum, res); } //////////////////////////////////////////////////////////////////////////////// -void vpx_sad64x32x4d_neon(const uint8_t 
*src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { int i; - const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < 32; ++i) { uint8x16_t s; - s = vld1q_u8(src + 0 * 16); + s = vld1q_u8(src_ptr + 0 * 16); sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); - s = vld1q_u8(src + 1 * 16); + s = vld1q_u8(src_ptr + 1 * 16); sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); - s = vld1q_u8(src + 2 * 16); + s = vld1q_u8(src_ptr + 2 * 16); sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]); - s = vld1q_u8(src + 3 * 16); + s = vld1q_u8(src_ptr + 3 * 16); sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]); - src += src_stride; + src_ptr += src_stride; ref_loop[0] += ref_stride; ref_loop[1] += ref_stride; ref_loop[2] += ref_stride; @@ -327,11 +332,12 @@ void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride, sad_2048_pel_final_neon(sum, res); } -void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { int i; - 
const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; @@ -339,31 +345,31 @@ void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride, for (i = 0; i < 64; ++i) { uint8x16_t s; - s = vld1q_u8(src + 0 * 16); + s = vld1q_u8(src_ptr + 0 * 16); sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]); sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]); sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]); - s = vld1q_u8(src + 1 * 16); + s = vld1q_u8(src_ptr + 1 * 16); sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]); sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]); sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]); - s = vld1q_u8(src + 2 * 16); + s = vld1q_u8(src_ptr + 2 * 16); sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]); sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]); sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]); sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]); - s = vld1q_u8(src + 3 * 16); + s = vld1q_u8(src_ptr + 3 * 16); sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]); sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]); sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]); sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]); - src += src_stride; + src_ptr += src_stride; ref_loop[0] += ref_stride; ref_loop[1] += ref_stride; ref_loop[2] += ref_stride; |