author    Johann <johann.koenig@duck.com>    2018-10-30 14:43:36 -0700
committer Johann <johann.koenig@duck.com>    2018-10-31 15:05:37 -0700
commit    4635b0fced6df4b371454b52cfd512c14eec1f76
tree      2903808dd10b90a5dbe87c8b130ed1716276cea9
parent    331d289c5c540cf82d89c2a03da45c30e4fe0779
clang-tidy: normalize variance functions
Always use src/ref and _ptr/_stride suffixes. Normalize to [xy]_offset
and second_pred. Drop some stray source/recon_strides.

BUG=webm:1444
Change-Id: I32362a50988eb84464ab78686348610ea40e5c80
Diffstat (limited to 'vpx_dsp/arm')
-rw-r--r--  vpx_dsp/arm/subpel_variance_neon.c  104
-rw-r--r--  vpx_dsp/arm/variance_neon.c         170
2 files changed, 144 insertions(+), 130 deletions(-)
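For context, every function touched here computes the same two quantities over a src/ref block pair: a sum of squared differences (sse) and a signed sum of differences, from which variance is derived. A minimal scalar sketch using the normalized parameter names this patch standardizes on (variance_ref_c is a hypothetical helper for illustration, not part of the patch):

#include <stdint.h>

// Scalar reference for what the NEON kernels below compute: per-pixel
// differences between a source block and a reference block, accumulated
// as a signed sum and a sum of squares.
static void variance_ref_c(const uint8_t *src_ptr, int src_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           int w, int h, uint32_t *sse, int *sum) {
  int i, j;
  *sse = 0;
  *sum = 0;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src_ptr[j] - ref_ptr[j];
      *sum += diff;
      *sse += diff * diff;
    }
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }
}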
diff --git a/vpx_dsp/arm/subpel_variance_neon.c b/vpx_dsp/arm/subpel_variance_neon.c
index 4f58a7832..37bfd1cd1 100644
--- a/vpx_dsp/arm/subpel_variance_neon.c
+++ b/vpx_dsp/arm/subpel_variance_neon.c
@@ -97,30 +97,30 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
// 4xM filter writes an extra row to fdata because it processes two rows at a
// time.
-#define sub_pixel_varianceNxM(n, m) \
- uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
- uint8_t temp1[n * m]; \
- \
- if (n == 4) { \
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (m + 2), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else if (n == 8) { \
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (m + 1), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else { \
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (m + 1), n, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
- bilinear_filters[yoffset]); \
- } \
- return vpx_variance##n##x##m(temp1, n, b, b_stride, sse); \
+#define sub_pixel_varianceNxM(n, m) \
+ uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
+ uint8_t temp1[n * m]; \
+ \
+ if (n == 4) { \
+ var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else if (n == 8) { \
+ var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else { \
+ var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
+ bilinear_filters[y_offset]); \
+ } \
+ return vpx_variance##n##x##m(temp1, n, ref_ptr, ref_stride, sse); \
}
sub_pixel_varianceNxM(4, 4);
@@ -139,34 +139,34 @@ sub_pixel_varianceNxM(64, 64);
// 4xM filter writes an extra row to fdata because it processes two rows at a
// time.
-#define sub_pixel_avg_varianceNxM(n, m) \
- uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
- uint8_t temp1[n * m]; \
- \
- if (n == 4) { \
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (m + 2), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else if (n == 8) { \
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (m + 1), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else { \
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (m + 1), n, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
- bilinear_filters[yoffset]); \
- } \
- \
- vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \
- \
- return vpx_variance##n##x##m(temp0, n, b, b_stride, sse); \
+#define sub_pixel_avg_varianceNxM(n, m) \
+ uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
+ uint8_t temp1[n * m]; \
+ \
+ if (n == 4) { \
+ var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else if (n == 8) { \
+ var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else { \
+ var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
+ bilinear_filters[y_offset]); \
+ } \
+ \
+ vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \
+ \
+ return vpx_variance##n##x##m(temp0, n, ref_ptr, ref_stride, sse); \
}
sub_pixel_avg_varianceNxM(4, 4);
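The two macros in subpel_variance_neon.c share one shape: filter horizontally by x_offset into temp0 (pixel_step 1, with one extra row, or two on the 4-wide path, so the vertical pass has rows below it), filter vertically by y_offset into temp1 (pixel_step n), then hand the result to vpx_variance##n##x##m. A scalar sketch of that shared filter kernel, assuming libvpx's 7-bit bilinear taps that sum to 128 (filter_block2d_bil_ref_c is a hypothetical name mirroring var_filter_block2d_bil_*):

#include <stdint.h>

// One bilinear pass: blend each pixel with its neighbor pixel_step away.
// pixel_step == 1 gives the horizontal pass; pixel_step == row width
// gives the vertical pass. Assumes filter[0] + filter[1] == 128 (1 << 7),
// so (x + 64) >> 7 is the rounded normalization.
static void filter_block2d_bil_ref_c(const uint8_t *src_ptr, uint8_t *dst,
                                     int src_stride, int pixel_step,
                                     int height, int width,
                                     const uint8_t *filter) {
  int i, j;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      dst[j] = (uint8_t)((src_ptr[j] * filter[0] +
                          src_ptr[j + pixel_step] * filter[1] + 64) >> 7);
    }
    src_ptr += src_stride;
    dst += width;
  }
}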
diff --git a/vpx_dsp/arm/variance_neon.c b/vpx_dsp/arm/variance_neon.c
index 61c2c16a7..77b1015b7 100644
--- a/vpx_dsp/arm/variance_neon.c
+++ b/vpx_dsp/arm/variance_neon.c
@@ -27,8 +27,9 @@
// this limit.
// Process a block of width 4 four rows at a time.
-static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int h, uint32_t *sse, int *sum) {
+static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
int i;
int16x8_t sum_s16 = vdupq_n_s16(0);
int32x4_t sse_lo_s32 = vdupq_n_s32(0);
@@ -38,8 +39,8 @@ static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b,
assert(h <= 256);
for (i = 0; i < h; i += 4) {
- const uint8x16_t a_u8 = load_unaligned_u8q(a, a_stride);
- const uint8x16_t b_u8 = load_unaligned_u8q(b, b_stride);
+ const uint8x16_t a_u8 = load_unaligned_u8q(src_ptr, src_stride);
+ const uint8x16_t b_u8 = load_unaligned_u8q(ref_ptr, ref_stride);
const uint16x8_t diff_lo_u16 =
vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8));
const uint16x8_t diff_hi_u16 =
@@ -61,8 +62,8 @@ static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b,
sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16),
vget_high_s16(diff_hi_s16));
- a += 4 * a_stride;
- b += 4 * b_stride;
+ src_ptr += 4 * src_stride;
+ ref_ptr += 4 * ref_stride;
}
*sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
@@ -72,9 +73,9 @@ static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b,
}
// Process a block of any size where the width is divisible by 16.
-static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse,
- int *sum) {
+static void variance_neon_w16(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
int i, j;
int16x8_t sum_s16 = vdupq_n_s16(0);
int32x4_t sse_lo_s32 = vdupq_n_s32(0);
@@ -86,8 +87,8 @@ static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b,
for (i = 0; i < h; ++i) {
for (j = 0; j < w; j += 16) {
- const uint8x16_t a_u8 = vld1q_u8(a + j);
- const uint8x16_t b_u8 = vld1q_u8(b + j);
+ const uint8x16_t a_u8 = vld1q_u8(src_ptr + j);
+ const uint8x16_t b_u8 = vld1q_u8(ref_ptr + j);
const uint16x8_t diff_lo_u16 =
vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8));
@@ -110,8 +111,8 @@ static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b,
sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16),
vget_high_s16(diff_hi_s16));
}
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
*sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
@@ -121,8 +122,9 @@ static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b,
}
// Process a block of width 8 two rows at a time.
-static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int h, uint32_t *sse, int *sum) {
+static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
int i = 0;
int16x8_t sum_s16 = vdupq_n_s16(0);
int32x4_t sse_lo_s32 = vdupq_n_s32(0);
@@ -132,10 +134,10 @@ static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b,
assert(h <= 128);
do {
- const uint8x8_t a_0_u8 = vld1_u8(a);
- const uint8x8_t a_1_u8 = vld1_u8(a + a_stride);
- const uint8x8_t b_0_u8 = vld1_u8(b);
- const uint8x8_t b_1_u8 = vld1_u8(b + b_stride);
+ const uint8x8_t a_0_u8 = vld1_u8(src_ptr);
+ const uint8x8_t a_1_u8 = vld1_u8(src_ptr + src_stride);
+ const uint8x8_t b_0_u8 = vld1_u8(ref_ptr);
+ const uint8x8_t b_1_u8 = vld1_u8(ref_ptr + ref_stride);
const uint16x8_t diff_0_u16 = vsubl_u8(a_0_u8, b_0_u8);
const uint16x8_t diff_1_u16 = vsubl_u8(a_1_u8, b_1_u8);
const int16x8_t diff_0_s16 = vreinterpretq_s16_u16(diff_0_u16);
@@ -150,8 +152,8 @@ static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b,
vget_high_s16(diff_0_s16));
sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_1_s16),
vget_high_s16(diff_1_s16));
- a += a_stride + a_stride;
- b += b_stride + b_stride;
+ src_ptr += src_stride + src_stride;
+ ref_ptr += ref_stride + ref_stride;
i += 2;
} while (i < h);
@@ -161,31 +163,36 @@ static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b,
0);
}
-void vpx_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w8x2(a, a_stride, b, b_stride, 8, sse, sum);
+void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, 8, sse, sum);
}
-void vpx_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w16(a, a_stride, b, b_stride, 16, 16, sse, sum);
+void vpx_get16x16var_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
}
-#define varianceNxM(n, m, shift) \
- unsigned int vpx_variance##n##x##m##_neon(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- unsigned int *sse) { \
- int sum; \
- if (n == 4) \
- variance_neon_w4x4(a, a_stride, b, b_stride, m, sse, &sum); \
- else if (n == 8) \
- variance_neon_w8x2(a, a_stride, b, b_stride, m, sse, &sum); \
- else \
- variance_neon_w16(a, a_stride, b, b_stride, n, m, sse, &sum); \
- if (n * m < 16 * 16) \
- return *sse - ((sum * sum) >> shift); \
- else \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
+#define varianceNxM(n, m, shift) \
+ unsigned int vpx_variance##n##x##m##_neon( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, unsigned int *sse) { \
+ int sum; \
+ if (n == 4) \
+ variance_neon_w4x4(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \
+ &sum); \
+ else if (n == 8) \
+ variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \
+ &sum); \
+ else \
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, n, m, sse, \
+ &sum); \
+ if (n * m < 16 * 16) \
+ return *sse - ((sum * sum) >> shift); \
+ else \
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
}
varianceNxM(4, 4, 4);
@@ -199,58 +206,66 @@ varianceNxM(16, 32, 9);
varianceNxM(32, 16, 9);
varianceNxM(32, 32, 10);
-unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum1, sum2;
uint32_t sse1, sse2;
- variance_neon_w16(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1);
- variance_neon_w16(a + (32 * a_stride), a_stride, b + (32 * b_stride),
- b_stride, 32, 32, &sse2, &sum2);
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32, &sse1,
+ &sum1);
+ variance_neon_w16(src_ptr + (32 * src_stride), src_stride,
+ ref_ptr + (32 * ref_stride), ref_stride, 32, 32, &sse2,
+ &sum2);
*sse = sse1 + sse2;
sum1 += sum2;
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
}
-unsigned int vpx_variance64x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum1, sum2;
uint32_t sse1, sse2;
- variance_neon_w16(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w16(a + (16 * a_stride), a_stride, b + (16 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1,
+ &sum1);
+ variance_neon_w16(src_ptr + (16 * src_stride), src_stride,
+ ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
*sse = sse1 + sse2;
sum1 += sum2;
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
}
-unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum1, sum2;
uint32_t sse1, sse2;
- variance_neon_w16(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w16(a + (16 * a_stride), a_stride, b + (16 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1,
+ &sum1);
+ variance_neon_w16(src_ptr + (16 * src_stride), src_stride,
+ ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
sse1 += sse2;
sum1 += sum2;
- variance_neon_w16(a + (16 * 2 * a_stride), a_stride, b + (16 * 2 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr + (16 * 2 * src_stride), src_stride,
+ ref_ptr + (16 * 2 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
sse1 += sse2;
sum1 += sum2;
- variance_neon_w16(a + (16 * 3 * a_stride), a_stride, b + (16 * 3 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr + (16 * 3 * src_stride), src_stride,
+ ref_ptr + (16 * 3 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
*sse = sse1 + sse2;
sum1 += sum2;
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
}
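The strip-wise splits above (four 64x16 passes here, two passes in vpx_variance32x64_neon and vpx_variance64x32_neon) exist because variance_neon_w16 accumulates differences in int16 lanes. A rough bound, assuming each lane of the sum accumulator receives two difference values per 16-pixel chunk per row as in the loops above: a 64x16 pass adds (64 / 16) * 2 * 16 = 128 values of magnitude at most 255 per lane, i.e. at most 32,640, just under the int16 limit of 32,767, whereas a single 64x64 pass would reach 512 * 255 = 130,560 and overflow.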
-unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
+unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
@@ -267,13 +282,13 @@ unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
for (i = 0; i < 8; i++) { // mse16x16_neon_loop
q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
@@ -312,10 +327,9 @@ unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}
-unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr,
- int source_stride,
+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr,
- int recon_stride) {
+ int ref_stride) {
int16x4_t d22s16, d24s16, d26s16, d28s16;
int64x1_t d0s64;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
@@ -324,21 +338,21 @@ unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr,
int64x2_t q1s64;
d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
d1u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d5u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
d3u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d7u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d1u8, d5u8);