summary | refs | log | tree | commit | diff
path: root/vpx_dsp
diff options
context:
space:
mode:
author James Zern <jzern@google.com> 2022-08-26 22:12:44 -0700
committer James Zern <jzern@google.com> 2022-08-26 22:12:44 -0700
commit 27fd546079a5566346b078754b51008ef46f5d2d (patch)
tree cc03eb05a9e36b8ccdb924682d168ad81876749b /vpx_dsp
parent 4bfab03e812f32167259f1e84356f862eb71fd44 (diff)
download libvpx-27fd546079a5566346b078754b51008ef46f5d2d.tar
libvpx-27fd546079a5566346b078754b51008ef46f5d2d.tar.gz
libvpx-27fd546079a5566346b078754b51008ef46f5d2d.tar.bz2
libvpx-27fd546079a5566346b078754b51008ef46f5d2d.zip
highbd_variance_neon,cosmetics: reorder a few lines
Change-Id: Ia6fa54652d7f94687e64108482bb0f28ca06cf49
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- vpx_dsp/arm/highbd_variance_neon.c 30
1 files changed, 12 insertions, 18 deletions
diff --git a/vpx_dsp/arm/highbd_variance_neon.c b/vpx_dsp/arm/highbd_variance_neon.c
index 3a60a14ab..96a35af01 100644
--- a/vpx_dsp/arm/highbd_variance_neon.c
+++ b/vpx_dsp/arm/highbd_variance_neon.c
@@ -233,14 +233,12 @@ static INLINE void highbd_var_filter_block2d_bil_first_pass(
if (output_width >= 8) {
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 8) {
- uint32x4_t sum1_u32;
- uint32x4_t sum2_u32;
- uint16x4_t out1_u16;
- uint16x4_t out2_u16;
const uint16x8_t src1_u16 = vld1q_u16(&src_ptr[j]);
const uint16x8_t src2_u16 = vld1q_u16(&src_ptr[j + pixel_step]);
- sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
- sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+ uint32x4_t sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
+ uint32x4_t sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+ uint16x4_t out1_u16;
+ uint16x4_t out2_u16;
sum1_u32 = vmlal_u16(sum1_u32, filter2_u16, vget_low_u16(src2_u16));
sum2_u32 = vmlal_u16(sum2_u32, filter2_u16, vget_high_u16(src2_u16));
out1_u16 = vshrn_n_u32(vaddq_u32(sum1_u32, round_u32), FILTER_BITS);
@@ -255,11 +253,10 @@ static INLINE void highbd_var_filter_block2d_bil_first_pass(
assert(output_width >= 4);
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 4) {
- uint32x4_t sum_u32;
- uint16x4_t out_u16;
const uint16x4_t src1_u16 = vld1_u16(&src_ptr[j]);
const uint16x4_t src2_u16 = vld1_u16(&src_ptr[j + pixel_step]);
- sum_u32 = vmull_u16(filter1_u16, src1_u16);
+ uint32x4_t sum_u32 = vmull_u16(filter1_u16, src1_u16);
+ uint16x4_t out_u16;
sum_u32 = vmlal_u16(sum_u32, filter2_u16, src2_u16);
out_u16 = vshrn_n_u32(vaddq_u32(sum_u32, round_u32), FILTER_BITS);
vst1_u16(&output_ptr[j], out_u16);
@@ -285,14 +282,12 @@ static INLINE void highbd_var_filter_block2d_bil_second_pass(
if (output_width >= 8) {
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 8) {
- uint32x4_t sum1_u32;
- uint32x4_t sum2_u32;
- uint16x4_t out1_u16;
- uint16x4_t out2_u16;
const uint16x8_t src1_u16 = vld1q_u16(&src_ptr[j]);
const uint16x8_t src2_u16 = vld1q_u16(&src_ptr[j + pixel_step]);
- sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
- sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+ uint32x4_t sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
+ uint32x4_t sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+ uint16x4_t out1_u16;
+ uint16x4_t out2_u16;
sum1_u32 = vmlal_u16(sum1_u32, filter2_u16, vget_low_u16(src2_u16));
sum2_u32 = vmlal_u16(sum2_u32, filter2_u16, vget_high_u16(src2_u16));
out1_u16 = vshrn_n_u32(vaddq_u32(sum1_u32, round_u32), FILTER_BITS);
@@ -307,11 +302,10 @@ static INLINE void highbd_var_filter_block2d_bil_second_pass(
assert(output_width >= 4);
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 4) {
- uint32x4_t sum_u32;
- uint16x4_t out_u16;
const uint16x4_t src1_u16 = vld1_u16(&src_ptr[j]);
const uint16x4_t src2_u16 = vld1_u16(&src_ptr[j + pixel_step]);
- sum_u32 = vmull_u16(filter1_u16, src1_u16);
+ uint32x4_t sum_u32 = vmull_u16(filter1_u16, src1_u16);
+ uint16x4_t out_u16;
sum_u32 = vmlal_u16(sum_u32, filter2_u16, src2_u16);
out_u16 = vshrn_n_u32(vaddq_u32(sum_u32, round_u32), FILTER_BITS);
vst1_u16(&output_ptr[j], out_u16);