diff options
author | James Zern <jzern@google.com> | 2022-08-26 22:12:44 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2022-08-26 22:12:44 -0700 |
commit | 27fd546079a5566346b078754b51008ef46f5d2d (patch) | |
tree | cc03eb05a9e36b8ccdb924682d168ad81876749b /vpx_dsp | |
parent | 4bfab03e812f32167259f1e84356f862eb71fd44 (diff) | |
download | libvpx-27fd546079a5566346b078754b51008ef46f5d2d.tar libvpx-27fd546079a5566346b078754b51008ef46f5d2d.tar.gz libvpx-27fd546079a5566346b078754b51008ef46f5d2d.tar.bz2 libvpx-27fd546079a5566346b078754b51008ef46f5d2d.zip |
highbd_variance_neon,cosmetics: reorder a few lines
Change-Id: Ia6fa54652d7f94687e64108482bb0f28ca06cf49
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/arm/highbd_variance_neon.c | 30 |
1 files changed, 12 insertions, 18 deletions
diff --git a/vpx_dsp/arm/highbd_variance_neon.c b/vpx_dsp/arm/highbd_variance_neon.c
index 3a60a14ab..96a35af01 100644
--- a/vpx_dsp/arm/highbd_variance_neon.c
+++ b/vpx_dsp/arm/highbd_variance_neon.c
@@ -233,14 +233,12 @@ static INLINE void highbd_var_filter_block2d_bil_first_pass(
   if (output_width >= 8) {
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 8) {
-        uint32x4_t sum1_u32;
-        uint32x4_t sum2_u32;
-        uint16x4_t out1_u16;
-        uint16x4_t out2_u16;
         const uint16x8_t src1_u16 = vld1q_u16(&src_ptr[j]);
         const uint16x8_t src2_u16 = vld1q_u16(&src_ptr[j + pixel_step]);
-        sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
-        sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint32x4_t sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
+        uint32x4_t sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint16x4_t out1_u16;
+        uint16x4_t out2_u16;
         sum1_u32 = vmlal_u16(sum1_u32, filter2_u16, vget_low_u16(src2_u16));
         sum2_u32 = vmlal_u16(sum2_u32, filter2_u16, vget_high_u16(src2_u16));
         out1_u16 = vshrn_n_u32(vaddq_u32(sum1_u32, round_u32), FILTER_BITS);
@@ -255,11 +253,10 @@ static INLINE void highbd_var_filter_block2d_bil_first_pass(
     assert(output_width >= 4);
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 4) {
-        uint32x4_t sum_u32;
-        uint16x4_t out_u16;
         const uint16x4_t src1_u16 = vld1_u16(&src_ptr[j]);
         const uint16x4_t src2_u16 = vld1_u16(&src_ptr[j + pixel_step]);
-        sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint32x4_t sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint16x4_t out_u16;
         sum_u32 = vmlal_u16(sum_u32, filter2_u16, src2_u16);
         out_u16 = vshrn_n_u32(vaddq_u32(sum_u32, round_u32), FILTER_BITS);
         vst1_u16(&output_ptr[j], out_u16);
@@ -285,14 +282,12 @@ static INLINE void highbd_var_filter_block2d_bil_second_pass(
   if (output_width >= 8) {
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 8) {
-        uint32x4_t sum1_u32;
-        uint32x4_t sum2_u32;
-        uint16x4_t out1_u16;
-        uint16x4_t out2_u16;
         const uint16x8_t src1_u16 = vld1q_u16(&src_ptr[j]);
         const uint16x8_t src2_u16 = vld1q_u16(&src_ptr[j + pixel_step]);
-        sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
-        sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint32x4_t sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
+        uint32x4_t sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint16x4_t out1_u16;
+        uint16x4_t out2_u16;
         sum1_u32 = vmlal_u16(sum1_u32, filter2_u16, vget_low_u16(src2_u16));
         sum2_u32 = vmlal_u16(sum2_u32, filter2_u16, vget_high_u16(src2_u16));
         out1_u16 = vshrn_n_u32(vaddq_u32(sum1_u32, round_u32), FILTER_BITS);
@@ -307,11 +302,10 @@ static INLINE void highbd_var_filter_block2d_bil_second_pass(
     assert(output_width >= 4);
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 4) {
-        uint32x4_t sum_u32;
-        uint16x4_t out_u16;
         const uint16x4_t src1_u16 = vld1_u16(&src_ptr[j]);
         const uint16x4_t src2_u16 = vld1_u16(&src_ptr[j + pixel_step]);
-        sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint32x4_t sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint16x4_t out_u16;
         sum_u32 = vmlal_u16(sum_u32, filter2_u16, src2_u16);
         out_u16 = vshrn_n_u32(vaddq_u32(sum_u32, round_u32), FILTER_BITS);
         vst1_u16(&output_ptr[j], out_u16);