diff options
author | James Zern <jzern@google.com> | 2021-04-27 18:02:35 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2021-04-27 18:04:08 -0700 |
commit | ff67c848115ae1356f21d361342091140d176c1d (patch) | |
tree | 11b3334f622084cc317741c2991cabace2805e32 | |
parent | 07cf024d4d061feac503054d15d039c2cfbce35e (diff) | |
download | libvpx-ff67c848115ae1356f21d361342091140d176c1d.tar libvpx-ff67c848115ae1356f21d361342091140d176c1d.tar.gz libvpx-ff67c848115ae1356f21d361342091140d176c1d.tar.bz2 libvpx-ff67c848115ae1356f21d361342091140d176c1d.zip |
vpx_convolve_neon: prefer != 0 to > 0 in tests
This produces better assembly code. The horizontal convolve is called
with an adjusted intermediate_height, so it may over-process some rows;
the row-count checks in those functions therefore remain as `> 0`.
Change-Id: Iebe9842f2a13a4960d9a5addde9489452f5ce33a
-rw-r--r-- | vpx_dsp/arm/vpx_convolve8_neon.c | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c index 08ae17dba..c55c9fb56 100644 --- a/vpx_dsp/arm/vpx_convolve8_neon.c +++ b/vpx_dsp/arm/vpx_convolve8_neon.c @@ -145,7 +145,7 @@ void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, src += 4; dst += 4; w -= 4; - } while (w > 0); + } while (w != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -296,7 +296,7 @@ void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, s += 8; d += 8; width -= 8; - } while (width > 0); + } while (width != 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; @@ -402,7 +402,7 @@ void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, src += 4; dst += 4; w -= 4; - } while (w > 0); + } while (w != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -586,7 +586,7 @@ void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, s += 8; d += 8; width -= 8; - } while (width > 0); + } while (width != 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; @@ -679,7 +679,7 @@ void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, s5 = s9; s6 = s10; h -= 4; - } while (h > 0); + } while (h != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -759,11 +759,11 @@ void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, s5 = s9; s6 = s10; height -= 4; - } while (height > 0); + } while (height != 0); src += 8; dst += 8; w -= 8; - } while (w > 0); + } while (w != 0); } } @@ -860,7 +860,7 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, s5 = s9; s6 = s10; h -= 4; - } while (h > 0); + } while (h != 0); } else { const int16x8_t filter3 = 
vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -950,10 +950,10 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, s5 = s9; s6 = s10; height -= 4; - } while (height > 0); + } while (height != 0); src += 8; dst += 8; w -= 8; - } while (w > 0); + } while (w != 0); } } |