From eab52a4f3c5bece97b8a2656553903aacd8f7ab4 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Wed, 8 Mar 2023 16:34:20 +0000 Subject: Fix buffer overrun in highbd Neon subpel variance filters The high bitdepth Neon code applying the first pass of the bilinear filter for subpixel variance on blocks of width 4 processed two rows at a time. This resulted in a source buffer overread, attempting to produce two rows of padding for the second (vertical) pass of the bilinear filter. This patch modifies highbd_var_filter_block2d_bil_w4 and highbd_avg_pred_var_filter_block2d_bil_w4 such that they only process a single row per iteration, and only require a single row of padding for the second pass. This prevents the buffer overread. Since all block sizes are now processed one row at a time, there is no need for a "padding" macro parameter - the value is always 1, with no special case for 4xh blocks. As well as re-enabling the Neon paths and their associated tests, we remove the now-redundant 'padding' macro parameter. Bug: webm:1796 Change-Id: Icd6076b38eb4476139795bb1734ca800c9edf079 --- test/variance_test.cc | 28 +-- vpx_dsp/arm/highbd_subpel_variance_neon.c | 302 ++++++++++++++---------------- vpx_dsp/arm/mem_neon.h | 9 + vpx_dsp/vpx_dsp_rtcd_defs.pl | 48 ++--- 4 files changed, 170 insertions(+), 217 deletions(-) diff --git a/test/variance_test.cc b/test/variance_test.cc index 8af26969c..1359bc4ba 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -1591,12 +1591,10 @@ INSTANTIATE_TEST_SUITE_P( 12), SubpelVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_neon, 12), - /*TODO(https://crbug.com/webm/1796): enable after heap overflow is - fixed. SubpelVarianceParams(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_neon, 12), SubpelVarianceParams(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_neon, - 12),*/ + 12), SubpelVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_neon, 10), SubpelVarianceParams(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_neon, @@ -1619,12 +1617,10 @@ INSTANTIATE_TEST_SUITE_P( 10), SubpelVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_neon, 10), - /*TODO(https://crbug.com/webm/1796): enable after heap overflow is - fixed. SubpelVarianceParams(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_neon, 10), SubpelVarianceParams(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_neon, - 10),*/ + 10), SubpelVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_neon, 8), SubpelVarianceParams(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_neon, @@ -1644,12 +1640,10 @@ INSTANTIATE_TEST_SUITE_P( SubpelVarianceParams(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_neon, 8), SubpelVarianceParams(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_neon, 8), - SubpelVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_neon, 8) - /*TODO(https://crbug.com/webm/1796): enable after heap overflow is - fixed. + SubpelVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_neon, 8), SubpelVarianceParams(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_neon, 8), SubpelVarianceParams(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_neon, - 8)*/)); + 8))); INSTANTIATE_TEST_SUITE_P( NEON, VpxHBDSubpelAvgVarianceTest, @@ -1687,14 +1681,12 @@ INSTANTIATE_TEST_SUITE_P( SubpelAvgVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_neon, 12), - /*TODO(https://crbug.com/webm/1796): enable after heap overflow is - fixed. SubpelAvgVarianceParams(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_neon, 12), SubpelAvgVarianceParams(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_neon, - 12),*/ + 12), SubpelAvgVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_neon, 10), @@ -1728,14 +1720,12 @@ INSTANTIATE_TEST_SUITE_P( SubpelAvgVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_neon, 10), - /*TODO(https://crbug.com/webm/1796): enable after heap overflow is - fixed. SubpelAvgVarianceParams(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_neon, 10), SubpelAvgVarianceParams(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_neon, - 10),*/ + 10), SubpelAvgVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_neon, 8), @@ -1768,15 +1758,13 @@ INSTANTIATE_TEST_SUITE_P( 8), SubpelAvgVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_neon, - 8) - /*TODO(https://crbug.com/webm/1796): enable after heap overflow is - fixed. + 8), SubpelAvgVarianceParams(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_neon, 8), SubpelAvgVarianceParams(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_neon, - 8)*/)); + 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON diff --git a/vpx_dsp/arm/highbd_subpel_variance_neon.c b/vpx_dsp/arm/highbd_subpel_variance_neon.c index b2fe9921c..683df5797 100644 --- a/vpx_dsp/arm/highbd_subpel_variance_neon.c +++ b/vpx_dsp/arm/highbd_subpel_variance_neon.c @@ -36,33 +36,29 @@ // requiring double the number of data processing instructions. (12-bit * 8 = // 15-bit.) -// Process a block exactly 4 wide and a multiple of 2 high. -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -#if 0 +// Process a block exactly 4 wide and any height. static void highbd_var_filter_block2d_bil_w4(const uint16_t *src_ptr, uint16_t *dst_ptr, int src_stride, int pixel_step, int dst_height, int filter_offset) { - const uint16x8_t f0 = vdupq_n_u16(8 - filter_offset); - const uint16x8_t f1 = vdupq_n_u16(filter_offset); + const uint16x4_t f0 = vdup_n_u16(8 - filter_offset); + const uint16x4_t f1 = vdup_n_u16(filter_offset); int i = dst_height; do { - uint16x8_t s0 = load_unaligned_u16q(src_ptr, src_stride); - uint16x8_t s1 = load_unaligned_u16q(src_ptr + pixel_step, src_stride); + uint16x4_t s0 = load_unaligned_u16(src_ptr); + uint16x4_t s1 = load_unaligned_u16(src_ptr + pixel_step); - uint16x8_t blend = vmulq_u16(s0, f0); - blend = vmlaq_u16(blend, s1, f1); - blend = vrshrq_n_u16(blend, 3); + uint16x4_t blend = vmul_u16(s0, f0); + blend = vmla_u16(blend, s1, f1); + blend = vrshr_n_u16(blend, 3); - vst1q_u16(dst_ptr, blend); + vst1_u16(dst_ptr, blend); - src_ptr += 2 * src_stride; - dst_ptr += 8; - i -= 2; - } while (i != 0); + src_ptr += src_stride; + dst_ptr += 4; + } while (--i != 0); } -#endif // 0 // Process a block which is a multiple of 8 and any height. static void highbd_var_filter_block2d_bil_large(const uint16_t *src_ptr, @@ -148,23 +144,23 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr, } while (--i != 0); } -#define HBD_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \ +#define HBD_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h) \ unsigned int vpx_highbd_##bitdepth##_sub_pixel_variance##w##x##h##_neon( \ const uint8_t *src, int src_stride, int xoffset, int yoffset, \ const uint8_t *ref, int ref_stride, uint32_t *sse) { \ - uint16_t tmp0[w * (h + padding)]; \ + uint16_t tmp0[w * (h + 1)]; \ uint16_t tmp1[w * h]; \ uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src); \ \ - highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \ - (h + padding), xoffset); \ + highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, (h + 1), \ + xoffset); \ highbd_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \ \ return vpx_highbd_##bitdepth##_variance##w##x##h(CONVERT_TO_BYTEPTR(tmp1), \ w, ref, ref_stride, sse); \ } -#define HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \ +#define HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h) \ unsigned int vpx_highbd_##bitdepth##_sub_pixel_variance##w##x##h##_neon( \ const uint8_t *src, int src_stride, int xoffset, int yoffset, \ const uint8_t *ref, int ref_stride, unsigned int *sse) { \ @@ -188,28 +184,28 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr, CONVERT_TO_BYTEPTR(tmp), w, ref, ref_stride, sse); \ } \ } else if (xoffset == 4) { \ - uint16_t tmp0[w * (h + padding)]; \ + uint16_t tmp0[w * (h + 1)]; \ if (yoffset == 0) { \ highbd_var_filter_block2d_avg(src_ptr, tmp0, src_stride, 1, w, h); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp0), w, ref, ref_stride, sse); \ } else if (yoffset == 4) { \ - uint16_t tmp1[w * (h + padding)]; \ + uint16_t tmp1[w * (h + 1)]; \ highbd_var_filter_block2d_avg(src_ptr, tmp0, src_stride, 1, w, \ - (h + padding)); \ + (h + 1)); \ highbd_var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ } else { \ - uint16_t tmp1[w * (h + padding)]; \ + uint16_t tmp1[w * (h + 1)]; \ highbd_var_filter_block2d_avg(src_ptr, tmp0, src_stride, 1, w, \ - (h + padding)); \ + (h + 1)); \ highbd_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ } \ } else { \ - uint16_t tmp0[w * (h + padding)]; \ + uint16_t tmp0[w * (h + 1)]; \ if (yoffset == 0) { \ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, h, \ xoffset); \ @@ -218,14 +214,14 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr, } else if (yoffset == 4) { \ uint16_t tmp1[w * h]; \ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \ - (h + padding), xoffset); \ + (h + 1), xoffset); \ highbd_var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ } else { \ uint16_t tmp1[w * h]; \ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \ - (h + padding), xoffset); \ + (h + 1), xoffset); \ highbd_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ @@ -233,98 +229,88 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr, } \ } -// 4x blocks are processed two rows at a time, so require an extra row of -// padding. - // 8-bit -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -// HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 4, 2) -// HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 8, 2) +HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 4) +HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 8) -HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 4, 1) -HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 8, 1) -HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 16, 1) +HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 4) +HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 8) +HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 16) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 8, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 16, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 32, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 8) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 16) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 32) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 16, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 32, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 64, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 16) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 32) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 64) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 32, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 64, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 32) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 64) // 10-bit -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -// HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 4, 2) -// HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 8, 2) +HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 4) +HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 8) -HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 4, 1) -HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 8, 1) -HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 16, 1) +HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 4) +HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 8) +HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 16) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 8, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 16, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 32, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 8) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 16) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 32) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 16, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 32, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 64, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 16) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 32) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 64) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 32, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 64, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 32) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 64) // 12-bit -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -// HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 4, 2) -// HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 8, 2) +HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 4) +HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 8) -HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 4, 1) -HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 8, 1) -HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 16, 1) +HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 4) +HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 8) +HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 16) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 8, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 16, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 32, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 8) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 16) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 32) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 16, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 32, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 64, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 16) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 32) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 64) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 32, 1) -HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 64, 1) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 32) +HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 64) // Combine bilinear filter with vpx_highbd_comp_avg_pred for blocks having // width 4. -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -#if 0 static void highbd_avg_pred_var_filter_block2d_bil_w4( const uint16_t *src_ptr, uint16_t *dst_ptr, int src_stride, int pixel_step, int dst_height, int filter_offset, const uint16_t *second_pred) { - const uint16x8_t f0 = vdupq_n_u16(8 - filter_offset); - const uint16x8_t f1 = vdupq_n_u16(filter_offset); + const uint16x4_t f0 = vdup_n_u16(8 - filter_offset); + const uint16x4_t f1 = vdup_n_u16(filter_offset); int i = dst_height; do { - uint16x8_t s0 = load_unaligned_u16q(src_ptr, src_stride); - uint16x8_t s1 = load_unaligned_u16q(src_ptr + pixel_step, src_stride); - uint16x8_t p = vld1q_u16(second_pred); + uint16x4_t s0 = load_unaligned_u16(src_ptr); + uint16x4_t s1 = load_unaligned_u16(src_ptr + pixel_step); + uint16x4_t p = vld1_u16(second_pred); - uint16x8_t blend = vmulq_u16(s0, f0); - blend = vmlaq_u16(blend, s1, f1); - blend = vrshrq_n_u16(blend, 3); + uint16x4_t blend = vmul_u16(s0, f0); + blend = vmla_u16(blend, s1, f1); + blend = vrshr_n_u16(blend, 3); - vst1q_u16(dst_ptr, vrhaddq_u16(blend, p)); + vst1_u16(dst_ptr, vrhadd_u16(blend, p)); - src_ptr += 2 * src_stride; - dst_ptr += 2 * 4; - second_pred += 2 * 4; - i -= 2; - } while (i != 0); + src_ptr += src_stride; + dst_ptr += 4; + second_pred += 4; + } while (--i != 0); } -#endif // 0 // Combine bilinear filter with vpx_highbd_comp_avg_pred for large blocks. static void highbd_avg_pred_var_filter_block2d_bil_large( @@ -444,25 +430,25 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr, } while (--i != 0); } -#define HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \ - uint32_t vpx_highbd_##bitdepth##_sub_pixel_avg_variance##w##x##h##_neon( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *ref, int ref_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t tmp0[w * (h + padding)]; \ - uint16_t tmp1[w * h]; \ - uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src); \ - \ - highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \ - (h + padding), xoffset); \ - highbd_avg_pred_var_filter_block2d_bil_w##w( \ - tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \ - \ - return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ - CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ +#define HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h) \ + uint32_t vpx_highbd_##bitdepth##_sub_pixel_avg_variance##w##x##h##_neon( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *ref, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t tmp0[w * (h + 1)]; \ + uint16_t tmp1[w * h]; \ + uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src); \ + \ + highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, (h + 1), \ + xoffset); \ + highbd_avg_pred_var_filter_block2d_bil_w##w( \ + tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \ + \ + return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ + CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ } -#define HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \ +#define HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h) \ unsigned int vpx_highbd_##bitdepth##_sub_pixel_avg_variance##w##x##h##_neon( \ const uint8_t *src, int source_stride, int xoffset, int yoffset, \ const uint8_t *ref, int ref_stride, unsigned int *sse, \ @@ -490,7 +476,7 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr, CONVERT_TO_BYTEPTR(tmp), w, ref, ref_stride, sse); \ } \ } else if (xoffset == 4) { \ - uint16_t tmp0[w * (h + padding)]; \ + uint16_t tmp0[w * (h + 1)]; \ if (yoffset == 0) { \ highbd_avg_pred_var_filter_block2d_avg( \ src_ptr, tmp0, source_stride, 1, w, h, \ @@ -498,24 +484,24 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr, return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp0), w, ref, ref_stride, sse); \ } else if (yoffset == 4) { \ - uint16_t tmp1[w * (h + padding)]; \ + uint16_t tmp1[w * (h + 1)]; \ highbd_var_filter_block2d_avg(src_ptr, tmp0, source_stride, 1, w, \ - (h + padding)); \ + (h + 1)); \ highbd_avg_pred_var_filter_block2d_avg( \ tmp0, tmp1, w, w, w, h, CONVERT_TO_SHORTPTR(second_pred)); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ } else { \ - uint16_t tmp1[w * (h + padding)]; \ + uint16_t tmp1[w * (h + 1)]; \ highbd_var_filter_block2d_avg(src_ptr, tmp0, source_stride, 1, w, \ - (h + padding)); \ + (h + 1)); \ highbd_avg_pred_var_filter_block2d_bil_w##w( \ tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \ } \ } else { \ - uint16_t tmp0[w * (h + padding)]; \ + uint16_t tmp0[w * (h + 1)]; \ if (yoffset == 0) { \ highbd_avg_pred_var_filter_block2d_bil_w##w( \ src_ptr, tmp0, source_stride, 1, h, xoffset, \ @@ -525,7 +511,7 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr, } else if (yoffset == 4) { \ uint16_t tmp1[w * h]; \ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, source_stride, 1, \ - (h + padding), xoffset); \ + (h + 1), xoffset); \ highbd_avg_pred_var_filter_block2d_avg( \ tmp0, tmp1, w, w, w, h, CONVERT_TO_SHORTPTR(second_pred)); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ @@ -533,7 +519,7 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr, } else { \ uint16_t tmp1[w * h]; \ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, source_stride, 1, \ - (h + padding), xoffset); \ + (h + 1), xoffset); \ highbd_avg_pred_var_filter_block2d_bil_w##w( \ tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \ @@ -542,65 +528,59 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr, } \ } -// 4x blocks are processed two rows at a time, so require an extra row of -// padding. - // 8-bit -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 4, 2) -// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 8, 2) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 4) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 8) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 4, 1) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 8, 1) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 16, 1) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 4) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 8) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 16) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 8, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 16, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 32, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 8) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 16) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 32) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 16, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 32, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 64, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 16) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 32) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 64) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 32, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 64, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 32) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 64) // 10-bit -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 4, 2) -// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 8, 2) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 4) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 8) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 4, 1) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 8, 1) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 16, 1) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 4) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 8) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 16) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 8, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 16, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 32, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 8) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 16) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 32) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 16, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 32, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 64, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 16) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 32) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 64) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 32, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 64, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 32) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 64) // 12-bit -// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed. -// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 4, 2) -// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 8, 2) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 4) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 8) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 4, 1) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 8, 1) -HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 16, 1) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 4) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 8) +HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 16) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 8, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 16, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 32, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 8) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 16) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 32) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 16, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 32, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 64, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 16) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 32) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 64) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 32, 1) -HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 64, 1) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 32) +HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 64) diff --git a/vpx_dsp/arm/mem_neon.h b/vpx_dsp/arm/mem_neon.h index 400846b70..fa14f80b2 100644 --- a/vpx_dsp/arm/mem_neon.h +++ b/vpx_dsp/arm/mem_neon.h @@ -164,6 +164,15 @@ static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, return vreinterpret_u8_u32(a_u32); } +// Load 8 bytes when alignment is not guaranteed. +static INLINE uint16x4_t load_unaligned_u16(const uint16_t *buf) { + uint64_t a; + uint64x1_t a_u64 = vdup_n_u64(0); + memcpy(&a, buf, 8); + a_u64 = vset_lane_u64(a, a_u64, 0); + return vreinterpret_u16_u64(a_u64); +} + // Load 2 sets of 8 bytes when alignment is not guaranteed. static INLINE uint16x8_t load_unaligned_u16q(const uint16_t *buf, ptrdiff_t stride) { diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 62f4789c2..ad8ff6e18 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1443,13 +1443,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2 neon/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_12_sub_pixel_variance4x8 neon/; + specialize qw/vpx_highbd_12_sub_pixel_variance4x8 neon/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_12_sub_pixel_variance4x4 neon/; + specialize qw/vpx_highbd_12_sub_pixel_variance4x4 neon/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2 neon/; @@ -1485,13 +1481,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2 neon/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_10_sub_pixel_variance4x8 neon/; + specialize qw/vpx_highbd_10_sub_pixel_variance4x8 neon/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_10_sub_pixel_variance4x4 neon/; + specialize qw/vpx_highbd_10_sub_pixel_variance4x4 neon/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2 neon/; @@ -1527,13 +1519,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2 neon/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_8_sub_pixel_variance4x8 neon/; + specialize qw/vpx_highbd_8_sub_pixel_variance4x8 neon/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_8_sub_pixel_variance4x4 neon/; + specialize qw/vpx_highbd_8_sub_pixel_variance4x4 neon/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2 neon/; @@ -1569,13 +1557,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2 neon/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x8 neon/; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x8 neon/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x4 neon/; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x4 neon/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2 neon/; @@ -1611,13 +1595,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2 neon/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x8 neon/; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x8 neon/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x4 neon/; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x4 neon/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2 neon/; @@ -1653,13 +1633,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2 neon/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x8 neon/; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x8 neon/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is - # fixed. - # specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x4 neon/; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x4 neon/; } # CONFIG_VP9_HIGHBITDEPTH -- cgit v1.2.3