summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Zern <jzern@google.com>2023-03-14 00:22:31 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2023-03-14 00:22:31 +0000
commitd32a410880c6583d49baaac17c84b3d0fead43ba (patch)
tree09c3918142870436b4bd2328e8ca6fa591a5a98c
parentf40c89459fe1101fde5d2fb9b333ef37af15644a (diff)
parenteab52a4f3c5bece97b8a2656553903aacd8f7ab4 (diff)
downloadlibvpx-d32a410880c6583d49baaac17c84b3d0fead43ba.tar
libvpx-d32a410880c6583d49baaac17c84b3d0fead43ba.tar.gz
libvpx-d32a410880c6583d49baaac17c84b3d0fead43ba.tar.bz2
libvpx-d32a410880c6583d49baaac17c84b3d0fead43ba.zip
Merge "Fix buffer overrun in highbd Neon subpel variance filters" into main
-rw-r--r--test/variance_test.cc28
-rw-r--r--vpx_dsp/arm/highbd_subpel_variance_neon.c302
-rw-r--r--vpx_dsp/arm/mem_neon.h9
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl48
4 files changed, 170 insertions, 217 deletions
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 8af26969c..1359bc4ba 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1591,12 +1591,10 @@ INSTANTIATE_TEST_SUITE_P(
12),
SubpelVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_neon,
12),
- /*TODO(https://crbug.com/webm/1796): enable after heap overflow is
- fixed.
SubpelVarianceParams(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_neon,
12),
SubpelVarianceParams(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_neon,
- 12),*/
+ 12),
SubpelVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_neon,
10),
SubpelVarianceParams(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_neon,
@@ -1619,12 +1617,10 @@ INSTANTIATE_TEST_SUITE_P(
10),
SubpelVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_neon,
10),
- /*TODO(https://crbug.com/webm/1796): enable after heap overflow is
- fixed.
SubpelVarianceParams(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_neon,
10),
SubpelVarianceParams(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_neon,
- 10),*/
+ 10),
SubpelVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_neon,
8),
SubpelVarianceParams(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_neon,
@@ -1644,12 +1640,10 @@ INSTANTIATE_TEST_SUITE_P(
SubpelVarianceParams(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_neon,
8),
SubpelVarianceParams(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_neon, 8),
- SubpelVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_neon, 8)
- /*TODO(https://crbug.com/webm/1796): enable after heap overflow is
- fixed.
+ SubpelVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_neon, 8),
SubpelVarianceParams(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_neon, 8),
SubpelVarianceParams(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_neon,
- 8)*/));
+ 8)));
INSTANTIATE_TEST_SUITE_P(
NEON, VpxHBDSubpelAvgVarianceTest,
@@ -1687,14 +1681,12 @@ INSTANTIATE_TEST_SUITE_P(
SubpelAvgVarianceParams(3, 2,
&vpx_highbd_12_sub_pixel_avg_variance8x4_neon,
12),
- /*TODO(https://crbug.com/webm/1796): enable after heap overflow is
- fixed.
SubpelAvgVarianceParams(2, 3,
&vpx_highbd_12_sub_pixel_avg_variance4x8_neon,
12),
SubpelAvgVarianceParams(2, 2,
&vpx_highbd_12_sub_pixel_avg_variance4x4_neon,
- 12),*/
+ 12),
SubpelAvgVarianceParams(6, 6,
&vpx_highbd_10_sub_pixel_avg_variance64x64_neon,
10),
@@ -1728,14 +1720,12 @@ INSTANTIATE_TEST_SUITE_P(
SubpelAvgVarianceParams(3, 2,
&vpx_highbd_10_sub_pixel_avg_variance8x4_neon,
10),
- /*TODO(https://crbug.com/webm/1796): enable after heap overflow is
- fixed.
SubpelAvgVarianceParams(2, 3,
&vpx_highbd_10_sub_pixel_avg_variance4x8_neon,
10),
SubpelAvgVarianceParams(2, 2,
&vpx_highbd_10_sub_pixel_avg_variance4x4_neon,
- 10),*/
+ 10),
SubpelAvgVarianceParams(6, 6,
&vpx_highbd_8_sub_pixel_avg_variance64x64_neon,
8),
@@ -1768,15 +1758,13 @@ INSTANTIATE_TEST_SUITE_P(
8),
SubpelAvgVarianceParams(3, 2,
&vpx_highbd_8_sub_pixel_avg_variance8x4_neon,
- 8)
- /*TODO(https://crbug.com/webm/1796): enable after heap overflow is
- fixed.
+ 8),
SubpelAvgVarianceParams(2, 3,
&vpx_highbd_8_sub_pixel_avg_variance4x8_neon,
8),
SubpelAvgVarianceParams(2, 2,
&vpx_highbd_8_sub_pixel_avg_variance4x4_neon,
- 8)*/));
+ 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
diff --git a/vpx_dsp/arm/highbd_subpel_variance_neon.c b/vpx_dsp/arm/highbd_subpel_variance_neon.c
index b2fe9921c..683df5797 100644
--- a/vpx_dsp/arm/highbd_subpel_variance_neon.c
+++ b/vpx_dsp/arm/highbd_subpel_variance_neon.c
@@ -36,33 +36,29 @@
// requiring double the number of data processing instructions. (12-bit * 8 =
// 15-bit.)
-// Process a block exactly 4 wide and a multiple of 2 high.
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-#if 0
+// Process a block exactly 4 wide and any height.
static void highbd_var_filter_block2d_bil_w4(const uint16_t *src_ptr,
uint16_t *dst_ptr, int src_stride,
int pixel_step, int dst_height,
int filter_offset) {
- const uint16x8_t f0 = vdupq_n_u16(8 - filter_offset);
- const uint16x8_t f1 = vdupq_n_u16(filter_offset);
+ const uint16x4_t f0 = vdup_n_u16(8 - filter_offset);
+ const uint16x4_t f1 = vdup_n_u16(filter_offset);
int i = dst_height;
do {
- uint16x8_t s0 = load_unaligned_u16q(src_ptr, src_stride);
- uint16x8_t s1 = load_unaligned_u16q(src_ptr + pixel_step, src_stride);
+ uint16x4_t s0 = load_unaligned_u16(src_ptr);
+ uint16x4_t s1 = load_unaligned_u16(src_ptr + pixel_step);
- uint16x8_t blend = vmulq_u16(s0, f0);
- blend = vmlaq_u16(blend, s1, f1);
- blend = vrshrq_n_u16(blend, 3);
+ uint16x4_t blend = vmul_u16(s0, f0);
+ blend = vmla_u16(blend, s1, f1);
+ blend = vrshr_n_u16(blend, 3);
- vst1q_u16(dst_ptr, blend);
+ vst1_u16(dst_ptr, blend);
- src_ptr += 2 * src_stride;
- dst_ptr += 8;
- i -= 2;
- } while (i != 0);
+ src_ptr += src_stride;
+ dst_ptr += 4;
+ } while (--i != 0);
}
-#endif // 0
// Process a block which is a multiple of 8 and any height.
static void highbd_var_filter_block2d_bil_large(const uint16_t *src_ptr,
@@ -148,23 +144,23 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr,
} while (--i != 0);
}
-#define HBD_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \
+#define HBD_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h) \
unsigned int vpx_highbd_##bitdepth##_sub_pixel_variance##w##x##h##_neon( \
const uint8_t *src, int src_stride, int xoffset, int yoffset, \
const uint8_t *ref, int ref_stride, uint32_t *sse) { \
- uint16_t tmp0[w * (h + padding)]; \
+ uint16_t tmp0[w * (h + 1)]; \
uint16_t tmp1[w * h]; \
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src); \
\
- highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \
- (h + padding), xoffset); \
+ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, (h + 1), \
+ xoffset); \
highbd_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
\
return vpx_highbd_##bitdepth##_variance##w##x##h(CONVERT_TO_BYTEPTR(tmp1), \
w, ref, ref_stride, sse); \
}
-#define HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \
+#define HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(bitdepth, w, h) \
unsigned int vpx_highbd_##bitdepth##_sub_pixel_variance##w##x##h##_neon( \
const uint8_t *src, int src_stride, int xoffset, int yoffset, \
const uint8_t *ref, int ref_stride, unsigned int *sse) { \
@@ -188,28 +184,28 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr,
CONVERT_TO_BYTEPTR(tmp), w, ref, ref_stride, sse); \
} \
} else if (xoffset == 4) { \
- uint16_t tmp0[w * (h + padding)]; \
+ uint16_t tmp0[w * (h + 1)]; \
if (yoffset == 0) { \
highbd_var_filter_block2d_avg(src_ptr, tmp0, src_stride, 1, w, h); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp0), w, ref, ref_stride, sse); \
} else if (yoffset == 4) { \
- uint16_t tmp1[w * (h + padding)]; \
+ uint16_t tmp1[w * (h + 1)]; \
highbd_var_filter_block2d_avg(src_ptr, tmp0, src_stride, 1, w, \
- (h + padding)); \
+ (h + 1)); \
highbd_var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
} else { \
- uint16_t tmp1[w * (h + padding)]; \
+ uint16_t tmp1[w * (h + 1)]; \
highbd_var_filter_block2d_avg(src_ptr, tmp0, src_stride, 1, w, \
- (h + padding)); \
+ (h + 1)); \
highbd_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
} \
} else { \
- uint16_t tmp0[w * (h + padding)]; \
+ uint16_t tmp0[w * (h + 1)]; \
if (yoffset == 0) { \
highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, h, \
xoffset); \
@@ -218,14 +214,14 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr,
} else if (yoffset == 4) { \
uint16_t tmp1[w * h]; \
highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \
- (h + padding), xoffset); \
+ (h + 1), xoffset); \
highbd_var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
} else { \
uint16_t tmp1[w * h]; \
highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \
- (h + padding), xoffset); \
+ (h + 1), xoffset); \
highbd_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
@@ -233,98 +229,88 @@ static void highbd_var_filter_block2d_avg(const uint16_t *src_ptr,
} \
}
-// 4x<h> blocks are processed two rows at a time, so require an extra row of
-// padding.
-
// 8-bit
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-// HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 4, 2)
-// HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 8, 2)
+HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 4)
+HBD_SUBPEL_VARIANCE_WXH_NEON(8, 4, 8)
-HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 4, 1)
-HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 8, 1)
-HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 16, 1)
+HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 4)
+HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 8)
+HBD_SUBPEL_VARIANCE_WXH_NEON(8, 8, 16)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 8, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 16, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 32, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 8)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 16)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 16, 32)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 16, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 32, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 64, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 16)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 32)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 32, 64)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 32, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 64, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 32)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(8, 64, 64)
// 10-bit
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-// HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 4, 2)
-// HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 8, 2)
+HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 4)
+HBD_SUBPEL_VARIANCE_WXH_NEON(10, 4, 8)
-HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 4, 1)
-HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 8, 1)
-HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 16, 1)
+HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 4)
+HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 8)
+HBD_SUBPEL_VARIANCE_WXH_NEON(10, 8, 16)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 8, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 16, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 32, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 8)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 16)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 16, 32)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 16, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 32, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 64, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 16)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 32)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 32, 64)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 32, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 64, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 32)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(10, 64, 64)
// 12-bit
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-// HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 4, 2)
-// HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 8, 2)
+HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 4)
+HBD_SUBPEL_VARIANCE_WXH_NEON(12, 4, 8)
-HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 4, 1)
-HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 8, 1)
-HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 16, 1)
+HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 4)
+HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 8)
+HBD_SUBPEL_VARIANCE_WXH_NEON(12, 8, 16)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 8, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 16, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 32, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 8)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 16)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 16, 32)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 16, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 32, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 64, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 16)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 32)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 32, 64)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 32, 1)
-HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 64, 1)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 32)
+HBD_SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(12, 64, 64)
// Combine bilinear filter with vpx_highbd_comp_avg_pred for blocks having
// width 4.
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-#if 0
static void highbd_avg_pred_var_filter_block2d_bil_w4(
const uint16_t *src_ptr, uint16_t *dst_ptr, int src_stride, int pixel_step,
int dst_height, int filter_offset, const uint16_t *second_pred) {
- const uint16x8_t f0 = vdupq_n_u16(8 - filter_offset);
- const uint16x8_t f1 = vdupq_n_u16(filter_offset);
+ const uint16x4_t f0 = vdup_n_u16(8 - filter_offset);
+ const uint16x4_t f1 = vdup_n_u16(filter_offset);
int i = dst_height;
do {
- uint16x8_t s0 = load_unaligned_u16q(src_ptr, src_stride);
- uint16x8_t s1 = load_unaligned_u16q(src_ptr + pixel_step, src_stride);
- uint16x8_t p = vld1q_u16(second_pred);
+ uint16x4_t s0 = load_unaligned_u16(src_ptr);
+ uint16x4_t s1 = load_unaligned_u16(src_ptr + pixel_step);
+ uint16x4_t p = vld1_u16(second_pred);
- uint16x8_t blend = vmulq_u16(s0, f0);
- blend = vmlaq_u16(blend, s1, f1);
- blend = vrshrq_n_u16(blend, 3);
+ uint16x4_t blend = vmul_u16(s0, f0);
+ blend = vmla_u16(blend, s1, f1);
+ blend = vrshr_n_u16(blend, 3);
- vst1q_u16(dst_ptr, vrhaddq_u16(blend, p));
+ vst1_u16(dst_ptr, vrhadd_u16(blend, p));
- src_ptr += 2 * src_stride;
- dst_ptr += 2 * 4;
- second_pred += 2 * 4;
- i -= 2;
- } while (i != 0);
+ src_ptr += src_stride;
+ dst_ptr += 4;
+ second_pred += 4;
+ } while (--i != 0);
}
-#endif // 0
// Combine bilinear filter with vpx_highbd_comp_avg_pred for large blocks.
static void highbd_avg_pred_var_filter_block2d_bil_large(
@@ -444,25 +430,25 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr,
} while (--i != 0);
}
-#define HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \
- uint32_t vpx_highbd_##bitdepth##_sub_pixel_avg_variance##w##x##h##_neon( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t tmp0[w * (h + padding)]; \
- uint16_t tmp1[w * h]; \
- uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src); \
- \
- highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, \
- (h + padding), xoffset); \
- highbd_avg_pred_var_filter_block2d_bil_w##w( \
- tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \
- \
- return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
- CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
+#define HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h) \
+ uint32_t vpx_highbd_##bitdepth##_sub_pixel_avg_variance##w##x##h##_neon( \
+ const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+ const uint8_t *ref, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t tmp0[w * (h + 1)]; \
+ uint16_t tmp1[w * h]; \
+ uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src); \
+ \
+ highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, src_stride, 1, (h + 1), \
+ xoffset); \
+ highbd_avg_pred_var_filter_block2d_bil_w##w( \
+ tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \
+ \
+ return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
+ CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
}
-#define HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h, padding) \
+#define HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(bitdepth, w, h) \
unsigned int vpx_highbd_##bitdepth##_sub_pixel_avg_variance##w##x##h##_neon( \
const uint8_t *src, int source_stride, int xoffset, int yoffset, \
const uint8_t *ref, int ref_stride, unsigned int *sse, \
@@ -490,7 +476,7 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr,
CONVERT_TO_BYTEPTR(tmp), w, ref, ref_stride, sse); \
} \
} else if (xoffset == 4) { \
- uint16_t tmp0[w * (h + padding)]; \
+ uint16_t tmp0[w * (h + 1)]; \
if (yoffset == 0) { \
highbd_avg_pred_var_filter_block2d_avg( \
src_ptr, tmp0, source_stride, 1, w, h, \
@@ -498,24 +484,24 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr,
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp0), w, ref, ref_stride, sse); \
} else if (yoffset == 4) { \
- uint16_t tmp1[w * (h + padding)]; \
+ uint16_t tmp1[w * (h + 1)]; \
highbd_var_filter_block2d_avg(src_ptr, tmp0, source_stride, 1, w, \
- (h + padding)); \
+ (h + 1)); \
highbd_avg_pred_var_filter_block2d_avg( \
tmp0, tmp1, w, w, w, h, CONVERT_TO_SHORTPTR(second_pred)); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
} else { \
- uint16_t tmp1[w * (h + padding)]; \
+ uint16_t tmp1[w * (h + 1)]; \
highbd_var_filter_block2d_avg(src_ptr, tmp0, source_stride, 1, w, \
- (h + padding)); \
+ (h + 1)); \
highbd_avg_pred_var_filter_block2d_bil_w##w( \
tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
CONVERT_TO_BYTEPTR(tmp1), w, ref, ref_stride, sse); \
} \
} else { \
- uint16_t tmp0[w * (h + padding)]; \
+ uint16_t tmp0[w * (h + 1)]; \
if (yoffset == 0) { \
highbd_avg_pred_var_filter_block2d_bil_w##w( \
src_ptr, tmp0, source_stride, 1, h, xoffset, \
@@ -525,7 +511,7 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr,
} else if (yoffset == 4) { \
uint16_t tmp1[w * h]; \
highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, source_stride, 1, \
- (h + padding), xoffset); \
+ (h + 1), xoffset); \
highbd_avg_pred_var_filter_block2d_avg( \
tmp0, tmp1, w, w, w, h, CONVERT_TO_SHORTPTR(second_pred)); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
@@ -533,7 +519,7 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr,
} else { \
uint16_t tmp1[w * h]; \
highbd_var_filter_block2d_bil_w##w(src_ptr, tmp0, source_stride, 1, \
- (h + padding), xoffset); \
+ (h + 1), xoffset); \
highbd_avg_pred_var_filter_block2d_bil_w##w( \
tmp0, tmp1, w, w, h, yoffset, CONVERT_TO_SHORTPTR(second_pred)); \
return vpx_highbd_##bitdepth##_variance##w##x##h##_neon( \
@@ -542,65 +528,59 @@ static void highbd_avg_pred(const uint16_t *src_ptr, uint16_t *dst_ptr,
} \
}
-// 4x<h> blocks are processed two rows at a time, so require an extra row of
-// padding.
-
// 8-bit
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 4, 2)
-// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 8, 2)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 4)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 4, 8)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 4, 1)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 8, 1)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 16, 1)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 4)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 8)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 8, 16)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 8, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 16, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 32, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 8)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 16)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 16, 32)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 16, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 32, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 64, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 16)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 32)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 32, 64)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 32, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 64, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 32)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(8, 64, 64)
// 10-bit
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 4, 2)
-// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 8, 2)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 4)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 4, 8)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 4, 1)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 8, 1)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 16, 1)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 4)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 8)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 8, 16)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 8, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 16, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 32, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 8)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 16)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 16, 32)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 16, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 32, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 64, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 16)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 32)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 32, 64)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 32, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 64, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 32)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(10, 64, 64)
// 12-bit
-// TODO(https://crbug.com/webm/1796): enable after heap overflow is fixed.
-// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 4, 2)
-// HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 8, 2)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 4)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 4, 8)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 4, 1)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 8, 1)
-HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 16, 1)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 4)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 8)
+HBD_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 8, 16)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 8, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 16, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 32, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 8)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 16)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 16, 32)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 16, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 32, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 64, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 16)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 32)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 32, 64)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 32, 1)
-HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 64, 1)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 32)
+HBD_SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON(12, 64, 64)
diff --git a/vpx_dsp/arm/mem_neon.h b/vpx_dsp/arm/mem_neon.h
index 400846b70..fa14f80b2 100644
--- a/vpx_dsp/arm/mem_neon.h
+++ b/vpx_dsp/arm/mem_neon.h
@@ -164,6 +164,15 @@ static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf,
return vreinterpret_u8_u32(a_u32);
}
+// Load 8 bytes when alignment is not guaranteed.
+static INLINE uint16x4_t load_unaligned_u16(const uint16_t *buf) {
+ uint64_t a;
+ uint64x1_t a_u64 = vdup_n_u64(0);
+ memcpy(&a, buf, 8);
+ a_u64 = vset_lane_u64(a, a_u64, 0);
+ return vreinterpret_u16_u64(a_u64);
+}
+
// Load 2 sets of 8 bytes when alignment is not guaranteed.
static INLINE uint16x8_t load_unaligned_u16q(const uint16_t *buf,
ptrdiff_t stride) {
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index e666f4a90..3baf16cc8 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1446,13 +1446,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2 neon/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_12_sub_pixel_variance4x8 neon/;
+ specialize qw/vpx_highbd_12_sub_pixel_variance4x8 neon/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_12_sub_pixel_variance4x4 neon/;
+ specialize qw/vpx_highbd_12_sub_pixel_variance4x4 neon/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2 neon/;
@@ -1488,13 +1484,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2 neon/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_10_sub_pixel_variance4x8 neon/;
+ specialize qw/vpx_highbd_10_sub_pixel_variance4x8 neon/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_10_sub_pixel_variance4x4 neon/;
+ specialize qw/vpx_highbd_10_sub_pixel_variance4x4 neon/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2 neon/;
@@ -1530,13 +1522,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2 neon/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_8_sub_pixel_variance4x8 neon/;
+ specialize qw/vpx_highbd_8_sub_pixel_variance4x8 neon/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_8_sub_pixel_variance4x4 neon/;
+ specialize qw/vpx_highbd_8_sub_pixel_variance4x4 neon/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2 neon/;
@@ -1572,13 +1560,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2 neon/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x8 neon/;
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x8 neon/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x4 neon/;
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance4x4 neon/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2 neon/;
@@ -1614,13 +1598,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2 neon/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x8 neon/;
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x8 neon/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x4 neon/;
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance4x4 neon/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2 neon/;
@@ -1656,13 +1636,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2 neon/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x8 neon/;
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x8 neon/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- # TODO(https://crbug.com/webm/1796): enable neon after heap overflow is
- # fixed.
- # specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x4 neon/;
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance4x4 neon/;
} # CONFIG_VP9_HIGHBITDEPTH