Merge changes Ia3647698,I55caf34e,Id2c60f39 into main

* changes:
  vpx_dsp_common.h,clip_pixel: work around VS2022 Arm64 issue
  fdct_partial_neon.c: work around VS2022 Arm64 issue
  fdct8x8_test.cc: work around VS2022 Arm64 issue
author: James Zern <jzern@google.com> 2023-05-25 04:54:09 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2023-05-25 04:54:09 +0000
commit: ad5677eafceac4eccf7a7fd506a4e1f081cea22d (patch)
tree: b273a64685587e0957b94fbac9c802e31789bf04
parent: 47fa9804b2d98e2b456329d4e0e3bee10f208c1d (diff)
parent: 25f2e1ef255e89d5e7357aa2427926776327765a (diff)
download: libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.tar
libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.tar.gz
libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.tar.bz2
libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.zip
3 files changed, 36 insertions, 0 deletions
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index fcc84690a..21f8dcffa 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -132,6 +132,15 @@ void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
+// Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
+// produces invalid code in RunExtremalCheck() and RunInvAccuracyCheck().
+// See:
+// https://developercommunity.visualstudio.com/t/1770-preview-1:-Misoptimization-for-AR/10369786
+// TODO(jzern): check the compiler version after a fix for the issue is
+// released.
+#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
+#pragma optimize("", off)
+#endif
 class FwdTrans8x8TestBase {
  public:
   virtual ~FwdTrans8x8TestBase() {}
@@ -523,6 +532,9 @@ class FwdTrans8x8TestBase {
   vpx_bit_depth_t bit_depth_;
   int mask_;
 };
+#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
+#pragma optimize("", on)
+#endif
 
 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
                        public ::testing::TestWithParam<Dct8x8Param> {
diff --git a/vpx_dsp/arm/fdct_partial_neon.c b/vpx_dsp/arm/fdct_partial_neon.c
index 718dba0d9..df0da543c 100644
--- a/vpx_dsp/arm/fdct_partial_neon.c
+++ b/vpx_dsp/arm/fdct_partial_neon.c
@@ -37,6 +37,15 @@ void vpx_fdct4x4_1_neon(const int16_t *input, tran_low_t *output, int stride) {
   output[1] = 0;
 }
 
+// Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
+// will fail with an internal compiler error.
+// See:
+// https://developercommunity.visualstudio.com/t/Compiler-crash-C1001-when-building-a-for/10346110
+// TODO(jzern): check the compiler version after a fix for the issue is
+// released.
+#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
+#pragma optimize("", off)
+#endif
 void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) {
   int r;
   int16x8_t sum = vld1q_s16(&input[0]);
@@ -49,6 +58,9 @@ void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) {
   output[0] = (tran_low_t)horizontal_add_int16x8(sum);
   output[1] = 0;
 }
+#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
+#pragma optimize("", on)
+#endif
 
 void vpx_fdct16x16_1_neon(const int16_t *input, tran_low_t *output,
                           int stride) {
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h
index 2de449546..4b946d756 100644
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -45,9 +45,21 @@ typedef int16_t tran_low_t;
 
 typedef int16_t tran_coef_t;
 
+// Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
+// produces invalid code for clip_pixel() when the return type is uint8_t.
+// See:
+// https://developercommunity.visualstudio.com/t/Misoptimization-for-ARM64-in-VS-2022-17/10363361
+// TODO(jzern): check the compiler version after a fix for the issue is
+// released.
+#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
+static INLINE int clip_pixel(int val) {
+  return (val > 255) ? 255 : (val < 0) ? 0 : val;
+}
+#else
 static INLINE uint8_t clip_pixel(int val) {
   return (val > 255) ? 255 : (val < 0) ? 0 : val;
 }
+#endif
 
 static INLINE int clamp(int value, int low, int high) {
   return value < low ? low : (value > high ? high : value);
author	James Zern <jzern@google.com>	2023-05-25 04:54:09 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	2023-05-25 04:54:09 +0000
commit	ad5677eafceac4eccf7a7fd506a4e1f081cea22d (patch)
tree	b273a64685587e0957b94fbac9c802e31789bf04
parent	47fa9804b2d98e2b456329d4e0e3bee10f208c1d (diff)
parent	25f2e1ef255e89d5e7357aa2427926776327765a (diff)
download	libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.tar libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.tar.gz libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.tar.bz2 libvpx-ad5677eafceac4eccf7a7fd506a4e1f081cea22d.zip