From 577802f1427d1599598aec9a28f25201efc892b1 Mon Sep 17 00:00:00 2001 From: Rudi Heitbaum Date: Sun, 18 Aug 2024 05:06:10 +0000 Subject: [PATCH] gcc: 14.2 upstream fix for building libaom on arm64 ref: - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115464 - https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=32b21292adb6ad6b5e1d60d923a773e4d0daca7b - https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=86dacfb06b90371458d58872f461d358a0834305 - https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/releases/gcc-14 --- ...-14.2.0-fix-building-libaom-on-arm64.patch | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 packages/lang/gcc/patches/gcc-14.2.0-fix-building-libaom-on-arm64.patch diff --git a/packages/lang/gcc/patches/gcc-14.2.0-fix-building-libaom-on-arm64.patch b/packages/lang/gcc/patches/gcc-14.2.0-fix-building-libaom-on-arm64.patch new file mode 100644 index 0000000000..94494e054d --- /dev/null +++ b/packages/lang/gcc/patches/gcc-14.2.0-fix-building-libaom-on-arm64.patch @@ -0,0 +1,199 @@ +From 32b21292adb6ad6b5e1d60d923a773e4d0daca7b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Fri, 16 Aug 2024 07:53:01 +0100 +Subject: [PATCH] aarch64: Fix invalid nested subregs [PR115464] + +The testcase extracts one arm_neon.h vector from a pair (one subreg) +and then reinterprets the result as an SVE vector (another subreg). +Each subreg makes sense individually, but we can't fold them together +into a single subreg: it's 32 bytes -> 16 bytes -> 16*N bytes, +but the interpretation of 32 bytes -> 16*N bytes depends on +whether N==1 or N>1. + +Since the second subreg makes sense individually, simplify_subreg +should bail out rather than ICE on it. simplify_gen_subreg will +then do the same (because it already checks validate_subreg). +This leaves simplify_gen_subreg returning null, requiring the +caller to take appropriate action. + +I think this is relatively likely to occur elsewhere, so the patch +adds a helper for forcing a subreg, allowing a temporary pseudo to +be created where necessary. + +I'll follow up by using force_subreg in more places. This patch +is intended to be a minimal backportable fix for the PR. + +gcc/ + PR target/115464 + * simplify-rtx.cc (simplify_context::simplify_subreg): Don't try + to fold two subregs together if their relationship isn't known + at compile time. + * explow.h (force_subreg): Declare. + * explow.cc (force_subreg): New function. + * config/aarch64/aarch64-sve-builtins-base.cc + (svset_neonq_impl::expand): Use it instead of simplify_gen_subreg. + +gcc/testsuite/ + PR target/115464 + * gcc.target/aarch64/sve/acle/general/pr115464.c: New test. + +(cherry picked from commit 0970ff46ba6330fc80e8736fc05b2eaeeae0b6a0) +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +- + gcc/explow.cc | 15 +++++++++++++++ + gcc/explow.h | 2 ++ + gcc/simplify-rtx.cc | 5 +++++ + .../aarch64/sve/acle/general/pr115464.c | 13 +++++++++++++ + 5 files changed, 36 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index 0d2edf3f19e..c9182594bc1 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1174,7 +1174,7 @@ public: + Advanced SIMD argument as an SVE vector. */ + if (!BYTES_BIG_ENDIAN + && is_undef (CALL_EXPR_ARG (e.call_expr, 0))) +- return simplify_gen_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0); ++ return force_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0); + + rtx_vector_builder builder (VNx16BImode, 16, 2); + for (unsigned int i = 0; i < 16; i++) +diff --git a/gcc/explow.cc b/gcc/explow.cc +index 8e5f6b8e680..f6843398c4b 100644 +--- a/gcc/explow.cc ++++ b/gcc/explow.cc +@@ -745,6 +745,21 @@ force_reg (machine_mode mode, rtx x) + return temp; + } + ++/* Like simplify_gen_subreg, but force OP into a new register if the ++ subreg cannot be formed directly. */ ++ ++rtx ++force_subreg (machine_mode outermode, rtx op, ++ machine_mode innermode, poly_uint64 byte) ++{ ++ rtx x = simplify_gen_subreg (outermode, op, innermode, byte); ++ if (x) ++ return x; ++ ++ op = copy_to_mode_reg (innermode, op); ++ return simplify_gen_subreg (outermode, op, innermode, byte); ++} ++ + /* If X is a memory ref, copy its contents to a new temp reg and return + that reg. Otherwise, return X. */ + +diff --git a/gcc/explow.h b/gcc/explow.h +index 16aa02cfb68..cbd1fcb7eb3 100644 +--- a/gcc/explow.h ++++ b/gcc/explow.h +@@ -42,6 +42,8 @@ extern rtx copy_to_suggested_reg (rtx, rtx, machine_mode); + Args are mode (in case value is a constant) and the value. */ + extern rtx force_reg (machine_mode, rtx); + ++extern rtx force_subreg (machine_mode, rtx, machine_mode, poly_uint64); ++ + /* Return given rtx, copied into a new temp reg if it was in memory. */ + extern rtx force_not_mem (rtx); + +diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc +index dceaa13333c..729d408aa55 100644 +--- a/gcc/simplify-rtx.cc ++++ b/gcc/simplify-rtx.cc +@@ -7612,6 +7612,11 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, + poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode); + rtx newx; + ++ /* Make sure that the relationship between the two subregs is ++ known at compile time. */ ++ if (!ordered_p (outersize, innermostsize)) ++ return NULL_RTX; ++ + if (outermode == innermostmode + && known_eq (byte, 0U) + && known_eq (SUBREG_BYTE (op), 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c +new file mode 100644 +index 00000000000..d728d1325ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c +@@ -0,0 +1,13 @@ ++/* { dg-options "-O2" } */ ++ ++#include ++#include ++#include ++ ++svuint16_t ++convolve4_4_x (uint16x8x2_t permute_tbl) ++{ ++ return svset_neonq_u16 (svundef_u16 (), permute_tbl.val[1]); ++} ++ ++/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */ +-- +2.43.5 + +From 86dacfb06b90371458d58872f461d358a0834305 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Fri, 16 Aug 2024 07:53:02 +0100 +Subject: [PATCH] aarch64: Add another use of force_subreg [PR115464] + +This patch includes the testcase from r15-1399 plus a miminal +fix for it, without the other proactive uses of force_subreg. +We can backport other force_subreg calls later if they're shown +to be needed. + +gcc/ + PR target/115464 + * config/aarch64/aarch64-sve-builtins-base.cc + (svset_neonq_impl::expand): Use force_subreg instead of + lowpart_subreg. + +gcc/testsuite/ + PR target/115464 + * gcc.target/aarch64/sve/acle/general/pr115464_2.c: New test. +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 4 +++- + .../gcc.target/aarch64/sve/acle/general/pr115464_2.c | 11 +++++++++++ + 2 files changed, 14 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c9182594bc1..241a249503f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1185,7 +1185,9 @@ public: + if (BYTES_BIG_ENDIAN) + return e.use_exact_insn (code_for_aarch64_sve_set_neonq (mode)); + insn_code icode = code_for_vcond_mask (mode, mode); +- e.args[1] = lowpart_subreg (mode, e.args[1], GET_MODE (e.args[1])); ++ e.args[1] = force_subreg (mode, e.args[1], GET_MODE (e.args[1]), ++ subreg_lowpart_offset (mode, ++ GET_MODE (e.args[1]))); + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[1]); + e.add_input_operand (icode, e.args[0]); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c +new file mode 100644 +index 00000000000..f561c34f732 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c +@@ -0,0 +1,11 @@ ++/* { dg-options "-O2" } */ ++ ++#include ++#include ++#include ++ ++svuint16_t ++convolve4_4_x (uint16x8x2_t permute_tbl, svuint16_t a) ++{ ++ return svset_neonq_u16 (a, permute_tbl.val[1]); ++} +-- +2.43.5 +