From 9e197512a747a06e49974219bbcedb1a9de9f398 Mon Sep 17 00:00:00 2001 From: Rudi Heitbaum Date: Sun, 19 May 2024 01:27:07 +0000 Subject: [PATCH] gcc: 14.1.0: revert Arm: Block predication on atomics [PR111235] ref: - xbmc/xbmc 25221 - LibreELEC/LibreELEC.tv 8899 - https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=0731889c026bfe - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115153 --- ...lock-predication-on-atomics-PR111235.patch | 516 ++++++++++++++++++ 1 file changed, 516 insertions(+) create mode 100644 packages/lang/gcc/patches/gcc-0001-Revert-Arm-Block-predication-on-atomics-PR111235.patch diff --git a/packages/lang/gcc/patches/gcc-0001-Revert-Arm-Block-predication-on-atomics-PR111235.patch b/packages/lang/gcc/patches/gcc-0001-Revert-Arm-Block-predication-on-atomics-PR111235.patch new file mode 100644 index 0000000000..54d6ce2a0e --- /dev/null +++ b/packages/lang/gcc/patches/gcc-0001-Revert-Arm-Block-predication-on-atomics-PR111235.patch @@ -0,0 +1,516 @@ +From 4e9ce2c862668c88b2be04beebdd5a6ba13074bb Mon Sep 17 00:00:00 2001 +From: Rudi Heitbaum +Date: Sun, 19 May 2024 01:23:31 +0000 +Subject: [PATCH] Revert "Arm: Block predication on atomics [PR111235]" + +This reverts commit 0731889c026bfe8d55c4851422ca5ec9d037f7a0. +--- + gcc/config/arm/constraints.md | 9 +- + gcc/config/arm/sync.md | 219 ++++++++---------- + gcc/config/arm/unspecs.md | 4 +- + gcc/testsuite/gcc.dg/rtl/arm/stl-cond.c | 61 +++++ + .../gcc.target/arm/atomic_loaddi_7.c | 2 +- + .../gcc.target/arm/atomic_loaddi_8.c | 2 +- + gcc/testsuite/gcc.target/arm/pr111235.c | 39 ---- + 7 files changed, 168 insertions(+), 168 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/rtl/arm/stl-cond.c + delete mode 100644 gcc/testsuite/gcc.target/arm/pr111235.c + +diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md +index cd159670145..c6159eecf89 100644 +--- a/gcc/config/arm/constraints.md ++++ b/gcc/config/arm/constraints.md +@@ -36,7 +36,7 @@ + ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe + ;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra, + ;; Rg, Ri +-;; in all states: Pg ++;; in all states: Pf, Pg + + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Up, Uf, Ux, Ul +@@ -239,6 +239,13 @@ + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= 256 && ival <= 510"))) + ++(define_constraint "Pf" ++ "Memory models except relaxed, consume or release ones." 
++ (and (match_code "const_int") ++ (match_test "!is_mm_relaxed (memmodel_from_int (ival)) ++ && !is_mm_consume (memmodel_from_int (ival)) ++ && !is_mm_release (memmodel_from_int (ival))"))) ++ + (define_constraint "Pg" + "@internal In Thumb-2 state a constant in range 1 to 32" + (and (match_code "const_int") +diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md +index df8dbe170ca..4f0a13f6c0f 100644 +--- a/gcc/config/arm/sync.md ++++ b/gcc/config/arm/sync.md +@@ -62,110 +62,68 @@ + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + +-(define_insn "arm_atomic_load" +- [(set (match_operand:QHSI 0 "register_operand" "=r,l") ++(define_insn "atomic_load" ++ [(set (match_operand:QHSI 0 "register_operand" "=r,r,l") + (unspec_volatile:QHSI +- [(match_operand:QHSI 1 "memory_operand" "m,m")] +- VUNSPEC_LDR))] +- "" +- "ldr\t%0, %1" +- [(set_attr "arch" "32,any")]) +- +-(define_insn "arm_atomic_load_acquire" +- [(set (match_operand:QHSI 0 "register_operand" "=r") +- (unspec_volatile:QHSI +- [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q")] ++ [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q,Q,Q") ++ (match_operand:SI 2 "const_int_operand" "n,Pf,n")] ;; model + VUNSPEC_LDA))] + "TARGET_HAVE_LDACQ" +- "lda\t%0, %C1" +-) ++ { ++ if (aarch_mm_needs_acquire (operands[2])) ++ { ++ if (TARGET_THUMB1) ++ return "lda\t%0, %1"; ++ else ++ return "lda%?\t%0, %1"; ++ } ++ else ++ { ++ if (TARGET_THUMB1) ++ return "ldr\t%0, %1"; ++ else ++ return "ldr%?\t%0, %1"; ++ } ++ } ++ [(set_attr "arch" "32,v8mb,any") ++ (set_attr "predicable" "yes")]) + +-(define_insn "arm_atomic_store" +- [(set (match_operand:QHSI 0 "memory_operand" "=m,m") +- (unspec_volatile:QHSI +- [(match_operand:QHSI 1 "register_operand" "r,l")] +- VUNSPEC_STR))] +- "" +- "str\t%1, %0"; +- [(set_attr "arch" "32,any")]) +- +-(define_insn "arm_atomic_store_release" +- [(set (match_operand:QHSI 0 "arm_sync_memory_operand" "=Q") ++(define_insn "atomic_store" ++ [(set (match_operand:QHSI 0 "memory_operand" "=Q,Q,Q") + (unspec_volatile:QHSI +- [(match_operand:QHSI 1 "register_operand" "r")] ++ [(match_operand:QHSI 1 "general_operand" "r,r,l") ++ (match_operand:SI 2 "const_int_operand" "n,Pf,n")] ;; model + VUNSPEC_STL))] + "TARGET_HAVE_LDACQ" +- "stl\t%1, %C0") +- +- +-(define_expand "atomic_load" +- [(match_operand:QHSI 0 "register_operand") ;; val out +- (match_operand:QHSI 1 "arm_sync_memory_operand") ;; memory +- (match_operand:SI 2 "const_int_operand")] ;; model +- "" +-{ +- memmodel model = memmodel_from_int (INTVAL (operands[2])); +- +- if (TARGET_HAVE_LDACQ && !is_mm_relaxed (model)) +- { +- emit_insn (gen_arm_atomic_load_acquire (operands[0], operands[1])); +- DONE; +- } +- +- /* The seq_cst model needs a barrier before the load to block reordering with +- earlier accesses. */ +- if (is_mm_seq_cst (model)) +- expand_mem_thread_fence (model); +- +- emit_insn (gen_arm_atomic_load (operands[0], operands[1])); +- +- /* All non-relaxed models need a barrier after the load when load-acquire +- instructions are not available. 
*/ +- if (!is_mm_relaxed (model)) +- expand_mem_thread_fence (model); +- +- DONE; +-}) +- +-(define_expand "atomic_store" +- [(match_operand:QHSI 0 "arm_sync_memory_operand") ;; memory +- (match_operand:QHSI 1 "register_operand") ;; store value +- (match_operand:SI 2 "const_int_operand")] ;; model +- "" +-{ +- memmodel model = memmodel_from_int (INTVAL (operands[2])); +- +- if (TARGET_HAVE_LDACQ && !is_mm_relaxed (model)) +- { +- emit_insn (gen_arm_atomic_store_release (operands[0], operands[1])); +- DONE; +- } +- +- /* All non-relaxed models need a barrier after the load when load-acquire +- instructions are not available. */ +- if (!is_mm_relaxed (model)) +- expand_mem_thread_fence (model); +- +- emit_insn (gen_arm_atomic_store (operands[0], operands[1])); +- +- /* The seq_cst model needs a barrier after the store to block reordering with +- later accesses. */ +- if (is_mm_seq_cst (model)) +- expand_mem_thread_fence (model); +- +- DONE; +-}) ++ { ++ if (aarch_mm_needs_release (operands[2])) ++ { ++ if (TARGET_THUMB1) ++ return "stl\t%1, %0"; ++ else ++ return "stl%?\t%1, %0"; ++ } ++ else ++ { ++ if (TARGET_THUMB1) ++ return "str\t%1, %0"; ++ else ++ return "str%?\t%1, %0"; ++ } ++ } ++ [(set_attr "arch" "32,v8mb,any") ++ (set_attr "predicable" "yes")]) + + ;; An LDRD instruction usable by the atomic_loaddi expander on LPAE targets + + (define_insn "arm_atomic_loaddi2_ldrd" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI +- [(match_operand:DI 1 "memory_operand" "m")] ++ [(match_operand:DI 1 "arm_sync_memory_operand" "Q")] + VUNSPEC_LDRD_ATOMIC))] + "ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_LPAE" +- "ldrd\t%0, %H0, %1" +-) ++ "ldrd%?\t%0, %H0, %C1" ++ [(set_attr "predicable" "yes")]) + + ;; There are three ways to expand this depending on the architecture + ;; features available. As for the barriers, a load needs a barrier +@@ -194,11 +152,6 @@ + DONE; + } + +- /* The seq_cst model needs a barrier before the load to block reordering with +- earlier accesses. */ +- if (is_mm_seq_cst (model)) +- expand_mem_thread_fence (model); +- + /* On LPAE targets LDRD and STRD accesses to 64-bit aligned + locations are 64-bit single-copy atomic. We still need barriers in the + appropriate places to implement the ordering constraints. */ +@@ -207,6 +160,7 @@ + else + emit_insn (gen_arm_load_exclusivedi (operands[0], operands[1])); + ++ + /* All non-relaxed models need a barrier after the load when load-acquire + instructions are not available. 
*/ + if (!is_mm_relaxed (model)) +@@ -492,42 +446,54 @@ + [(set_attr "arch" "32,v8mb")]) + + (define_insn "arm_load_exclusive" +- [(set (match_operand:SI 0 "s_register_operand" "=r") ++ [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI + (unspec_volatile:NARROW +- [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] ++ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua,Ua")] + VUNSPEC_LL)))] + "TARGET_HAVE_LDREXBH" +- "ldrex\t%0, %C1" +-) ++ "@ ++ ldrex%?\t%0, %C1 ++ ldrex\t%0, %C1" ++ [(set_attr "arch" "32,v8mb") ++ (set_attr "predicable" "yes")]) + + (define_insn "arm_load_acquire_exclusive" +- [(set (match_operand:SI 0 "s_register_operand" "=r") ++ [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI + (unspec_volatile:NARROW +- [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] ++ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua,Ua")] + VUNSPEC_LAX)))] + "TARGET_HAVE_LDACQ" +- "ldaex\\t%0, %C1" +-) ++ "@ ++ ldaex%?\\t%0, %C1 ++ ldaex\\t%0, %C1" ++ [(set_attr "arch" "32,v8mb") ++ (set_attr "predicable" "yes")]) + + (define_insn "arm_load_exclusivesi" +- [(set (match_operand:SI 0 "s_register_operand" "=r") ++ [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (unspec_volatile:SI +- [(match_operand:SI 1 "mem_noofs_operand" "Ua")] ++ [(match_operand:SI 1 "mem_noofs_operand" "Ua,Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREX" +- "ldrex\t%0, %C1" +-) ++ "@ ++ ldrex%?\t%0, %C1 ++ ldrex\t%0, %C1" ++ [(set_attr "arch" "32,v8mb") ++ (set_attr "predicable" "yes")]) + + (define_insn "arm_load_acquire_exclusivesi" +- [(set (match_operand:SI 0 "s_register_operand" "=r") ++ [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (unspec_volatile:SI +- [(match_operand:SI 1 "mem_noofs_operand" "Ua")] ++ [(match_operand:SI 1 "mem_noofs_operand" "Ua,Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ" +- "ldaex\t%0, %C1" +-) ++ "@ ++ ldaex%?\t%0, %C1 ++ ldaex\t%0, %C1" ++ [(set_attr "arch" "32,v8mb") ++ (set_attr "predicable" "yes")]) + + (define_insn "arm_load_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") +@@ -535,8 +501,8 @@ + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREXD" +- "ldrexd\t%0, %H0, %C1" +-) ++ "ldrexd%?\t%0, %H0, %C1" ++ [(set_attr "predicable" "yes")]) + + (define_insn "arm_load_acquire_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") +@@ -544,8 +510,8 @@ + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQEXD && ARM_DOUBLEWORD_ALIGN" +- "ldaexd\t%0, %H0, %C1" +-) ++ "ldaexd%?\t%0, %H0, %C1" ++ [(set_attr "predicable" "yes")]) + + (define_insn "arm_store_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=&r") +@@ -564,11 +530,14 @@ + Note that the 1st register always gets the + lowest word in memory. */ + gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2); +- return "strexd\t%0, %2, %H2, %C1"; ++ return "strexd%?\t%0, %2, %H2, %C1"; + } +- return "strex\t%0, %2, %C1"; ++ if (TARGET_THUMB1) ++ return "strex\t%0, %2, %C1"; ++ else ++ return "strex%?\t%0, %2, %C1"; + } +-) ++ [(set_attr "predicable" "yes")]) + + (define_insn "arm_store_release_exclusivedi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") +@@ -581,16 +550,20 @@ + { + /* See comment in arm_store_exclusive above. 
*/ + gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2); +- return "stlexd\t%0, %2, %H2, %C1"; ++ return "stlexd%?\t%0, %2, %H2, %C1"; + } +-) ++ [(set_attr "predicable" "yes")]) + + (define_insn "arm_store_release_exclusive" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") ++ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) +- (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua") ++ (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua,Ua") + (unspec_volatile:QHSI +- [(match_operand:QHSI 2 "s_register_operand" "r")] ++ [(match_operand:QHSI 2 "s_register_operand" "r,r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ" +- "stlex\t%0, %2, %C1") ++ "@ ++ stlex%?\t%0, %2, %C1 ++ stlex\t%0, %2, %C1" ++ [(set_attr "arch" "32,v8mb") ++ (set_attr "predicable" "yes")]) +diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md +index 46ac8b37157..ffc09b2febc 100644 +--- a/gcc/config/arm/unspecs.md ++++ b/gcc/config/arm/unspecs.md +@@ -221,10 +221,8 @@ + VUNSPEC_SC ; Represent a store-register-exclusive. + VUNSPEC_LAX ; Represent a load-register-acquire-exclusive. + VUNSPEC_SLX ; Represent a store-register-release-exclusive. +- VUNSPEC_LDA ; Represent a load-register-acquire. +- VUNSPEC_LDR ; Represent a load-register-relaxed. ++ VUNSPEC_LDA ; Represent a store-register-acquire. + VUNSPEC_STL ; Represent a store-register-release. +- VUNSPEC_STR ; Represent a store-register-relaxed. + VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content. + VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content. + VUNSPEC_SET_FPSCR_NZCVQC ; Represent assign of FPSCR_nzcvqc content. +diff --git a/gcc/testsuite/gcc.dg/rtl/arm/stl-cond.c b/gcc/testsuite/gcc.dg/rtl/arm/stl-cond.c +new file mode 100644 +index 00000000000..e47ca6bf36d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/arm/stl-cond.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile { target arm*-*-* } } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-require-effective-target arm_arch_v8a_ok } */ ++/* { dg-options "-O2 -marm" } */ ++/* { dg-add-options arm_arch_v8a } */ ++ ++/* We want to test that the STL instruction gets the conditional ++ suffix when under a COND_EXEC. However, COND_EXEC is very hard to ++ generate from C code because the atomic_store expansion adds a compiler ++ barrier before the insn, preventing if-conversion. So test the output ++ here with a hand-crafted COND_EXEC wrapped around an STL. */ ++ ++void __RTL (startwith ("final")) foo (int *a, int b) ++{ ++(function "foo" ++ (param "a" ++ (DECL_RTL (reg/v:SI r0)) ++ (DECL_RTL_INCOMING (reg:SI r0)) ++ ) ++ (param "b" ++ (DECL_RTL (reg/v:SI r1)) ++ (DECL_RTL_INCOMING (reg:SI r1)) ++ ) ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK) ++ ++ (insn:TI 7 (parallel [ ++ (set (reg:CC cc) ++ (compare:CC (reg:SI r1) ++ (const_int 0))) ++ (set (reg/v:SI r1) ++ (reg:SI r1 )) ++ ]) ;; {*movsi_compare0} ++ (nil)) ++ ++ ;; A conditional atomic store-release: STLNE for Armv8-A. 
++ (insn 10 (cond_exec (ne (reg:CC cc) ++ (const_int 0)) ++ (set (mem/v:SI (reg/v/f:SI r0) [-1 S4 A32]) ++ (unspec_volatile:SI [ ++ (reg/v:SI r1) ++ (const_int 3) ++ ] VUNSPEC_STL))) ;; {*p atomic_storesi} ++ (expr_list:REG_DEAD (reg:CC cc) ++ (expr_list:REG_DEAD (reg/v:SI r1) ++ (expr_list:REG_DEAD (reg/v/f:SI r0) ++ (nil))))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ (crtl ++ (return_rtx ++ (reg/i:SI r0) ++ ) ;; return_rtx ++ ) ;; crtl ++) ;; function ++} ++ ++/* { dg-final { scan-assembler "stlne" } } */ +diff --git a/gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c b/gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c +index 79e36edbb8f..6743663f1e6 100644 +--- a/gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c ++++ b/gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c +@@ -6,4 +6,4 @@ + #include "atomic_loaddi_seq_cst.x" + + /* { dg-final { scan-assembler-times "ldrexd\tr\[0-9\]+, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 1 } } */ +-/* { dg-final { scan-assembler-times "dmb\tish" 2 } } */ ++/* { dg-final { scan-assembler-times "dmb\tish" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/arm/atomic_loaddi_8.c b/gcc/testsuite/gcc.target/arm/atomic_loaddi_8.c +index 7241d361313..f7bd3e5a2b5 100644 +--- a/gcc/testsuite/gcc.target/arm/atomic_loaddi_8.c ++++ b/gcc/testsuite/gcc.target/arm/atomic_loaddi_8.c +@@ -6,4 +6,4 @@ + #include "atomic_loaddi_seq_cst.x" + + /* { dg-final { scan-assembler-times "ldrd\tr\[0-9\]+, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 1 } } */ +-/* { dg-final { scan-assembler-times "dmb\tish" 2 } } */ ++/* { dg-final { scan-assembler-times "dmb\tish" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/arm/pr111235.c b/gcc/testsuite/gcc.target/arm/pr111235.c +deleted file mode 100644 +index b06a5bfb8e2..00000000000 +--- a/gcc/testsuite/gcc.target/arm/pr111235.c ++++ /dev/null +@@ -1,39 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O2" } */ +-/* { dg-require-effective-target arm_arch_v7a_ok } */ +-/* { dg-add-options arm_arch_v7a } */ +- +-#include +- +-int t0 (int *p, int x) +-{ +- if (x > 100) +- x = atomic_load_explicit (p, memory_order_relaxed); +- return x + 1; +-} +- +-long long t1 (long long *p, int x) +-{ +- if (x > 100) +- x = atomic_load_explicit (p, memory_order_relaxed); +- return x + 1; +-} +- +-void t2 (int *p, int x) +-{ +- if (x > 100) +- atomic_store_explicit (p, x, memory_order_relaxed); +-} +- +-void t3 (long long *p, int x) +-{ +- if (x > 100) +- atomic_store_explicit (p, x, memory_order_relaxed); +-} +- +-/* { dg-final { scan-assembler-times "ldrexd\tr\[0-9\]+, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 2 } } */ +-/* { dg-final { scan-assembler-not "ldrgt" } } */ +-/* { dg-final { scan-assembler-not "ldrdgt" } } */ +-/* { dg-final { scan-assembler-not "ldrexdgt" } } */ +-/* { dg-final { scan-assembler-not "strgt" } } */ +-/* { dg-final { scan-assembler-not "strdgt" } } */ +-- +2.43.0 +
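
For context, the behavioral difference this revert makes is easiest to see in the testsuite churn above: the deleted gcc/testsuite/gcc.target/arm/pr111235.c forbade predicated atomic accesses (scan-assembler-not "strgt" and friends), while the restored gcc.dg/rtl/arm/stl-cond.c expects a conditional store-release ("stlne"). Below is a minimal sketch of the affected source pattern, adapted from the t2() case in the deleted test; the compiler invocation in the comment is illustrative only and is not taken from this patch.

/* Minimal sketch, adapted from t2() in the deleted pr111235.c above.
   With the PR111235 fix in place, GCC must not if-convert the guarded
   store into a predicated "strgt"; with this revert applied, predication
   on atomics is permitted again (compare the restored stl-cond.c test,
   which expects a conditional "stlne").  Illustrative invocation:
   arm-linux-gnueabihf-gcc -O2 -marm -march=armv7-a -S sketch.c  */
#include <stdatomic.h>

void conditional_store (int *p, int x)
{
  if (x > 100)
    atomic_store_explicit (p, x, memory_order_relaxed);
}

With GCC 14.1.0 plus this revert, the store above may again be emitted as a predicated store in ARM state, restoring the pre-0731889c code generation that the GCC 14.1 regression reports referenced in the commit message ran into.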