Merge pull request #8256 from heitbaum/mesa2330

Mesa update to 23.3.0
2025-07-24 11:16:51 +00:00 · 2023-11-30 08:16:48 +04:00 · 2023-11-30 08:16:48 +04:00 · 80f59f03b9
commit 80f59f03b9
parent ffe1b62b8c e7ffc455fa
143 changed files with 127 additions and 17779 deletions
--- a/packages/graphics/mesa/package.mk
+++ b/packages/graphics/mesa/package.mk
@@ -3,8 +3,8 @@
 # Copyright (C) 2018-present Team LibreELEC (https://libreelec.tv)

 PKG_NAME="mesa"
-PKG_VERSION="23.2.1"
-PKG_SHA256="64de0616fc2d801f929ab1ac2a4f16b3e2783c4309a724c8a259b20df8bbc1cc"
+PKG_VERSION="23.3.0"
+PKG_SHA256="50f729dd60ed6335b989095baad81ef5edf7cfdd4b4b48b9b955917cb07d69c5"
 PKG_LICENSE="OSS"
 PKG_SITE="http://www.mesa3d.org/"
 PKG_URL="https://mesa.freedesktop.org/archive/mesa-${PKG_VERSION}.tar.xz"
@@ -13,6 +13,10 @@ PKG_LONGDESC="Mesa is a 3-D graphics library with an API."

 get_graphicdrivers

+if [ "${DEVICE}" = "Dragonboard" ]; then
+  PKG_DEPENDS_TARGET+=" libarchive libxml2 lua54"
+fi
+
 PKG_MESON_OPTS_TARGET="-Dgallium-drivers=${GALLIUM_DRIVERS// /,} \
                       -Dgallium-extra-hud=false \
                       -Dgallium-omx=disabled \
--- a/projects/RPi/devices/RPi5/patches/mesa/0001-broadcom-cle-clif-common-simulator-add-7.1-version-o.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0001-broadcom-cle-clif-common-simulator-add-7.1-version-o.patch
@@ -1,332 +0,0 @@
-From f62aa2640f92796ff5216da0a5d3c8f46a2855b4 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Mon, 26 Apr 2021 00:02:21 +0200
-Subject: [PATCH 001/142] broadcom(cle,clif,common,simulator): add 7.1 version
- on the list of versions to build
-
-This adds 7.1 to the list of available V3D_VERSION, and first changes
-on the simulator needed to get it working.
-
-Note that we needed to touch all those 4 codebases because it is
-needed if we want to use V3D_DEBUG=clif with the simulator, that it is
-the easier way to see which packets a vulkan program is using.
-
-About the simulator, this commit only handle the rename of some
-registers. Any additional changes needed to get a proper support for
-v71 will be handled them on following commits.
---
- src/broadcom/cle/meson.build            |  3 +-
- src/broadcom/cle/v3dx_pack.h            |  2 +
- src/broadcom/clif/clif_private.h        |  2 +
- src/broadcom/common/v3d_device_info.c   |  1 +
- src/broadcom/common/v3d_macros.h        |  3 +
- src/broadcom/meson.build                |  2 +-
- src/broadcom/simulator/v3d_simulator.c  | 81 +++++++++++++++++++------
- src/broadcom/simulator/v3d_simulator.h  |  5 ++
- src/broadcom/simulator/v3dx_simulator.c | 31 ++++++++--
- 9 files changed, 106 insertions(+), 24 deletions(-)
-
-diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build
-index 31a0d5bfa94..8ac32b313e4 100644
--- a/src/broadcom/cle/meson.build
-+++ b/src/broadcom/cle/meson.build
-@@ -23,7 +23,8 @@ v3d_versions = [
-   [21, 21],
-   [33, 33],
-   [41, 33],
-  [42, 33]
-+  [42, 33],
-+  [71, 33]
- ]
- 
- v3d_xml_files = []
-diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h
-index 5762e5aaa70..e5a1eb26698 100644
--- a/src/broadcom/cle/v3dx_pack.h
-+++ b/src/broadcom/cle/v3dx_pack.h
-@@ -37,6 +37,8 @@
- #  include "cle/v3d_packet_v41_pack.h"
- #elif (V3D_VERSION == 42)
- #  include "cle/v3d_packet_v42_pack.h"
-+#elif (V3D_VERSION == 71)
-+#  include "cle/v3d_packet_v71_pack.h"
- #else
- #  error "Need to add a pack header include for this v3d version"
- #endif
-diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h
-index 6ace62b0310..cda407a00bf 100644
--- a/src/broadcom/clif/clif_private.h
-+++ b/src/broadcom/clif/clif_private.h
-@@ -101,6 +101,8 @@ bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
-                             const uint8_t *cl, uint32_t *size, bool reloc_mode);
- bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
-                             const uint8_t *cl, uint32_t *size, bool reloc_mode);
-+bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
-+                            const uint8_t *cl, uint32_t *size, bool reloc_mode);
- 
- static inline void
- out(struct clif_dump *clif, const char *fmt, ...)
-diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
-index 272190eb2e5..7e0862f1f02 100644
--- a/src/broadcom/common/v3d_device_info.c
-+++ b/src/broadcom/common/v3d_device_info.c
-@@ -66,6 +66,7 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
-         case 33:
-         case 41:
-         case 42:
-+        case 71:
-                 break;
-         default:
-                 fprintf(stderr,
-diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h
-index fe89398208a..b4291fb5350 100644
--- a/src/broadcom/common/v3d_macros.h
-+++ b/src/broadcom/common/v3d_macros.h
-@@ -41,6 +41,9 @@
- #elif (V3D_VERSION == 42)
- #  define V3DX(x) V3D42_##x
- #  define v3dX(x) v3d42_##x
-+#elif (V3D_VERSION == 71)
-+#  define V3DX(x) V3D71_##x
-+#  define v3dX(x) v3d71_##x
- #else
- #  error "Need to add prefixing macros for this v3d version"
- #endif
-diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build
-index 2c10e46b188..73cb7aa0575 100644
--- a/src/broadcom/meson.build
-+++ b/src/broadcom/meson.build
-@@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle')
- 
- subdir('cle')
- 
-v3d_versions = ['33', '41', '42']
-+v3d_versions = ['33', '41', '42', '71']
- v3d_libs = []
- 
- if with_gallium_v3d or with_broadcom_vk
-diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c
-index eea5d3f050e..5cceb1a82cc 100644
--- a/src/broadcom/simulator/v3d_simulator.c
-+++ b/src/broadcom/simulator/v3d_simulator.c
-@@ -490,10 +490,20 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
- 
-         v3d_simulator_perfmon_switch(fd, submit->perfmon_id);
- 
-        if (sim_state.ver >= 41)
-                v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
-        else
-                v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
-+        switch(sim_state.ver) {
-+        case 33:
-+           v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
-+           break;
-+        case 41:
-+        case 42:
-+           v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
-+           break;
-+        case 71:
-+           v3d71_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
-+           break;
-+        default:
-+           unreachable("Unsupported V3D version\n");
-+        }
- 
-         util_dynarray_foreach(&sim_state.bin_oom, struct v3d_simulator_bo *,
-                               sim_bo) {
-@@ -635,10 +645,17 @@ v3d_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
- static int
- v3d_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args)
- {
-        if (sim_state.ver >= 41)
-                return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
-        else
-+        switch(sim_state.ver) {
-+        case 33:
-                 return v3d33_simulator_get_param_ioctl(sim_state.v3d, args);
-+        case 41:
-+        case 42:
-+                return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
-+        case 71:
-+                return v3d71_simulator_get_param_ioctl(sim_state.v3d, args);
-+        default:
-+                unreachable("Unsupported V3D version\n");
-+        }
- }
- 
- static int
-@@ -652,10 +669,20 @@ v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args)
-         v3d_simulator_copy_in_handle(file, args->bo_handles[2]);
-         v3d_simulator_copy_in_handle(file, args->bo_handles[3]);
- 
-        if (sim_state.ver >= 41)
-                ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
-        else
-+        switch(sim_state.ver) {
-+        case 33:
-                 ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args);
-+                break;
-+        case 41:
-+        case 42:
-+                ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
-+                break;
-+        case 71:
-+                ret = v3d71_simulator_submit_tfu_ioctl(sim_state.v3d, args);
-+                break;
-+        default:
-+                unreachable("Unsupported V3D version\n");
-+        }
- 
-         v3d_simulator_copy_out_handle(file, args->bo_handles[0]);
- 
-@@ -682,11 +709,19 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
- 
-         v3d_simulator_perfmon_switch(fd, args->perfmon_id);
- 
-        if (sim_state.ver >= 41)
-                ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
-                                                       file->gmp->ofs);
-        else
-                ret = -1;
-+        switch(sim_state.ver) {
-+        case 41:
-+        case 42:
-+           ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
-+                                                  file->gmp->ofs);
-+           break;
-+        case 71:
-+           ret = v3d71_simulator_submit_csd_ioctl(sim_state.v3d, args,
-+                                                  file->gmp->ofs);
-+           break;
-+        default:
-+           ret = -1;
-+        }
- 
-         for (int i = 0; i < args->bo_handle_count; i++)
-                 v3d_simulator_copy_out_handle(file, bo_handles[i]);
-@@ -880,10 +915,20 @@ v3d_simulator_init_global()
- 
-         util_dynarray_init(&sim_state.bin_oom, NULL);
- 
-        if (sim_state.ver >= 41)
-                v3d41_simulator_init_regs(sim_state.v3d);
-        else
-+        switch(sim_state.ver) {
-+        case 33:
-                 v3d33_simulator_init_regs(sim_state.v3d);
-+                break;
-+        case 41:
-+        case 42:
-+                v3d41_simulator_init_regs(sim_state.v3d);
-+                break;
-+        case 71:
-+                v3d71_simulator_init_regs(sim_state.v3d);
-+                break;
-+        default:
-+                unreachable("Not supported V3D version\n");
-+        }
- }
- 
- struct v3d_simulator_file *
-diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h
-index ddb079c1455..1472c313a03 100644
--- a/src/broadcom/simulator/v3d_simulator.h
-+++ b/src/broadcom/simulator/v3d_simulator.h
-@@ -52,6 +52,11 @@ uint32_t v3d_simulator_get_mem_free(void);
- #  define v3dX(x) v3d41_##x
- #  include "v3dx_simulator.h"
- #  undef v3dX
-+
-+#  define v3dX(x) v3d71_##x
-+#  include "v3dx_simulator.h"
-+#  undef v3dX
-+
- #endif
- 
- #endif
-diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
-index c9322f0397b..723796b16c9 100644
--- a/src/broadcom/simulator/v3dx_simulator.c
-+++ b/src/broadcom/simulator/v3dx_simulator.c
-@@ -46,11 +46,15 @@
- 
- #define HW_REGISTER_RO(x) (x)
- #define HW_REGISTER_RW(x) (x)
-#if V3D_VERSION >= 41
-+#if V3D_VERSION == 71
-+#include "libs/core/v3d/registers/7.1.5.1/v3d.h"
-+#else
-+#if V3D_VERSION == 41 || V3D_VERSION == 42
- #include "libs/core/v3d/registers/4.1.35.0/v3d.h"
- #else
- #include "libs/core/v3d/registers/3.3.0.0/v3d.h"
- #endif
-+#endif
- 
- #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
- #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
-@@ -310,16 +314,17 @@ v3d_isr_core(struct v3d_hw *v3d,
-                 return;
-         }
- 
-+#if V3D_VERSION <= 42
-         if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
-                 fprintf(stderr, "GMP violation at 0x%08x\n",
-                         V3D_READ(V3D_GMP_VIO_ADDR));
-                abort();
-         } else {
-                 fprintf(stderr,
-                         "Unexpected ISR with core status 0x%08x\n",
-                         core_status);
-         }
-         abort();
-+#endif
- }
- 
- static void
-@@ -396,6 +401,18 @@ v3d_isr_hub(struct v3d_hw *v3d)
-         }
- 
-         handle_mmu_interruptions(v3d, hub_status);
-+
-+#if V3D_VERSION == 71
-+        if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
-+                fprintf(stderr, "GMP violation at 0x%08x\n",
-+                        V3D_READ(V3D_GMP_VIO_ADDR));
-+        } else {
-+                fprintf(stderr,
-+                        "Unexpected ISR with status 0x%08x\n",
-+                        hub_status);
-+        }
-+        abort();
-+#endif
- }
- 
- static void
-@@ -436,8 +453,11 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
-          * for tracing. Perhaps we should evaluate to do the same here and add
-          * some debug options.
-          */
-        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
-                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
-+        uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
-+#if V3D_VERSION <= 42
-+        core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
-+#endif
-+
-         V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
-         V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
- 
-@@ -447,6 +467,9 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
-             V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
-             V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
- 
-+#if V3D_VERSION == 71
-+        hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
-+#endif
-         V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
-         V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0001-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0001-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch
@@ -1,7 +1,7 @@
-From 3322c102282cf726ae575b122358060abd5b24db Mon Sep 17 00:00:00 2001
+From 54cc206be2d48916862d7e264e886f58b27dd653 Mon Sep 17 00:00:00 2001
 From: Dave Stevenson <dave.stevenson@raspberrypi.com>
 Date: Thu, 5 Oct 2023 19:32:10 +0100
-Subject: [PATCH 142/142] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC
+Subject: [PATCH 1/3] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC
 devices

 Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
@@ -11,7 +11,7 @@ Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
 2 files changed, 6 insertions(+)

 diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build
-index fbec1da957b..59daf3b6fb6 100644
+index 66619bba0db..443923772e8 100644
 --- a/src/gallium/targets/dri/meson.build
 +++ b/src/gallium/targets/dri/meson.build
@@ -68,6 +68,9 @@ libgallium_dri = shared_library(
@@ -22,10 +22,10 @@ index fbec1da957b..59daf3b6fb6 100644
 +               'drm-rp1-dsi_dri.so',
 +               'drm-rp1-vec_dri.so',
                'exynos_dri.so',
+                'hdlcd_dri.so',
                'hx8357d_dri.so',
-                'ili9225_dri.so',
 diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
-index d506869cbb4..ecb25edd03b 100644
+index 9d3069eb004..79f60a7224a 100644
 --- a/src/gallium/targets/dri/target.c
 +++ b/src/gallium/targets/dri/target.c
@@ -98,6 +98,9 @@ DEFINE_LOADER_DRM_ENTRYPOINT(tegra);
@@ -36,8 +36,8 @@ index d506869cbb4..ecb25edd03b 100644
 +DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_dsi)
 +DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_vec)
 DEFINE_LOADER_DRM_ENTRYPOINT(exynos)
+ DEFINE_LOADER_DRM_ENTRYPOINT(hdlcd)
 DEFINE_LOADER_DRM_ENTRYPOINT(hx8357d)
- DEFINE_LOADER_DRM_ENTRYPOINT(ili9225)
 -- 
 2.39.2

--- a/projects/RPi/devices/RPi5/patches/mesa/0002-broadcom-simulator-reset-CFG7-for-compute-dispatch-i.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0002-broadcom-simulator-reset-CFG7-for-compute-dispatch-i.patch
@@ -1,30 +0,0 @@
-From 9e85edd1b347b0e779b393f463f42044a720bcff Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 28 Sep 2021 13:16:49 +0200
-Subject: [PATCH 002/142] broadcom/simulator: reset CFG7 for compute dispatch
- in v71
-
-This register is new in 7.x, it doesn't seem that we need to
-do anything specific for now, but let's make sure it is reset
-every time.
---
- src/broadcom/simulator/v3dx_simulator.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
-index 723796b16c9..f23b0538de3 100644
--- a/src/broadcom/simulator/v3dx_simulator.c
-+++ b/src/broadcom/simulator/v3dx_simulator.c
-@@ -227,6 +227,9 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
-         V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
-         V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
-         V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
-+#if V3D_VERSION >= 71
-+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
-+#endif
-         /* CFG0 kicks off the job */
-         V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0002-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0002-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch
@@ -1,8 +1,8 @@
-From 4f33de7771621e15aae3e3c60c09fd5a2f29bdac Mon Sep 17 00:00:00 2001
+From 80050d6960a688d061eac9798c6f5f1b0eb3e960 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
 Date: Tue, 30 Nov 2021 02:39:20 +0100
-Subject: [PATCH 066/142] nir: add new opcodes to map new v71
- packing/conversion instructions
+Subject: [PATCH 2/3] nir: add new opcodes to map new v71 packing/conversion
+ instructions

 Since v71, broadcom hw include specific packing/conversion
 instructions, so this commit adds opcodes to be able to make use of
@@ -28,17 +28,14 @@ integer.
 Interestingly broadcom also defines a similar one that packs the
 higher halfword. Not used yet.

-FIXME: vftounorm10lo/hi constant expression implementation is somewhat
-convoluted. It is likely that it could be implemented in a more easy
-way. But it works (passing the tests added with CTS issue #3372,
-created with this change in mind).
+Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
 ---
- src/compiler/nir/nir_constant_expressions.py | 106 +++++++++++++++++++
- src/compiler/nir/nir_opcodes.py              |  44 ++++++++
- 2 files changed, 150 insertions(+)
+ src/compiler/nir/nir_constant_expressions.py | 94 ++++++++++++++++++++
+ src/compiler/nir/nir_opcodes.py              | 52 +++++++++++
+ 2 files changed, 146 insertions(+)

 diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
-index e6383b67737..46395d79a89 100644
+index e6383b67737..0d0797526a9 100644
 --- a/src/compiler/nir/nir_constant_expressions.py
 +++ b/src/compiler/nir/nir_constant_expressions.py
@@ -62,6 +62,8 @@ template = """\
@@ -50,7 +47,7 @@ index e6383b67737..46395d79a89 100644
 #include "nir_constant_expressions.h"
 
 /**
-@@ -277,6 +279,110 @@ unpack_half_1x16(uint16_t u)
+@@ -277,6 +279,98 @@ unpack_half_1x16(uint16_t u)
    return _mesa_half_to_float(u);
 }
 
@@ -61,24 +58,22 @@ index e6383b67737..46395d79a89 100644
 +static uint32_t v11fpack_v3d(const uint32_t src0,
 +                             const uint32_t src1)
 +{
-+   float rgb[3];
-+
-+   rgb[0] = unpack_half_1x16((src0 & 0xffff));
-+   rgb[1] = unpack_half_1x16((src0 >> 16));
-+   rgb[2] = unpack_half_1x16((src1 & 0xffff));
+   float rgb[3] = {
+      unpack_half_1x16((src0 & 0xffff)),
+      unpack_half_1x16((src0 >> 16)),
+      unpack_half_1x16((src1 & 0xffff)),
+   };
 +
 +   return float3_to_r11g11b10f(rgb);
 +}
 +
 +/**
 +  * The three methods below are basically wrappers over pack_s/unorm_1x8/1x16,
-+  * as it receives a uint16_t val instead of a float
+  * as they receives a uint16_t val instead of a float
 +  */
-+static uint8_t _mesa_half_to_snorm8(uint16_t val)
+static inline uint8_t _mesa_half_to_snorm8(uint16_t val)
 +{
-+   float x = _mesa_half_to_float(val);
-+
-+   return pack_snorm_1x8(x);
+   return pack_snorm_1x8(_mesa_half_to_float(val));
 +}
 +
 +static uint16_t _mesa_float_to_snorm16(uint32_t val)
@@ -95,51 +90,42 @@ index e6383b67737..46395d79a89 100644
 +   return pack_unorm_1x16(aux.f);
 +}
 +
-+/* FIXME: the implementation below of vftounorm10hi/lo is somewhat too
-+ * verbose. It is likely that there would be a simpler way to implement
-+ * it.
-+ */
-+static uint32_t float_pack16_v3d(uint32_t f32)
+static inline uint32_t float_pack16_v3d(uint32_t f32)
 +{
-+   float f = uif(f32);
-+   return _mesa_float_to_half(f);
+   return _mesa_float_to_half(uif(f32));
 +}
 +
-+static uint32_t float_unpack16_v3d(uint32_t f16)
+static inline uint32_t float_unpack16_v3d(uint32_t f16)
 +{
-+   float f = _mesa_half_to_float(f16);
-+   return fui(f);
+   return fui(_mesa_half_to_float(f16));
 +}
 +
-+static uint32_t vfpack_v3d(uint32_t a, uint32_t b)
+static inline uint32_t vfpack_v3d(uint32_t a, uint32_t b)
 +{
 +   return float_pack16_v3d(b) << 16 | float_pack16_v3d(a);
 +}
 +
-+static  uint32_t vfsat_v3d(uint32_t a)
+static inline uint32_t vfsat_v3d(uint32_t a)
 +{
-+   return vfpack_v3d(
-+      fui(SATURATE(_mesa_half_to_float(a & 0xffff))),
-+      fui(SATURATE(_mesa_half_to_float(a >> 16))));
+   const uint32_t low = fui(SATURATE(_mesa_half_to_float(a & 0xffff)));
+   const uint32_t high = fui(SATURATE(_mesa_half_to_float(a >> 16)));
+
+   return vfpack_v3d(low, high);
 +}
 +
-+static uint32_t fmul_v3d(uint32_t a, uint32_t b)
+static inline uint32_t fmul_v3d(uint32_t a, uint32_t b)
 +{
-+   float f = uif(a);
-+   float g = uif(b);
-+
-+   float x = f * g;
-+
-+   return fui(x);
+   return fui(uif(a) * uif(b));
 +}
 +
-+#define L(x) float_unpack16_v3d((x) & 0xffff)
-+#define H(x) float_unpack16_v3d((x) >> 16)
-+#define V(f,a,b) vfpack_v3d(f(L(a), L(b)), f(H(a), H(b)))
-+
 +static uint32_t vfmul_v3d(uint32_t a, uint32_t b)
 +{
-+   return V(fmul_v3d, a, b);
+   const uint32_t low = fmul_v3d(float_unpack16_v3d(a & 0xffff),
+                                 float_unpack16_v3d(b & 0xffff));
+   const uint32_t high = fmul_v3d(float_unpack16_v3d(a >> 16),
+                                  float_unpack16_v3d(b >> 16));
+
+   return vfpack_v3d(low, high);
 +}
 +
 +/* Convert 2x16-bit floating point to 2x10-bit unorm */
@@ -156,34 +142,41 @@ index e6383b67737..46395d79a89 100644
 +{
 +   return vfmul_v3d(vfsat_v3d(src0), 0x000303ff);
 +}
-+
 +
 /* Some typed vector structures to make things like src0.y work */
 typedef int8_t int1_t;
 typedef uint8_t uint1_t;
 diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
-index e4d87aa6126..63aa7cfa315 100644
+index 0f81328f441..b70d9567cd6 100644
 --- a/src/compiler/nir/nir_opcodes.py
 +++ b/src/compiler/nir/nir_opcodes.py
-@@ -1393,6 +1393,50 @@ for (int i = 0; i < 32; i += 8) {
+@@ -1413,6 +1413,58 @@ for (int i = 0; i < 32; i += 8) {
 }
 """)
 
 +# v3d-specific opcodes
 +
-+# v3d-specific (v71) instruction that packs parts of 2 2x16 floating point into
-+# r11g11b10 bits, rounding to nearest even
+# v3d-specific (v71) instruction that packs bits of 2 2x16 floating point into
+# r11g11b10 bits, rounding to nearest even, so
+#  dst[10:0]  = float16_to_float11 (src0[15:0])
+#  dst[21:11] = float16_to_float11 (src0[31:16])
+#  dst[31:22] = float16_to_float10 (src1[15:0])
 +binop_convert("v11fpack_v3d", tuint32, tuint32, "",
 +              "v11fpack_v3d(src0, src1)")
 +
 +# v3d-specific (v71) instruction that packs 2x32 bit to 2x16 bit integer. The
 +# difference with pack_32_2x16_split is that the sources are 32bit too. So it
-+# receives 2 32-bit integer, and pack the lower halfword as 2x16 on a 32-bit
-+# pack.
+# receives 2 32-bit integer, and packs the lower halfword as 2x16 on a 32-bit
+# integer.
 +binop_horiz("vpack_v3d", 1, tuint32, 1, tuint32, 1, tuint32,
 +            "(src0.x & 0xffff) | (src1.x << 16)")
 +
-+# v3d-specific (v71) instruction that packs parts of 2 2x16 integers into r10g10b10a2
+# v3d-specific (v71) instruction that packs bits of 2 2x16 integers into
+# r10g10b10a2:
+#   dst[9:0]   = src0[9:0]
+#   dst[19:10] = src0[25:16]
+#   dst[29:20] = src1[9:0]
+#   dst[31:30] = src1[17:16]
 +binop_convert("v10pack_v3d", tuint32, tuint32, "",
 +              "(src0 & 0x3ff) | ((src0 >> 16) & 0x3ff) << 10 | (src1 & 0x3ff) << 20 | ((src1 >> 16) & 0x3ff) << 30")
 +
--- a/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-cle-update-the-packet-definitions-for-new-g.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-cle-update-the-packet-definitions-for-new-g.patch
@ -1,712 +0,0 @@
-From 6f744bc4bec98f9769486d427e8e2d4e314ae056 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 29 Jun 2021 12:03:24 +0200
-Subject: [PATCH 003/142] broadcom/cle: update the packet definitions for new
- generation v71
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Using as reference the spec for 7.1.5. This includes totally new
-packets, and redefines some that already existed on v42.
-
-Full list:
- * Add Depth Bounds Test Limits
- * Redefine Tile Binning Mode Cfg
- * Redefine Cfg Bits. There are some changes on the fields:
-   * Line Rasterization is now 1 bit size
-   * Depth Bounds Enable (that takes one of the bits of Line Rasterization)
-   * Early-Z/Early-Z updates enable bits (16-17) figure now as reserved.
-   * New Z-Clipping mode field
- * Redefine Tile Rendering Mode Cfg (Common). Changes with respect to v42:
-   * New log2 tile height/width fields starting at bit 52/55
-   * Due to those two new fields, the end pad is smaller
-   * sub-id has now a size of 3. Bit 4 is reserved.
-   * Number of render targets: this field max value is now 7 (not
-     reflected on the xml).
-   * Maximum BPP is removed on v71 (now bits 40-41 are reserved)
-   * Depth Buffer disable: on bit 44
- * Update Store Tile Buffer General
- * Adding Cfg Render Target Part1/2/3 packets: they replace v4X "Tile
-   Rendering Mode Cfg (Color)" (real name "Rendering Configuration
-   (Render Targets Config)"), "Tile Rendering Mode Cfg (Clear Colors
-   Part1)", "Tile Rendering Mode Cfg (Clear Colors Part2)", and "Tile
-   Rendering Mode Cfg (Clear Colors Part3)". On those old versions,
-   the first packet is used to configure 4 render targets. Now that 8
-   are supported, individual per-render-target packets are used.
- * Update ZS clear values packet.
- * Add new v71 output formats
- * Define Clear Render Targets (Replaces Clear Tile Buffers from v42)
- * Redefine GL Shader State Record. Changes compared with v42:
-   * Fields removed:
-     * "Coordinate shader has separate input and output VPM blocks"
-       (reserved bit now)
-     * "Vertex shader has separate input and output VPM blocks"
-       (reserved bit now)
-     * "Address of table of default attribute Values." (we needed to
-       change the start position for all the following fields)
-   * New field:
-     * "Never defer FEP depth writes to fragment shader auto Z writes
-        on scoreboard conflict"
- * Redefine clipper xy scaling: Now it uses 1/64ths of pixels, instead
-   of 1/256ths
- * Update texture shader state.
-   * Notice we don't use an address type for these fields in the XML
-     description. This is because the addresses are 64-bit aligned
-     (even though the PRM doesn't say it) which means the 6 LSB bits
-     are implicitly 0, but the fields are encoded before the 6th bit
-     of their starting byte, so we can't use the usual trick we do
-     with address types where the first 6 bits in the byte are
-     implicitly overwritten by other fields and we have to encode this
-     manually as a uint field. This would mean that if we had an
-     actual BO we would also need to add it manually to the job's
-     list, but since we don't have one, we don't have to do anything
-     about it.
-   * Add new RB_Swap field for texture shader state
-   * Document Cb/Cr addresses as uint fields in texture shader state
- * Fixup Blend Config description: we now support 8 RTs.
- * TMU config parameter 2 has new fields
- * Add new clipper Z without guardband packet in v71
- * Add enums for the Z clip modes accepted in v71
- * Fix texture state array stride packing for V3D 7.1.5
-
-Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
-Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
-
-broadcom/cle: rb_swap
---
- src/broadcom/cle/v3d_packet_v33.xml | 386 ++++++++++++++++++++++++++--
- 1 file changed, 368 insertions(+), 18 deletions(-)
-
-diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml
-index a0242b5f1c2..624353ca2bf 100644
--- a/src/broadcom/cle/v3d_packet_v33.xml
-+++ b/src/broadcom/cle/v3d_packet_v33.xml
-@@ -1,4 +1,4 @@
-<vcxml gen="3.3" min_ver="33" max_ver="42">
-+<vcxml gen="3.3" min_ver="33" max_ver="71">
- 
-   <enum name="Compare Function" prefix="V3D_COMPARE_FUNC">
-     <value name="NEVER" value="0"/>
-@@ -167,13 +167,36 @@
-     <value name="depth_16" value="2"/>
-   </enum>
- 
-  <enum name="Render Target Clamp" prefix="V3D_RENDER_TARGET_CLAMP" min_ver="41">
-+  <enum name="Render Target Clamp" prefix="V3D_RENDER_TARGET_CLAMP" min_ver="41" max_ver="42">
-     <value name="none" value="0"/> <!-- no clamping -->
-     <value name="norm" value="1"/> <!-- [0,1] for f16 -->
-     <value name="pos" value="2"/> <!-- [0, for f16 -->
-     <value name="int" value="3" min_ver="42"/> <!-- clamp to integer RT's range -->
-   </enum>
- 
-+  <enum name="Render Target Type Clamp" prefix="V3D_RENDER_TARGET_TYPE_CLAMP" min_ver="71">
-+    <value name="8i"             value="0"/>  <!-- no clamping -->
-+    <value name="16i"            value="1"/>  <!-- no clamping -->
-+    <value name="32i"            value="2"/>  <!-- no clamping -->
-+    <value name="8ui"            value="4"/>  <!-- no clamping -->
-+    <value name="16ui"           value="5"/>  <!-- no clamping -->
-+    <value name="32ui"           value="6"/>  <!-- no clamping -->
-+    <value name="8"              value="8"/>  <!-- no clamping -->
-+    <value name="16f"            value="9"/>  <!-- no clamping -->
-+    <value name="32f"            value="10"/> <!-- no clamping -->
-+    <value name="8i_clamped"     value="16"/> <!-- clamp to integer RT's range -->
-+    <value name="16i_clamped"    value="17"/> <!-- clamp to integer RT's range -->
-+    <value name="32i_clamped"    value="18"/> <!-- clamp to integer RT's range -->
-+    <value name="8ui_clamped"    value="20"/> <!-- clamp to integer RT's range -->
-+    <value name="16ui_clamped"   value="21"/> <!-- clamp to integer RT's range -->
-+    <value name="32ui_clamped"   value="22"/> <!-- clamp to integer RT's range -->
-+    <value name="16f_clamp_norm" value="24"/> <!-- [0,1] for f16 -->
-+    <value name="16f_clamp_pos"  value="25"/> <!-- [0, for f16 -->
-+    <value name="16f_clamp_pq"   value="26"/> <!-- PQ lin range, colour to [0, 125], alpha to [0, 1] for f16 -->
-+    <value name="16f_clamp_hlg"  value="27"/> <!-- HLG lin range, colour to [0, 12], alpha to [0, 1] for f16 -->
-+    <value name="invalid"        value="32"/>
-+  </enum>
-+
-   <!---
-     CL cache flush commands are not fully documented and subject to a
-     number of hardware issues that make them unreliable. Specifically:
-@@ -263,13 +286,27 @@
-     <value name="r8ui"     value="36"/>
-     <value name="srgbx8"   value="37" max_ver="33"/>
-     <value name="rgbx8"    value="38" max_ver="33"/>
-    <value name="bstc"     value="39" min_ver="41"/>
-+    <value name="bstc8"    value="39" min_ver="41"/>
-     <value name="d32f"     value="40" min_ver="41"/>
-     <value name="d24"      value="41" min_ver="41"/>
-     <value name="d16"      value="42" min_ver="41"/>
-     <value name="d24s8"    value="43" min_ver="41"/>
-     <value name="s8"       value="44" min_ver="41"/>
-     <value name="rgba5551" value="45" min_ver="41"/>
-+    <value name="bstc8_srgb"          value="46" min_ver="71"/>
-+    <value name="bstc10"              value="47" min_ver="71"/>
-+    <value name="bstc10_srgb"         value="48" min_ver="71"/>
-+    <value name="bstc10_pq"           value="49" min_ver="71"/>
-+    <value name="rgba10x6"            value="50" min_ver="71"/>
-+    <value name="bstc10_hlg"          value="55" min_ver="71"/>
-+    <value name="rgba10x6_hlg"        value="56" min_ver="71"/>
-+    <value name="rgb10_a2_hlg"        value="57" min_ver="71"/>
-+    <value name="bstc10_pq_bt1886"    value="58" min_ver="71"/>
-+    <value name="rgba10x6_pq_bt1886"  value="59" min_ver="71"/>
-+    <value name="rgb10_a2_pq_bt1886"  value="60" min_ver="71"/>
-+    <value name="bstc10_hlg_bt1886"   value="61" min_ver="71"/>
-+    <value name="rgba10x6_hlg_bt1886" value="62" min_ver="71"/>
-+    <value name="rgb10_a2_hlg_bt1886" value="63" min_ver="71"/>
-   </enum>
- 
-   <enum name="Z/S Output Image Format" prefix="V3D_OUTPUT_IMAGE_FORMAT_ZS" max_ver="33">
-@@ -314,6 +351,12 @@
-     <value name="perp end caps" value="1"/>
-   </enum>
- 
-+  <enum name="Z Clip Mode" prefix="V3D_Z_CLIP_MODE">
-+    <value name="NONE" value="0"/>
-+    <value name="MIN_ONE_TO_ONE" value="1"/>
-+    <value name="ZERO_TO_ONE" value="2"/>
-+  </enum>
-+
-   <packet code="0" name="Halt"/>
-   <packet code="1" name="NOP"/>
-   <packet code="4" name="Flush"/>
-@@ -381,11 +424,13 @@
-     <field name="Last Tile of Frame" size="1" start="0" type="bool"/>
-   </packet>
- 
-  <packet code="25" shortname="clear" name="Clear Tile Buffers" cl="R" min_ver="41">
-+  <packet code="25" shortname="clear" name="Clear Tile Buffers" cl="R" min_ver="41" max_ver="42">
-     <field name="Clear Z/Stencil Buffer" size="1" start="1" type="bool"/>
-     <field name="Clear all Render Targets" size="1" start="0" type="bool"/>
-   </packet>
- 
-+  <packet code="25" shortname="clear_rt" name="Clear Render Targets" cl="R" min_ver="71"/>
-+
-   <packet code="26" shortname="load" name="Reload Tile Color Buffer" cl="R" max_ver="33">
-     <field name="Disable Color Buffer load" size="8" start="8" type="uint"/>
-     <field name="Enable Z load" size="1" start="7" type="bool"/>
-@@ -443,6 +488,10 @@
-       <value name="Render target 1" value="1"/>
-       <value name="Render target 2" value="2"/>
-       <value name="Render target 3" value="3"/>
-+      <value name="Render target 4" value="4" min_ver="71"/>
-+      <value name="Render target 5" value="5" min_ver="71"/>
-+      <value name="Render target 6" value="6" min_ver="71"/>
-+      <value name="Render target 7" value="7" min_ver="71"/>
-       <value name="None" value="8"/>
-       <value name="Z" value="9"/>
-       <value name="Stencil" value="10"/>
-@@ -789,7 +838,7 @@
-     <field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
-   </packet>
- 
-  <packet code="84" name="Blend Cfg" min_ver="41">
-+  <packet code="84" name="Blend Cfg" min_ver="41" max_ver="42">
-     <field name="Render Target Mask" size="4" start="24" type="uint"/>
-     <field name="Color blend dst factor" size="4" start="20" type="Blend Factor"/>
-     <field name="Color blend src factor" size="4" start="16" type="Blend Factor"/>
-@@ -799,6 +848,16 @@
-     <field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
-   </packet>
- 
-+  <packet code="84" name="Blend Cfg" min_ver="71">
-+    <field name="Render Target Mask" size="8" start="24" type="uint"/>
-+    <field name="Color blend dst factor" size="4" start="20" type="Blend Factor"/>
-+    <field name="Color blend src factor" size="4" start="16" type="Blend Factor"/>
-+    <field name="Color blend mode" size="4" start="12" type="Blend Mode"/>
-+    <field name="Alpha blend dst factor" size="4" start="8" type="Blend Factor"/>
-+    <field name="Alpha blend src factor" size="4" start="4" type="Blend Factor"/>
-+    <field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
-+  </packet>
-+
-   <packet code="86" shortname="blend_ccolor" name="Blend Constant Color">
-     <field name="Alpha (F16)" size="16" start="48" type="uint"/>
-     <field name="Blue (F16)" size="16" start="32" type="uint"/>
-@@ -828,7 +887,12 @@
-     <field name="address" size="32" start="0" type="address"/>
-   </packet>
- 
-  <packet code="96" name="Cfg Bits">
-+  <packet code="93" name="Depth Bounds Test Limits" min_ver="71">
-+    <field name="Lower Test Limit" size="32" start="0" type="float"/>
-+    <field name="Upper Test Limit" size="32" start="32" type="float"/>
-+  </packet>
-+
-+  <packet code="96" name="Cfg Bits" max_ver="42">
-     <field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
-     <field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
-     <field name="Blend enable" size="1" start="19" type="bool"/>
-@@ -846,6 +910,25 @@
-     <field name="Enable Forward Facing Primitive" size="1" start="0" type="bool"/>
-   </packet>
- 
-+  <packet code="96" name="Cfg Bits" min_ver="71">
-+    <field name="Z Clipping mode" size="2" start="22" type="Z Clip Mode"/>
-+    <field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
-+    <field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
-+    <field name="Blend enable" size="1" start="19" type="bool"/>
-+    <field name="Stencil enable" size="1" start="18" type="bool"/>
-+    <field name="Z updates enable" size="1" start="15" type="bool"/>
-+    <field name="Depth-Test Function" size="3" start="12" type="Compare Function"/>
-+    <field name="Direct3D Wireframe triangles mode" size="1" start="11" type="bool"/>
-+    <field name="Z Clamp Mode" size="1" start="10" type="bool"/>
-+    <field name="Rasterizer Oversample Mode" size="2" start="6" type="uint"/>
-+    <field name="Depth Bounds Test Enable" size="1" start="5" type="bool"/>
-+    <field name="Line Rasterization" size="1" start="4" type="uint"/>
-+    <field name="Enable Depth Offset" size="1" start="3" type="bool"/>
-+    <field name="Clockwise Primitives" size="1" start="2" type="bool"/>
-+    <field name="Enable Reverse Facing Primitive" size="1" start="1" type="bool"/>
-+    <field name="Enable Forward Facing Primitive" size="1" start="0" type="bool"/>
-+  </packet>
-+
-   <packet code="97" shortname="zero_all_flatshade_flags" name="Zero All Flat Shade Flags"/>
- 
-   <packet code="98" shortname="flatshade_flags" name="Flat Shade Flags">
-@@ -907,16 +990,26 @@
-     <field name="Minimum Zw" size="32" start="0" type="float"/>
-   </packet>
- 
-  <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B">
-+  <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B" max_ver="42">
-     <field name="Viewport Half-Height in 1/256th of pixel" size="32" start="32" type="float"/>
-     <field name="Viewport Half-Width in 1/256th of pixel" size="32" start="0" type="float"/>
-   </packet>
- 
-+  <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B" min_ver="71">
-+    <field name="Viewport Half-Height in 1/64th of pixel" size="32" start="32" type="float"/>
-+    <field name="Viewport Half-Width in 1/64th of pixel" size="32" start="0" type="float"/>
-+  </packet>
-+
-   <packet shortname="clipper_z" name="Clipper Z Scale and Offset" code="111" cl="B">
-     <field name="Viewport Z Offset (Zc to Zs)" size="32" start="32" type="float"/>
-     <field name="Viewport Z Scale (Zc to Zs)" size="32" start="0" type="float"/>
-   </packet>
- 
-+  <packet shortname="clipper_z_no_guardband" name="Clipper Z Scale and Offset no guardband" code="112" cl="B" min_ver="71">
-+    <field name="Viewport Z Offset (Zc to Zs)" size="32" start="32" type="float"/>
-+    <field name="Viewport Z Scale (Zc to Zs)" size="32" start="0" type="float"/>
-+  </packet>
-+
-   <packet name="Number of Layers" code="119" min_ver="41">
-     <field name="Number of Layers" size="8" start="0" type="uint" minus_one="true"/>
-   </packet>
-@@ -947,7 +1040,7 @@
-     <field name="sub-id" size="1" start="0" type="uint" default="0"/>
-   </packet>
- 
-  <packet code="120" name="Tile Binning Mode Cfg" min_ver="41">
-+  <packet code="120" name="Tile Binning Mode Cfg" min_ver="41" max_ver="42">
- 
-     <field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
-     <field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
-@@ -971,6 +1064,35 @@
-     </field>
-   </packet>
- 
-+  <packet code="120" name="Tile Binning Mode Cfg" min_ver="71">
-+    <field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
-+    <field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
-+
-+    <field name="Log2 Tile Height" size="3" start="11" type="uint">
-+      <value name="tile height 8 pixels" value="0"/>
-+      <value name="tile height 16 pixels" value="1"/>
-+      <value name="tile height 32 pixels" value="2"/>
-+      <value name="tile height 64 pixels" value="3"/>
-+    </field>
-+    <field name="Log2 Tile Width"  size="3" start="8" type="uint">
-+      <value name="tile width 8 pixels" value="0"/>
-+      <value name="tile width 16 pixels" value="1"/>
-+      <value name="tile width 32 pixels" value="2"/>
-+      <value name="tile width 64 pixels" value="3"/>
-+    </field>
-+
-+    <field name="tile allocation block size" size="2" start="4" type="uint">
-+      <value name="tile allocation block size 64b" value="0"/>
-+      <value name="tile allocation block size 128b" value="1"/>
-+      <value name="tile allocation block size 256b" value="2"/>
-+    </field>
-+    <field name="tile allocation initial block size" size="2" start="2" type="uint">
-+      <value name="tile allocation initial block size 64b" value="0"/>
-+      <value name="tile allocation initial block size 128b" value="1"/>
-+      <value name="tile allocation initial block size 256b" value="2"/>
-+    </field>
-+  </packet>
-+
-   <packet code="120" name="Tile Binning Mode Cfg (Part2)" cl="B" max_ver="33">
-     <field name="Tile Allocation Memory Address" size="32" start="32" type="address"/>
-     <field name="Tile Allocation Memory Size" size="32" start="0" type="uint"/>
-@@ -1002,7 +1124,7 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="0"/>
-   </packet>
- 
-  <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="41">
-+  <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="41" max_ver="42">
-     <field name="Pad" size="12" start="52" type="uint"/>
- 
-     <field name="Early Depth/Stencil Clear" size="1" start="51" type="bool"/>
-@@ -1018,7 +1140,11 @@
-     <field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
-     <field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
- 
-    <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP"/>
-+    <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP">
-+      <value name="Render target maximum 32bpp" value="0"/>
-+      <value name="Render target maximum 64bpp" value="1"/>
-+      <value name="Render target maximum 128bpp" value="2"/>
-+    </field>
- 
-     <field name="Image Height (pixels)" size="16" start="24" type="uint"/>
-     <field name="Image Width (pixels)" size="16" start="8" type="uint"/>
-@@ -1027,6 +1153,43 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="0"/>
-   </packet>
- 
-+  <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="71">
-+    <field name="Pad" size="6" start="58" type="uint"/>
-+
-+    <field name="Log2 Tile Height" size="3" start="55" type="uint">
-+      <value name="tile height 8 pixels" value="0"/>
-+      <value name="tile height 16 pixels" value="1"/>
-+      <value name="tile height 32 pixels" value="2"/>
-+      <value name="tile height 64 pixels" value="3"/>
-+    </field>
-+    <field name="Log2 Tile Width"  size="3" start="52" type="uint">
-+      <value name="tile width 8 pixels" value="0"/>
-+      <value name="tile width 16 pixels" value="1"/>
-+      <value name="tile width 32 pixels" value="2"/>
-+      <value name="tile width 64 pixels" value="3"/>
-+    </field>
-+
-+    <field name="Early Depth/Stencil Clear" size="1" start="51" type="bool"/>
-+    <field name="Internal Depth Type" size="4" start="47" type="Internal Depth Type"/>
-+
-+    <field name="Early-Z disable" size="1" start="46" type="bool"/>
-+
-+    <field name="Early-Z Test and Update Direction" size="1" start="45" type="uint">
-+      <value name="Early-Z direction LT/LE" value="0"/>
-+      <value name="Early-Z direction GT/GE" value="1"/>
-+    </field>
-+
-+    <field name="Depth-buffer disable" size="1" start="44" type="bool"/>
-+    <field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
-+    <field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
-+
-+    <field name="Image Height (pixels)" size="16" start="24" type="uint"/>
-+    <field name="Image Width (pixels)" size="16" start="8" type="uint"/>
-+    <field name="Number of Render Targets" size="4" start="4" type="uint" minus_one="true"/>
-+
-+    <field name="sub-id" size="3" start="0" type="uint" default="0"/>
-+  </packet>
-+
-   <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" max_ver="33">
-     <field name="Address" size="32" start="32" type="address"/>
- 
-@@ -1048,7 +1211,8 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="2"/>
-   </packet>
- 
-  <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" min_ver="41">
-+  <!-- On 4.1 the real name would be "Tile Rendering Mode Cfg (Render Target Configs)" -->
-+  <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" min_ver="41" max_ver="42">
- 
-     <field name="Pad" size="28" start="36" type="uint"/>
- 
-@@ -1099,7 +1263,7 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="3"/>
-   </packet>
- 
-  <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="41">
-+  <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="41" max_ver="42">
-     <field name="unused" size="16" start="48" type="uint"/>
- 
-     <field name="Z Clear Value" size="32" start="16" type="float"/>
-@@ -1108,6 +1272,15 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="2"/>
-   </packet>
- 
-+  <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="71">
-+    <field name="unused" size="16" start="48" type="uint"/>
-+
-+    <field name="Z Clear Value" size="32" start="16" type="float"/>
-+
-+    <field name="Stencil Clear Value" size="8" start="8" type="uint"/>
-+    <field name="sub-id" size="4" start="0" type="uint" default="1"/>
-+  </packet>
-+
-   <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" max_ver="33">
-     <!-- Express this as a 56-bit field? -->
-     <field name="Clear Color next 24 bits" size="24" start="40" type="uint"/>
-@@ -1117,7 +1290,7 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="4"/>
-   </packet>
- 
-  <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" min_ver="41">
-+  <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" min_ver="41" max_ver="42">
-     <!-- Express this as a 56-bit field? -->
-     <field name="Clear Color next 24 bits" size="24" start="40" type="uint"/>
-     <field name="Clear Color low 32 bits" size="32" start="8" type="uint"/>
-@@ -1126,6 +1299,19 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="3"/>
-   </packet>
- 
-+  <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part1)" cl="R" min_ver="71">
-+
-+    <field name="Clear Color low bits" size="32" start="32" type="uint"/>
-+    <field name="Internal Type and Clamping" size="5" start="27" type="Render Target Type Clamp"/>
-+    <field name="Internal BPP" size="2" start="25" type="Internal BPP"/>
-+
-+    <field name="Stride" size="7" start="18" type="uint" minus_one="true"/>
-+    <!-- In multiples of 512 bits -->
-+    <field name="Base Address" size="11" start="7" type="uint"/>
-+    <field name="Render Target number" size="3" start="3" type="uint"/>
-+    <field name="sub-id" size="3" start="0" type="uint" default="2"/>
-+  </packet>
-+
-   <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" max_ver="33">
-     <!-- Express this as a 56-bit field? -->
-     <field name="Clear Color mid-high 24 bits" size="24" start="40" type="uint"/>
-@@ -1135,7 +1321,7 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="5"/>
-   </packet>
- 
-  <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" min_ver="41">
-+  <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" min_ver="41" max_ver="42">
-     <!-- Express this as a 56-bit field? -->
-     <field name="Clear Color mid-high 24 bits" size="24" start="40" type="uint"/>
-     <field name="Clear Color mid-low 32 bits" size="32" start="8" type="uint"/>
-@@ -1144,6 +1330,13 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="4"/>
-   </packet>
- 
-+  <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part2)" cl="R" min_ver="71">
-+    <field name="Clear Color mid bits" size="40" start="24" type="uint"/>
-+
-+    <field name="Render Target number" size="3" start="3" type="uint"/>
-+    <field name="sub-id" size="3" start="0" type="uint" default="3"/>
-+  </packet>
-+
-   <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" max_ver="33">
-     <field name="pad" size="11" start="53" type="uint"/>
-     <field name="UIF padded height in UIF blocks" size="13" start="40" type="uint"/>
-@@ -1155,7 +1348,7 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="6"/>
-   </packet>
- 
-  <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" min_ver="41">
-+  <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" min_ver="41" max_ver="42">
-     <field name="pad" size="11" start="53" type="uint"/>
-     <field name="UIF padded height in UIF blocks" size="13" start="40" type="uint"/>
-     <!-- image height is for Y flipping -->
-@@ -1166,6 +1359,13 @@
-     <field name="sub-id" size="4" start="0" type="uint" default="5"/>
-   </packet>
- 
-+  <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part3)" cl="R" min_ver="71">
-+    <field name="Clear Color top bits" size="56" start="8" type="uint"/>
-+
-+    <field name="Render Target number" size="3" start="3" type="uint"/>
-+    <field name="sub-id" size="3" start="0" type="uint" default="4"/>
-+  </packet>
-+
-   <packet code="124" shortname="tile_coords" name="Tile Coordinates">
-     <field name="tile row number" size="12" start="12" type="uint"/>
-     <field name="tile column number" size="12" start="0" type="uint"/>
-@@ -1240,7 +1440,7 @@
-     <field name="Coordinate Shader Uniforms Address" size="32" start="32b" type="address"/>
-   </struct>
- 
-  <struct name="GL Shader State Record" min_ver="41">
-+  <struct name="GL Shader State Record" min_ver="41" max_ver="42">
-     <field name="Point size in shaded vertex data" size="1" start="0" type="bool"/>
-     <field name="Enable clipping" size="1" start="1" type="bool"/>
- 
-@@ -1299,6 +1499,63 @@
-     <field name="Coordinate Shader Uniforms Address" size="32" start="32b" type="address"/>
-   </struct>
- 
-+  <struct name="GL Shader State Record" min_ver="71">
-+    <field name="Point size in shaded vertex data" size="1" start="0" type="bool"/>
-+    <field name="Enable clipping" size="1" start="1" type="bool"/>
-+
-+    <field name="Vertex ID read by coordinate shader" size="1" start="2" type="bool"/>
-+    <field name="Instance ID read by coordinate shader" size="1" start="3" type="bool"/>
-+    <field name="Base Instance ID read by coordinate shader" size="1" start="4" type="bool"/>
-+    <field name="Vertex ID read by vertex shader" size="1" start="5" type="bool"/>
-+    <field name="Instance ID read by vertex shader" size="1" start="6" type="bool"/>
-+    <field name="Base Instance ID read by vertex shader" size="1" start="7" type="bool"/>
-+
-+    <field name="Fragment shader does Z writes" size="1" start="8" type="bool"/>
-+    <field name="Turn off early-z test" size="1" start="9" type="bool"/>
-+
-+    <field name="Fragment shader uses real pixel centre W in addition to centroid W2" size="1" start="12" type="bool"/>
-+    <field name="Enable Sample Rate Shading" size="1" start="13" type="bool"/>
-+    <field name="Any shader reads hardware-written Primitive ID" size="1" start="14" type="bool"/>
-+    <field name="Insert Primitive ID as first varying to fragment shader" size="1" start="15" type="bool"/>
-+    <field name="Turn off scoreboard" size="1" start="16" type="bool"/>
-+    <field name="Do scoreboard wait on first thread switch" size="1" start="17" type="bool"/>
-+    <field name="Disable implicit point/line varyings" size="1" start="18" type="bool"/>
-+    <field name="No prim pack" size="1" start="19" type="bool"/>
-+    <field name="Never defer FEP depth writes" size="1" start="20" type="bool"/>
-+
-+    <field name="Number of varyings in Fragment Shader" size="8" start="3b" type="uint"/>
-+
-+    <field name="Coordinate Shader output VPM segment size" size="4" start="4b" type="uint"/>
-+    <field name="Min Coord Shader output segments required in play in addition to VCM cache size" size="4" start="36" type="uint"/>
-+
-+    <field name="Coordinate Shader input VPM segment size" size="4" start="5b" type="uint"/>
-+    <field name="Min Coord Shader input segments required in play" size="4" start="44" type="uint" minus_one="true"/>
-+
-+    <field name="Vertex Shader output VPM segment size" size="4" start="6b" type="uint"/>
-+    <field name="Min Vertex Shader output segments required in play in addition to VCM cache size" size="4" start="52" type="uint"/>
-+
-+    <field name="Vertex Shader input VPM segment size" size="4" start="7b" type="uint"/>
-+    <field name="Min Vertex Shader input segments required in play" size="4" start="60" type="uint" minus_one="true"/>
-+
-+    <field name="Fragment Shader Code Address" size="29" start="67" type="address"/>
-+    <field name="Fragment Shader 4-way threadable" size="1" start="64" type="bool"/>
-+    <field name="Fragment Shader start in final thread section" size="1" start="65" type="bool"/>
-+    <field name="Fragment Shader Propagate NaNs" size="1" start="66" type="bool"/>
-+    <field name="Fragment Shader Uniforms Address" size="32" start="12b" type="address"/>
-+
-+    <field name="Vertex Shader Code Address" size="29" start="131" type="address"/>
-+    <field name="Vertex Shader 4-way threadable" size="1" start="128" type="bool"/>
-+    <field name="Vertex Shader start in final thread section" size="1" start="129" type="bool"/>
-+    <field name="Vertex Shader Propagate NaNs" size="1" start="130" type="bool"/>
-+    <field name="Vertex Shader Uniforms Address" size="32" start="20b" type="address"/>
-+
-+    <field name="Coordinate Shader Code Address" size="29" start="195" type="address"/>
-+    <field name="Coordinate Shader 4-way threadable" size="1" start="192" type="bool"/>
-+    <field name="Coordinate Shader start in final thread section" size="1" start="193" type="bool"/>
-+    <field name="Coordinate Shader Propagate NaNs" size="1" start="194" type="bool"/>
-+    <field name="Coordinate Shader Uniforms Address" size="32" start="28b" type="address"/>
-+  </struct>
-+
-   <struct name="Geometry Shader State Record" min_ver="41">
-     <field name="Geometry Bin Mode Shader Code Address" size="29" start="3" type="address"/>
-     <field name="Geometry Bin Mode Shader 4-way threadable" size="1" start="0" type="bool"/>
-@@ -1543,7 +1800,7 @@
-     <field name="Offset Format 8" size="1" start="0" type="bool"/>
-   </struct>
- 
-  <struct name="TMU Config Parameter 2" min_ver="42">
-+  <struct name="TMU Config Parameter 2" min_ver="42" max_ver="42">
-     <field name="Pad" size="7" start="25" type="uint"/>
-     <field name="LOD Query" size="1" start="24" type="bool"/>
-     <field name="Op" size="4" start="20" type="TMU Op"/>
-@@ -1558,6 +1815,23 @@
-     <field name="Offset Format 8" size="1" start="0" type="bool"/>
-   </struct>
- 
-+  <struct name="TMU Config Parameter 2" min_ver="71">
-+    <field name="Pad" size="5" start="27" type="uint"/>
-+    <field name="Write conversion" size="1" start="26" type="bool"/>
-+    <field name="DIM query" size="1" start="25" type="bool"/>
-+    <field name="LOD Query" size="1" start="24" type="bool"/>
-+    <field name="Op" size="4" start="20" type="TMU Op"/>
-+    <field name="Offset R" size="4" start="16" type="int"/>
-+    <field name="Offset T" size="4" start="12" type="int"/>
-+    <field name="Offset S" size="4" start="8" type="int"/>
-+    <field name="Gather Mode" size="1" start="7" type="bool"/>
-+    <field name="Gather Component" size="2" start="5" type="uint"/>
-+    <field name="Coefficient Mode" size="1" start="4" type="bool"/>
-+    <field name="Sample Number" size="2" start="2" type="uint"/>
-+    <field name="Disable AutoLOD" size="1" start="1" type="bool"/>
-+    <field name="Offset Format 8" size="1" start="0" type="bool"/>
-+  </struct>
-+
-   <struct name="Texture Shader State" max_ver="33">
-     <field name="UIF XOR disable" size="1" start="255" type="bool"/>
-     <field name="Level 0 is strictly UIF" size="1" start="254" type="bool"/>
-@@ -1611,7 +1885,7 @@
-     <field name="Filter" size="4" start="0" type="TMU Filter"/>
-   </struct>
- 
-  <struct name="Texture Shader State" min_ver="41">
-+  <struct name="Texture Shader State" min_ver="41" max_ver="42">
-     <field name="Pad" size="56" start="136" type="uint"/>
-     <field name="UIF XOR disable" size="1" start="135" type="bool"/>
-     <field name="Level 0 is strictly UIF" size="1" start="134" type="bool"/>
-@@ -1652,6 +1926,82 @@
-     <field name="Flip texture X Axis" size="1" start="0" type="bool"/>
-   </struct>
- 
-+  <struct name="Texture Shader State" min_ver="71">
-+    <field name="Pad" size="2" start="190" type="uint"/>
-+    <!-- When we use an address type, there is an implicit requirement
-+         that the address is a 32-bit that is encoded starting at a 32-bit
-+         aligned bit offset into the packet. If the address field has less than
-+         32 bits, it is assumed that the address is aligned. For example, a
-+         26-bit address field is expected to be 64-byte aligned (6 lsb bits
-+         are 0) and that this will be encoded into a packet starting at bit
-+         offset 6 into a 32-bit dword (since bits 0..5 of the address are
-+         implicitly 0 and don't need to be explicitly encoded).
-+
-+         Unfortunately, the CB address below doesn't match this requirement:
-+         it starts at bit 138, which is 10 bits into a 32-bit dword, but it
-+         represents a 64-bit aligned address (6 lsb bits are 0), so we cannot
-+         encode it as an address type. To fix this we encode these addresses
-+         as uint types which has two implications:
-+         1. the driver is responsible for manually addinng the buffer objects
-+            for these addresses to the job BO list.
-+         2. the driver needs to pass an actual 26-bit address value by manually
-+            shifting the 6 lsb bits (that are implicitly 0).
-+    -->
-+    <field name="texture_base pointer_Cr" size="26" start="164" type="uint"/>
-+    <field name="texture base pointer Cb" size="26" start="138" type="uint"/>
-+    <field name="Chroma offset y" size="1" start="137" type="uint"/>
-+    <field name="Chroma offset x" size="1" start="136" type="uint"/>
-+
-+    <field name="UIF XOR disable" size="1" start="135" type="bool"/>
-+    <field name="Level 0 is strictly UIF" size="1" start="134" type="bool"/>
-+    <field name="Level 0 XOR enable" size="1" start="132" type="bool"/>
-+    <field name="Level 0 UB_PAD" size="4" start="128" type="uint"/>
-+
-+    <field name="Base Level" size="4" start="124" type="uint"/>
-+    <field name="Max Level" size="4" start="120" type="uint"/>
-+
-+    <field name="Swizzle A" size="3" start="117" type="uint">
-+      <value name="Swizzle Zero" value="0"/>
-+      <value name="Swizzle One" value="1"/>
-+      <value name="Swizzle Red" value="2"/>
-+      <value name="Swizzle Green" value="3"/>
-+      <value name="Swizzle Blue" value="4"/>
-+      <value name="Swizzle Alpha" value="5"/>
-+    </field>
-+
-+    <field name="Swizzle B" size="3" start="114" type="uint"/>
-+    <field name="Swizzle G" size="3" start="111" type="uint"/>
-+    <field name="Swizzle R" size="3" start="108" type="uint"/>
-+    <field name="Extended" size="1" start="107" type="bool"/>
-+
-+    <field name="Texture type" size="7" start="100" type="uint"/>
-+    <field name="Image Depth" size="14" start="86" type="uint"/>
-+    <field name="Image Height" size="14" start="72" type="uint"/>
-+    <field name="Image Width" size="14" start="58" type="uint"/>
-+
-+    <!-- V3D 7.1.2 doesn't have the RB swap bit and has Array Stride starting
-+         at bit 32. However, 7.1.5 included the RB swap bit at bit 32 and has
-+         Array Stride starting at 33, which is backwards incompatible,
-+         We use the definition from 7.1.5.
-+    -->
-+    <field name="Array Stride (64-byte aligned)" size="24" start="33" type="uint"/>
-+    <field name="R/B swap" size="1" start="32" type="bool"/>
-+
-+    <field name="Texture base pointer" size="32" start="0" type="address"/>
-+
-+    <field name="Reverse" size="1" start="5" type="bool"/>
-+    <field name="Transfer func" size="3" start="2" type="uint">
-+      <value name="Transfer Func None" value="0"/>
-+      <value name="Transfer Func sRGB" value="1"/>
-+      <value name="Transfer Func PQ" value="2"/>
-+      <value name="Transfer Func HLG" value="3"/>
-+      <value name="Transfer Func PQ BT1886" value="4"/>
-+      <value name="Transfer Func HLG BT1886" value="5"/>
-+    </field>
-+    <field name="Flip texture Y Axis" size="1" start="1" type="bool"/>
-+    <field name="Flip texture X Axis" size="1" start="0" type="bool"/>
-+  </struct>
-+
-   <struct name="Sampler State" min_ver="41">
-     <field name="Border color word 3" size="32" start="160" type="uint"/>
-     <field name="Border color word 2" size="32" start="128" type="uint"/>
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-compiler-update-image-store-lowering-to-use.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-compiler-update-image-store-lowering-to-use.patch
@ -1,8 +1,8 @@
-From 381c29e3ff5237c89380cc53eb2271d1985f4e34 Mon Sep 17 00:00:00 2001
+From 7e151fd3a213848c8022c9f48e10f2aec76c3e4d Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
 Date: Thu, 2 Dec 2021 13:26:43 +0100
-Subject: [PATCH 067/142] broadcom/compiler: update image store lowering to use
- v71 new packing/conversion instructions
+Subject: [PATCH 3/3] broadcom/compiler: update image store lowering to use v71
+ new packing/conversion instructions

 Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*:
   total instructions in shared programs: 35993 -> 33245 (-7.63%)
@ -31,18 +31,20 @@ Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*:

 FWIW, that one HURT on the instructions count is for just one
 instruction.
+
+Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
 ---
- src/broadcom/compiler/nir_to_vir.c            |  39 +++
+ src/broadcom/compiler/nir_to_vir.c            |  40 +++
 src/broadcom/compiler/v3d_compiler.h          |  16 +-
- .../compiler/v3d_nir_lower_image_load_store.c | 246 +++++++++++++++++-
+ .../compiler/v3d_nir_lower_image_load_store.c | 239 +++++++++++++++++-
 src/broadcom/compiler/vir.c                   |   2 +-
- 4 files changed, 294 insertions(+), 9 deletions(-)
+ 4 files changed, 288 insertions(+), 9 deletions(-)

 diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
-index 90fe1d1e7f0..a8cf02dd386 100644
+index 220c864a056..4329d4c85f6 100644
 --- a/src/broadcom/compiler/nir_to_vir.c
 +++ b/src/broadcom/compiler/nir_to_vir.c
-@@ -1689,6 +1689,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
+@@ -1688,6 +1688,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 result = vir_VFPACK(c, src[0], src[1]);
                 break;
 
@ -65,10 +67,10 @@ index 90fe1d1e7f0..a8cf02dd386 100644
         case nir_op_unpack_half_2x16_split_x:
                 result = vir_FMOV(c, src[0]);
                 vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_L);
-@@ -1719,6 +1735,29 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
-                 result = vir_FMOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, tmp, zero));
+@@ -1698,6 +1714,30 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
+                 vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_H);
                 break;
-         }
+ 
 +        case nir_op_vftounorm8_v3d:
 +                result = vir_VFTOUNORM8(c, src[0]);
 +                break;
@ -92,14 +94,15 @@ index 90fe1d1e7f0..a8cf02dd386 100644
 +        case nir_op_ftosnorm16_v3d:
 +                result = vir_FTOSNORM16(c, src[0]);
 +                break;
- 
+
         default:
                 fprintf(stderr, "unknown NIR ALU inst: ");
+                 nir_print_instr(&instr->instr, stderr);
 diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
-index 36adf8830b5..425ab0cdf9d 100644
+index 095b33c03b8..5714e85d2b8 100644
 --- a/src/broadcom/compiler/v3d_compiler.h
 +++ b/src/broadcom/compiler/v3d_compiler.h
-@@ -1186,7 +1186,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader);
+@@ -1180,7 +1180,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader);
 bool v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
 bool v3d_nir_lower_scratch(nir_shader *s);
 bool v3d_nir_lower_txf_ms(nir_shader *s);
@ -108,7 +111,7 @@ index 36adf8830b5..425ab0cdf9d 100644
 bool v3d_nir_lower_load_store_bitsize(nir_shader *s);
 
 void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
-@@ -1427,6 +1427,20 @@ VIR_SFU(LOG)
+@@ -1421,6 +1421,20 @@ VIR_SFU(LOG)
 VIR_SFU(SIN)
 VIR_SFU(RSQRT2)
 
@ -130,7 +133,7 @@ index 36adf8830b5..425ab0cdf9d 100644
 vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
              struct qreg dest, struct qreg src)
 diff --git a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
-index 2900a29817f..bbb55be4a14 100644
+index 5f8363377cb..ec43f834897 100644
 --- a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
 +++ b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
@@ -40,6 +40,10 @@
@ -151,9 +154,9 @@ index 2900a29817f..bbb55be4a14 100644
 + *
 + * This is the generic helper, using all common nir operations.
  */
- static nir_ssa_def *
- pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
-@@ -91,8 +97,185 @@ pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
+ static nir_def *
+ pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
+@@ -91,8 +97,180 @@ pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
         return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
 }
 
@ -161,46 +164,42 @@ index 2900a29817f..bbb55be4a14 100644
 + * just easier to read vfpack on the code, specially while using the PRM as
 + * reference
 + */
-+static nir_ssa_def *
-+nir_vfpack(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
+static inline nir_def *
+nir_vfpack(nir_builder *b, nir_def *p1, nir_def *p2)
 +{
 +        return nir_pack_half_2x16_split(b, p1, p2);
 +}
 +
-+static inline nir_ssa_def *
-+pack_11f11f10f(nir_builder *b, nir_ssa_def *color)
+static inline nir_def *
+pack_11f11f10f(nir_builder *b, nir_def *color)
 +{
-+        nir_ssa_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
+        nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
 +                                     nir_channel(b, color, 1));
-+        /* FIXME: we noted that we could just use p2 again as the second
-+         * element to pack, and CTS tests still works. Just using undef as is
-+         * slightly more correct
-+         */
-+        nir_ssa_def *undef = nir_ssa_undef(b, 1, color->bit_size);
-+        nir_ssa_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef);
+        nir_def *undef = nir_undef(b, 1, color->bit_size);
+        nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef);
 +
 +        return nir_v11fpack_v3d(b, p1, p2);
 +}
 +
-+static inline nir_ssa_def *
-+pack_r10g10b10a2_uint(nir_builder *b, nir_ssa_def *color)
+static inline nir_def *
+pack_r10g10b10a2_uint(nir_builder *b, nir_def *color)
 +{
-+        nir_ssa_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
+        nir_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
 +                                        nir_channel(b, color, 1));
-+        nir_ssa_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2),
+        nir_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2),
 +                                        nir_channel(b, color, 3));
 +
 +        return nir_v10pack_v3d(b, p1, p2);
 +}
 +
-+static inline nir_ssa_def *
-+pack_r10g10b10a2_unorm(nir_builder *b, nir_ssa_def *color)
+static inline nir_def *
+pack_r10g10b10a2_unorm(nir_builder *b, nir_def *color)
 +{
-+        nir_ssa_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
+        nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
 +                                     nir_channel(b, color, 1));
 +        p1 = nir_vftounorm10lo_v3d(b, p1);
 +
-+        nir_ssa_def *p2 = nir_vfpack(b, nir_channel(b, color, 2),
+        nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2),
 +                                     nir_channel(b, color, 3));
 +        p2 = nir_vftounorm10hi_v3d(b, p2);
 +
@ -213,8 +212,8 @@ index 2900a29817f..bbb55be4a14 100644
 +        TO_UNORM
 +};
 +
-+static inline nir_ssa_def *
-+pack_8bit(nir_builder *b, nir_ssa_def *color,
+static inline nir_def *
+pack_8bit(nir_builder *b, nir_def *color,
 +                        unsigned num_components,
 +                        enum hw_conversion conversion)
 +{
@ -223,8 +222,8 @@ index 2900a29817f..bbb55be4a14 100644
 +         * conversion. But we support also that case, and let the caller
 +         * decide which method to use.
 +         */
-+        nir_ssa_def *p1;
-+        nir_ssa_def *p2;
+        nir_def *p1;
+        nir_def *p2;
 +
 +        if (conversion == NONE) {
 +                p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
@ -246,10 +245,9 @@ index 2900a29817f..bbb55be4a14 100644
 +                           nir_vftounorm8_v3d(b, p2) : nir_vftosnorm8_v3d(b, p2);
 +                }
 +        } else {
-+                /* As mentioned on the comment before, using an undef here
-+                 * would be more correct. But for this case we are getting
-+                 * worse values, and in fact even some worse instruction count
-+                 * with some CTS tests, so we just reuse the first packing
+                /* Using an undef here would be more correct. But for this
+                 * case we are getting worse shader-db values with some CTS
+                 * tests, so we just reuse the first packing.
 +                 */
 +                p2 = p1;
 +        }
@ -257,13 +255,13 @@ index 2900a29817f..bbb55be4a14 100644
 +        return nir_v8pack_v3d(b, p1, p2);
 +}
 +
-+static inline nir_ssa_def *
-+pack_16bit(nir_builder *b, nir_ssa_def *color,
+static inline nir_def *
+pack_16bit(nir_builder *b, nir_def *color,
 +                         unsigned num_components,
 +                         enum hw_conversion conversion)
 +{
-+        nir_ssa_def *results[2];
-+        nir_ssa_def *channels[4];
+        nir_def *results[2];
+        nir_def *channels[4];
 +
 +        /* Note that usually you should not use this method (that relies on
 +         * custom packing) if we are not doing any conversion. But we support
@ -299,8 +297,8 @@ index 2900a29817f..bbb55be4a14 100644
 +        return nir_vec(b, results, DIV_ROUND_UP(num_components, 2));
 +}
 +
-+static inline nir_ssa_def *
-+pack_xbit(nir_builder *b, nir_ssa_def *color,
+static inline nir_def *
+pack_xbit(nir_builder *b, nir_def *color,
 +          unsigned num_components,
 +          const struct util_format_channel_description *r_chan)
 +{
@ -340,7 +338,7 @@ index 2900a29817f..bbb55be4a14 100644
 {
         enum pipe_format format = nir_intrinsic_format(instr);
         assert(format != PIPE_FORMAT_NONE);
-@@ -118,9 +301,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
+@@ -118,9 +296,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
                  */
                 formatted = color;
         } else {
@ -350,7 +348,7 @@ index 2900a29817f..bbb55be4a14 100644
                 const unsigned *bits;
 
                 switch (r_chan->size) {
-@@ -171,6 +351,52 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
+@@ -170,6 +345,50 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
         return true;
 }
 
@ -366,10 +364,9 @@ index 2900a29817f..bbb55be4a14 100644
 +        unsigned num_components = util_format_get_nr_components(format);
 +        b->cursor = nir_before_instr(&instr->instr);
 +
-+        nir_ssa_def *color = nir_channels(b,
-+                                          nir_ssa_for_src(b, instr->src[3], 4),
-+                                          (1 << num_components) - 1);
-+        nir_ssa_def *formatted = NULL;
+        nir_def *color =
+           nir_trim_vector(b, instr->src[3].ssa, num_components);
+        nir_def *formatted = NULL;
 +        if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
 +                formatted = nir_format_pack_r9g9b9e5(b, color);
 +        } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
@ -393,8 +390,7 @@ index 2900a29817f..bbb55be4a14 100644
 +                formatted = pack_xbit(b, color, num_components, r_chan);
 +        }
 +
-+        nir_instr_rewrite_src(&instr->instr, &instr->src[3],
-+                              nir_src_for_ssa(formatted));
+        nir_src_rewrite(&instr->src[3], formatted);
 +        instr->num_components = formatted->num_components;
 +
 +        return true;
@ -403,10 +399,10 @@ index 2900a29817f..bbb55be4a14 100644
 static bool
 v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
 {
-@@ -215,11 +441,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
-         nir_intrinsic_instr *intr =
-                 nir_instr_as_intrinsic(instr);
- 
+@@ -207,11 +426,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
+                                   nir_intrinsic_instr *intr,
+                                   void *_state)
+ {
 +        struct v3d_compile *c = (struct v3d_compile *) _state;
 +
         switch (intr->intrinsic) {
@ -422,23 +418,24 @@ index 2900a29817f..bbb55be4a14 100644
         default:
                 return false;
         }
-@@ -228,9 +460,9 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
+@@ -220,10 +445,10 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
 }
 
 bool
 -v3d_nir_lower_image_load_store(nir_shader *s)
 +v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c)
 {
-         return nir_shader_instructions_pass(s, v3d_nir_lower_image_load_store_cb,
+         return nir_shader_intrinsics_pass(s,
+                                             v3d_nir_lower_image_load_store_cb,
                                             nir_metadata_block_index |
 -                                            nir_metadata_dominance, NULL);
 +                                            nir_metadata_dominance, c);
 }
 diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
-index aea113f050e..7612eed7130 100644
+index 8c536b8fbcc..acb13a6cbf9 100644
 --- a/src/broadcom/compiler/vir.c
 +++ b/src/broadcom/compiler/vir.c
-@@ -1576,7 +1576,7 @@ v3d_attempt_compile(struct v3d_compile *c)
+@@ -1599,7 +1599,7 @@ v3d_attempt_compile(struct v3d_compile *c)
 
         NIR_PASS(_, c->s, v3d_nir_lower_io, c);
         NIR_PASS(_, c->s, v3d_nir_lower_txf_ms);
--- a/projects/RPi/devices/RPi5/patches/mesa/0004-broadcom-common-retrieve-V3D-revision-number.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0004-broadcom-common-retrieve-V3D-revision-number.patch
@ -1,65 +0,0 @@
-From 569cbe4229df737ce5915c4be2cad534707fb4f7 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 9 Nov 2021 08:50:51 +0100
-Subject: [PATCH 004/142] broadcom/common: retrieve V3D revision number
-
-The subrev field from the hub ident3 register is bumped with every
-hardware revision doing backwards incompatible changes so we want to
-keep track of this.
-
-Instead of modifying the 'ver' field info to acommodate subrev info,
-which would require a lot of changes, simply add a new 'rev' field in
-devinfo that we can use when we need to make changes based on the
-revision number of a hardware release.
---
- src/broadcom/common/v3d_device_info.c | 14 +++++++++++++-
- src/broadcom/common/v3d_device_info.h |  3 +++
- 2 files changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
-index 7e0862f1f02..7512fe3a06b 100644
--- a/src/broadcom/common/v3d_device_info.c
-+++ b/src/broadcom/common/v3d_device_info.c
-@@ -36,6 +36,9 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
-     struct drm_v3d_get_param ident1 = {
-             .param = DRM_V3D_PARAM_V3D_CORE0_IDENT1,
-     };
-+    struct drm_v3d_get_param hub_ident3 = {
-+            .param = DRM_V3D_PARAM_V3D_HUB_IDENT3,
-+    };
-     int ret;
- 
-     ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &ident0);
-@@ -76,5 +79,14 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
-                 return false;
-     }
- 
-    return true;
-+    ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &hub_ident3);
-+    if (ret != 0) {
-+            fprintf(stderr, "Couldn't get V3D core HUB IDENT3: %s\n",
-+                    strerror(errno));
-+            return false;
-+    }
-+
-+   devinfo->rev = (hub_ident3.value >> 8) & 0xff;
-+
-+   return true;
- }
-diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
-index 97abd9b8d9f..32cb65cf81f 100644
--- a/src/broadcom/common/v3d_device_info.h
-+++ b/src/broadcom/common/v3d_device_info.h
-@@ -34,6 +34,9 @@ struct v3d_device_info {
-         /** Simple V3D version: major * 10 + minor */
-         uint8_t ver;
- 
-+        /** V3D revision number */
-+        uint8_t rev;
-+
-         /** Size of the VPM, in bytes. */
-         int vpm_size;
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0005-broadcom-common-add-some-common-v71-helpers.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0005-broadcom-common-add-some-common-v71-helpers.patch
@ -1,91 +0,0 @@
-From c260843c882d25bd31e308566b45d4517fda0fa2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 17 Nov 2021 14:40:47 +0100
-Subject: [PATCH 005/142] broadcom/common: add some common v71 helpers
-
---
- src/broadcom/common/v3d_util.c | 27 +++++++++++++++++++++++++++
- src/broadcom/common/v3d_util.h | 27 +++++++++++++++++++++++++++
- 2 files changed, 54 insertions(+)
-
-diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
-index 57872a923d3..26f5c6b336f 100644
--- a/src/broadcom/common/v3d_util.c
-+++ b/src/broadcom/common/v3d_util.c
-@@ -170,3 +170,30 @@ v3d_hw_prim_type(enum mesa_prim prim_type)
-       unreachable("Unsupported primitive type");
-    }
- }
-+
-+uint32_t
-+v3d_internal_bpp_words(uint32_t internal_bpp)
-+{
-+        switch (internal_bpp) {
-+        case 0 /* V3D_INTERNAL_BPP_32 */:
-+                return 1;
-+        case 1 /* V3D_INTERNAL_BPP_64 */:
-+                return 2;
-+        case 2 /* V3D_INTERNAL_BPP_128 */:
-+                return 4;
-+        default:
-+                unreachable("Unsupported internal BPP");
-+        }
-+}
-+
-+uint32_t
-+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
-+                                       uint32_t bpp)
-+{
-+        /* stride in multiples of 128 bits, and covers 2 rows. This is the
-+         * reason we divide by 2 instead of 4, as we divide number of 32-bit
-+         * words per row by 2.
-+         */
-+
-+        return (tile_width * bpp) / 2;
-+}
-diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h
-index eb802b77f67..864fc949ffa 100644
--- a/src/broadcom/common/v3d_util.h
-+++ b/src/broadcom/common/v3d_util.h
-@@ -24,6 +24,7 @@
- #ifndef V3D_UTIL_H
- #define V3D_UTIL_H
- 
-+#include "util/macros.h"
- #include "common/v3d_device_info.h"
- #include "pipe/p_defines.h"
- 
-@@ -46,4 +47,30 @@ v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle);
- uint32_t
- v3d_hw_prim_type(enum mesa_prim prim_type);
- 
-+uint32_t
-+v3d_internal_bpp_words(uint32_t internal_bpp);
-+
-+/* Some configuration packets want the size on log2, but starting at 0 for
-+ * size 8.
-+ */
-+static inline uint8_t
-+log2_tile_size(uint32_t size)
-+{
-+        switch(size) {
-+        case 8:
-+                return 0;
-+        case 16:
-+                return 1;
-+        case 32:
-+                return 2;
-+        case 64:
-+                return 3;
-+        default:
-+                unreachable("Unsupported tile width/height");
-+        }
-+}
-+
-+uint32_t
-+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
-+                                       uint32_t bpp);
- #endif
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0006-broadcom-qpu-add-comments-on-waddr-not-used-on-V3D-7.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0006-broadcom-qpu-add-comments-on-waddr-not-used-on-V3D-7.patch
@ -1,53 +0,0 @@
-From a5211a4d71acc53183d2a90eb1694d8cce6eb44f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 5 Aug 2021 01:03:11 +0200
-Subject: [PATCH 006/142] broadcom/qpu: add comments on waddr not used on V3D
- 7.x
-
---
- src/broadcom/qpu/qpu_instr.h | 22 +++++++++++-----------
- 1 file changed, 11 insertions(+), 11 deletions(-)
-
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 2e133472698..45a0cad9760 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -88,11 +88,11 @@ enum v3d_qpu_uf {
- };
- 
- enum v3d_qpu_waddr {
-        V3D_QPU_WADDR_R0 = 0,
-        V3D_QPU_WADDR_R1 = 1,
-        V3D_QPU_WADDR_R2 = 2,
-        V3D_QPU_WADDR_R3 = 3,
-        V3D_QPU_WADDR_R4 = 4,
-+        V3D_QPU_WADDR_R0 = 0,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_R1 = 1,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_R2 = 2,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_R3 = 3,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_R4 = 4,    /* Reserved on V3D 7.x */
-         V3D_QPU_WADDR_R5 = 5,
-         V3D_QPU_WADDR_NOP = 6,
-         V3D_QPU_WADDR_TLB = 7,
-@@ -108,12 +108,12 @@ enum v3d_qpu_waddr {
-         V3D_QPU_WADDR_SYNC = 16,
-         V3D_QPU_WADDR_SYNCU = 17,
-         V3D_QPU_WADDR_SYNCB = 18,
-        V3D_QPU_WADDR_RECIP = 19,
-        V3D_QPU_WADDR_RSQRT = 20,
-        V3D_QPU_WADDR_EXP = 21,
-        V3D_QPU_WADDR_LOG = 22,
-        V3D_QPU_WADDR_SIN = 23,
-        V3D_QPU_WADDR_RSQRT2 = 24,
-+        V3D_QPU_WADDR_RECIP = 19,  /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_RSQRT = 20,  /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_EXP = 21,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_LOG = 22,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_SIN = 23,    /* Reserved on V3D 7.x */
-+        V3D_QPU_WADDR_RSQRT2 = 24, /* Reserved on V3D 7.x */
-         V3D_QPU_WADDR_TMUC = 32,
-         V3D_QPU_WADDR_TMUS = 33,
-         V3D_QPU_WADDR_TMUT = 34,
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0007-broadcom-qpu-set-V3D-7.x-names-for-some-waddr-aliasi.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0007-broadcom-qpu-set-V3D-7.x-names-for-some-waddr-aliasi.patch
@ -1,60 +0,0 @@
-From 0ccf3043e4a584e5592bb7fad737d5d98ed23db0 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 5 Aug 2021 01:00:47 +0200
-Subject: [PATCH 007/142] broadcom/qpu: set V3D 7.x names for some waddr
- aliasing
-
-V3D 7.x got rid of the accumulator, but still uses the values for
-WADDR_R5 and WADDR_R5REP, so let's return a proper name and add some
-aliases.
---
- src/broadcom/qpu/qpu_instr.c | 8 ++++++++
- src/broadcom/qpu/qpu_instr.h | 6 ++++--
- 2 files changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index 60dabf74e8e..7759fb0efdf 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -35,6 +35,14 @@ v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
-         if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
-                 return "tmu";
- 
-+        /* V3D 7.x QUAD and REP aliases R5 and R5REPT in the table below
-+         */
-+        if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
-+                return "quad";
-+
-+        if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
-+                return "rep";
-+
-         static const char *waddr_magic[] = {
-                 [V3D_QPU_WADDR_R0] = "r0",
-                 [V3D_QPU_WADDR_R1] = "r1",
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 45a0cad9760..19bf721dbe1 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -93,7 +93,8 @@ enum v3d_qpu_waddr {
-         V3D_QPU_WADDR_R2 = 2,    /* Reserved on V3D 7.x */
-         V3D_QPU_WADDR_R3 = 3,    /* Reserved on V3D 7.x */
-         V3D_QPU_WADDR_R4 = 4,    /* Reserved on V3D 7.x */
-        V3D_QPU_WADDR_R5 = 5,
-+        V3D_QPU_WADDR_R5 = 5,    /* V3D 4.x */
-+        V3D_QPU_WADDR_QUAD = 5,  /* V3D 7.x */
-         V3D_QPU_WADDR_NOP = 6,
-         V3D_QPU_WADDR_TLB = 7,
-         V3D_QPU_WADDR_TLBU = 8,
-@@ -129,7 +130,8 @@ enum v3d_qpu_waddr {
-         V3D_QPU_WADDR_TMUHSCM = 44,
-         V3D_QPU_WADDR_TMUHSF = 45,
-         V3D_QPU_WADDR_TMUHSLOD = 46,
-        V3D_QPU_WADDR_R5REP = 55,
-+        V3D_QPU_WADDR_R5REP = 55, /* V3D 4.x */
-+        V3D_QPU_WADDR_REP = 55,   /* V3D 7.x */
- };
- 
- struct v3d_qpu_flags {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0008-broadcom-compiler-rename-small_imm-to-small_imm_b.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0008-broadcom-compiler-rename-small_imm-to-small_imm_b.patch
@ -1,241 +0,0 @@
-From 18de3cc85cf8bbe294e044f7a12abe14e554de0a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Sun, 19 Sep 2021 03:20:18 +0200
-Subject: [PATCH 008/142] broadcom/compiler: rename small_imm to small_imm_b
-
-Current small_imm is associated with the "B" read address.
-
-We do this change in advance for v71 support, where we will have 4
-different small_imm (a/b/c/d), so we start with a renaming.
---
- src/broadcom/compiler/qpu_schedule.c          | 22 +++++++++----------
- .../compiler/vir_opt_small_immediates.c       |  4 ++--
- src/broadcom/compiler/vir_to_qpu.c            |  2 +-
- src/broadcom/qpu/qpu_disasm.c                 |  2 +-
- src/broadcom/qpu/qpu_instr.h                  |  2 +-
- src/broadcom/qpu/qpu_pack.c                   | 22 +++++++++----------
- 6 files changed, 27 insertions(+), 27 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 3b32b48f86f..a10fa03ed10 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -160,7 +160,7 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
-                 add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
-                 break;
-         case V3D_QPU_MUX_B:
-                if (!n->inst->qpu.sig.small_imm) {
-+                if (!n->inst->qpu.sig.small_imm_b) {
-                         add_read_dep(state,
-                                      state->last_rf[n->inst->qpu.raddr_b], n);
-                 }
-@@ -615,7 +615,7 @@ qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
-               return true;
- 
-         if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
-            !inst->sig.small_imm && (inst->raddr_b == waddr))
-+            !inst->sig.small_imm_b && (inst->raddr_b == waddr))
-               return true;
- 
-         return false;
-@@ -790,11 +790,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
-         uint64_t raddrs_used = 0;
-         if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A))
-                 raddrs_used |= (1ll << a->raddr_a);
-        if (!a->sig.small_imm && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
-+        if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
-                 raddrs_used |= (1ll << a->raddr_b);
-         if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A))
-                 raddrs_used |= (1ll << b->raddr_a);
-        if (!b->sig.small_imm && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
-+        if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
-                 raddrs_used |= (1ll << b->raddr_b);
- 
-         return raddrs_used;
-@@ -816,16 +816,16 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
-         if (naddrs > 2)
-                 return false;
- 
-        if ((add_instr->sig.small_imm || mul_instr->sig.small_imm)) {
-+        if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) {
-                 if (naddrs > 1)
-                         return false;
- 
-                if (add_instr->sig.small_imm && mul_instr->sig.small_imm)
-+                if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b)
-                         if (add_instr->raddr_b != mul_instr->raddr_b)
-                                 return false;
- 
-                result->sig.small_imm = true;
-                result->raddr_b = add_instr->sig.small_imm ?
-+                result->sig.small_imm_b = true;
-+                result->raddr_b = add_instr->sig.small_imm_b ?
-                         add_instr->raddr_b : mul_instr->raddr_b;
-         }
- 
-@@ -836,7 +836,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
-         raddrs_used &= ~(1ll << raddr_a);
-         result->raddr_a = raddr_a;
- 
-        if (!result->sig.small_imm) {
-+        if (!result->sig.small_imm_b) {
-                 if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
-                     raddr_a == add_instr->raddr_b) {
-                         if (add_instr->alu.add.a == V3D_QPU_MUX_B)
-@@ -1025,7 +1025,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
-         merge.sig.ldtmu |= b->sig.ldtmu;
-         merge.sig.ldvary |= b->sig.ldvary;
-         merge.sig.ldvpm |= b->sig.ldvpm;
-        merge.sig.small_imm |= b->sig.small_imm;
-+        merge.sig.small_imm_b |= b->sig.small_imm_b;
-         merge.sig.ldtlb |= b->sig.ldtlb;
-         merge.sig.ldtlbu |= b->sig.ldtlbu;
-         merge.sig.ucb |= b->sig.ucb;
-@@ -1614,7 +1614,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
-                         return false;
- 
-                 if (inst->raddr_b < 3 &&
-                    !inst->sig.small_imm &&
-+                    !inst->sig.small_imm_b &&
-                     v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
-                         return false;
-                 }
-diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c
-index 47d7722968d..df0d6c36c9b 100644
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
-+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
-@@ -80,7 +80,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
-                          */
-                         struct v3d_qpu_sig new_sig = inst->qpu.sig;
-                         uint32_t sig_packed;
-                        new_sig.small_imm = true;
-+                        new_sig.small_imm_b = true;
-                         if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed))
-                                 continue;
- 
-@@ -89,7 +89,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
-                                 vir_dump_inst(c, inst);
-                                 fprintf(stderr, "\n");
-                         }
-                        inst->qpu.sig.small_imm = true;
-+                        inst->qpu.sig.small_imm_b = true;
-                         inst->qpu.raddr_b = packed;
- 
-                         inst->src[i].file = QFILE_SMALL_IMM;
-diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
-index 45e6bfa1470..15c2e3674c2 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
-+++ b/src/broadcom/compiler/vir_to_qpu.c
-@@ -94,7 +94,7 @@ static void
- set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
- {
-         if (src.smimm) {
-                assert(instr->sig.small_imm);
-+                assert(instr->sig.small_imm_b);
-                 *mux = V3D_QPU_MUX_B;
-                 return;
-         }
-diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
-index 28fb2357b97..6aca3c28e78 100644
--- a/src/broadcom/qpu/qpu_disasm.c
-+++ b/src/broadcom/qpu/qpu_disasm.c
-@@ -62,7 +62,7 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm,
-         if (mux == V3D_QPU_MUX_A) {
-                 append(disasm, "rf%d", instr->raddr_a);
-         } else if (mux == V3D_QPU_MUX_B) {
-                if (instr->sig.small_imm) {
-+                if (instr->sig.small_imm_b) {
-                         uint32_t val;
-                         ASSERTED bool ok =
-                                 v3d_qpu_small_imm_unpack(disasm->devinfo,
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 19bf721dbe1..9cd831863b4 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -50,7 +50,7 @@ struct v3d_qpu_sig {
-         bool ldvpm:1;
-         bool ldtlb:1;
-         bool ldtlbu:1;
-        bool small_imm:1;
-+        bool small_imm_b:1;
-         bool ucb:1;
-         bool rotate:1;
-         bool wrtmuc:1;
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index a875683c6f8..beac591d3c1 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -112,7 +112,7 @@
- #define LDTMU .ldtmu = true
- #define LDVARY .ldvary = true
- #define LDVPM .ldvpm = true
-#define SMIMM .small_imm = true
-+#define SMIMM_B .small_imm_b = true
- #define LDTLB .ldtlb = true
- #define LDTLBU .ldtlbu = true
- #define UCB .ucb = true
-@@ -135,8 +135,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
-         [11] = { THRSW, LDVARY,         LDUNIF },
-         [12] = {        LDVARY, LDTMU,         },
-         [13] = { THRSW, LDVARY, LDTMU,         },
-        [14] = { SMIMM, LDVARY,                },
-        [15] = { SMIMM,                        },
-+        [14] = { SMIMM_B, LDVARY,              },
-+        [15] = { SMIMM_B,                      },
-         [16] = {        LDTLB,                 },
-         [17] = {        LDTLBU,                },
-         /* 18-21 reserved */
-@@ -148,8 +148,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
-         [27] = { THRSW, LDVPM,          LDUNIF },
-         [28] = {        LDVPM, LDTMU,          },
-         [29] = { THRSW, LDVPM, LDTMU,          },
-        [30] = { SMIMM, LDVPM,                 },
-        [31] = { SMIMM,                        },
-+        [30] = { SMIMM_B, LDVPM,               },
-+        [31] = { SMIMM_B,                      },
- };
- 
- static const struct v3d_qpu_sig v40_sig_map[] = {
-@@ -167,8 +167,8 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
-         [10] = {        LDVARY,         LDUNIF },
-         [11] = { THRSW, LDVARY,         LDUNIF },
-         /* 12-13 reserved */
-        [14] = { SMIMM, LDVARY,                },
-        [15] = { SMIMM,                        },
-+        [14] = { SMIMM_B, LDVARY,              },
-+        [15] = { SMIMM_B,                      },
-         [16] = {        LDTLB,                 },
-         [17] = {        LDTLBU,                },
-         [18] = {                        WRTMUC },
-@@ -178,7 +178,7 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
-         [22] = { UCB,                          },
-         [23] = { ROT,                          },
-         /* 24-30 reserved */
-        [31] = { SMIMM,         LDTMU,         },
-+        [31] = { SMIMM_B,       LDTMU,         },
- };
- 
- static const struct v3d_qpu_sig v41_sig_map[] = {
-@@ -197,8 +197,8 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
-         [11] = { THRSW,    LDVARY, LDUNIF },
-         [12] = { LDUNIFRF                 },
-         [13] = { THRSW,    LDUNIFRF       },
-        [14] = { SMIMM,    LDVARY,        },
-        [15] = { SMIMM,                   },
-+        [14] = { SMIMM_B,    LDVARY       },
-+        [15] = { SMIMM_B,                 },
-         [16] = {           LDTLB,         },
-         [17] = {           LDTLBU,        },
-         [18] = {                          WRTMUC },
-@@ -210,7 +210,7 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
-         [24] = {                   LDUNIFA},
-         [25] = { LDUNIFARF                },
-         /* 26-30 reserved */
-        [31] = { SMIMM,            LDTMU, },
-+        [31] = { SMIMM_B,          LDTMU, },
- };
- 
- bool
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0009-broadcom-compiler-add-small_imm-a-c-d-on-v3d_qpu_sig.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0009-broadcom-compiler-add-small_imm-a-c-d-on-v3d_qpu_sig.patch
@ -1,53 +0,0 @@
-From 0e87405fe73694c173b7ce14c3d60611f241922c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 5 Aug 2021 00:50:12 +0200
-Subject: [PATCH 009/142] broadcom/compiler: add small_imm a/c/d on v3d_qpu_sig
-
-small_imm_a, small_imm_c and small_imm_d added on top of the already
-existing small_imm_b, as V3D 7.1 defines 4 small immediates, tied to
-the 4 raddr. Note that this is only the definition, plus an inst
-validation rule to check that they are not used before v71. Any real
-use is still pending.
---
- src/broadcom/compiler/qpu_validate.c | 5 +++++
- src/broadcom/qpu/qpu_instr.h         | 5 ++++-
- 2 files changed, 9 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
-index 2cc7a0eb0ae..12788692432 100644
--- a/src/broadcom/compiler/qpu_validate.c
-+++ b/src/broadcom/compiler/qpu_validate.c
-@@ -115,6 +115,11 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
-         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
-                 return;
- 
-+        if (devinfo->ver < 71) {
-+           if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d)
-+              fail_instr(state, "small imm a/c/d added after V3D 7.1");
-+        }
-+
-         /* LDVARY writes r5 two instructions later and LDUNIF writes
-          * r5 one instruction later, which is illegal to have
-          * together.
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 9cd831863b4..13b3f37d43f 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -50,10 +50,13 @@ struct v3d_qpu_sig {
-         bool ldvpm:1;
-         bool ldtlb:1;
-         bool ldtlbu:1;
-        bool small_imm_b:1;
-         bool ucb:1;
-         bool rotate:1;
-         bool wrtmuc:1;
-+        bool small_imm_a:1; /* raddr_a (add a), since V3D 7.x */
-+        bool small_imm_b:1; /* raddr_b (add b) */
-+        bool small_imm_c:1; /* raddr_c (mul a), since V3D 7.x */
-+        bool small_imm_d:1; /* raddr_d (mul b), since V3D 7.x */
- };
- 
- enum v3d_qpu_cond {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0010-broadcom-qpu-add-v71-signal-map.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0010-broadcom-qpu-add-v71-signal-map.patch
@ -1,106 +0,0 @@
-From eca19c911d9af3b0ab3b563ea65dc455e3d27987 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 4 Aug 2021 01:11:16 +0200
-Subject: [PATCH 010/142] broadcom/qpu: add v71 signal map
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Compared with v41, the differences are:
-   * 14, 15, 30 and 31 are now about immediate a, b, c, d respectively
-   * 23 is now reserved. On v42 this was for rotate signals, that are
-     gone on v71.
-
-Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
-Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
---
- src/broadcom/qpu/qpu_pack.c | 47 ++++++++++++++++++++++++++++++++++---
- 1 file changed, 44 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index beac591d3c1..2820d9d4c56 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -112,12 +112,15 @@
- #define LDTMU .ldtmu = true
- #define LDVARY .ldvary = true
- #define LDVPM .ldvpm = true
-#define SMIMM_B .small_imm_b = true
- #define LDTLB .ldtlb = true
- #define LDTLBU .ldtlbu = true
- #define UCB .ucb = true
- #define ROT .rotate = true
- #define WRTMUC .wrtmuc = true
-+#define SMIMM_A .small_imm_a = true
-+#define SMIMM_B .small_imm_b = true
-+#define SMIMM_C .small_imm_c = true
-+#define SMIMM_D .small_imm_d = true
- 
- static const struct v3d_qpu_sig v33_sig_map[] = {
-         /*      MISC   R3       R4      R5 */
-@@ -213,6 +216,40 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
-         [31] = { SMIMM_B,          LDTMU, },
- };
- 
-+
-+static const struct v3d_qpu_sig v71_sig_map[] = {
-+        /*      MISC       phys    RF0 */
-+        [0]  = {                          },
-+        [1]  = { THRSW,                   },
-+        [2]  = {                   LDUNIF },
-+        [3]  = { THRSW,            LDUNIF },
-+        [4]  = {           LDTMU,         },
-+        [5]  = { THRSW,    LDTMU,         },
-+        [6]  = {           LDTMU,  LDUNIF },
-+        [7]  = { THRSW,    LDTMU,  LDUNIF },
-+        [8]  = {           LDVARY,        },
-+        [9]  = { THRSW,    LDVARY,        },
-+        [10] = {           LDVARY, LDUNIF },
-+        [11] = { THRSW,    LDVARY, LDUNIF },
-+        [12] = { LDUNIFRF                 },
-+        [13] = { THRSW,    LDUNIFRF       },
-+        [14] = { SMIMM_A,                 },
-+        [15] = { SMIMM_B,                 },
-+        [16] = {           LDTLB,         },
-+        [17] = {           LDTLBU,        },
-+        [18] = {                          WRTMUC },
-+        [19] = { THRSW,                   WRTMUC },
-+        [20] = {           LDVARY,        WRTMUC },
-+        [21] = { THRSW,    LDVARY,        WRTMUC },
-+        [22] = { UCB,                     },
-+        /* 23 reserved */
-+        [24] = {                   LDUNIFA},
-+        [25] = { LDUNIFARF                },
-+        /* 26-29 reserved */
-+        [30] = { SMIMM_C,                 },
-+        [31] = { SMIMM_D,                 },
-+};
-+
- bool
- v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
-                    uint32_t packed_sig,
-@@ -221,7 +258,9 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
-         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
-                 return false;
- 
-        if (devinfo->ver >= 41)
-+        if (devinfo->ver >= 71)
-+                *sig = v71_sig_map[packed_sig];
-+        else if (devinfo->ver >= 41)
-                 *sig = v41_sig_map[packed_sig];
-         else if (devinfo->ver == 40)
-                 *sig = v40_sig_map[packed_sig];
-@@ -240,7 +279,9 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
- {
-         static const struct v3d_qpu_sig *map;
- 
-        if (devinfo->ver >= 41)
-+        if (devinfo->ver >= 71)
-+                map = v71_sig_map;
-+        else if (devinfo->ver >= 41)
-                 map = v41_sig_map;
-         else if (devinfo->ver == 40)
-                 map = v40_sig_map;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0011-broadcom-qpu-define-v3d_qpu_input-use-on-v3d_qpu_alu.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0011-broadcom-qpu-define-v3d_qpu_input-use-on-v3d_qpu_alu.patch
@ -1,778 +0,0 @@
-From d10e67a396d713ec81fb133f3516e09fe1e067b6 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 6 Aug 2021 01:22:31 +0200
-Subject: [PATCH 011/142] broadcom/qpu: define v3d_qpu_input, use on
- v3d_qpu_alu_instr
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-At this point it just tidies up the alu_instr structure a little.
-
-It also serves to prepare the structure for new changes, as 7.x uses
-raddr instead of mux, and it is just easier to add the raddr to the
-new input structure.
-
-Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
-Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
---
- src/broadcom/compiler/qpu_schedule.c          | 65 +++++++--------
- src/broadcom/compiler/vir.c                   | 16 ++--
- src/broadcom/compiler/vir_dump.c              |  8 +-
- .../compiler/vir_opt_copy_propagate.c         | 12 +--
- .../compiler/vir_opt_redundant_flags.c        |  8 +-
- src/broadcom/compiler/vir_to_qpu.c            | 30 +++----
- src/broadcom/qpu/qpu_disasm.c                 | 16 ++--
- src/broadcom/qpu/qpu_instr.c                  |  8 +-
- src/broadcom/qpu/qpu_instr.h                  | 13 +--
- src/broadcom/qpu/qpu_pack.c                   | 82 +++++++++----------
- src/broadcom/qpu/tests/qpu_disasm.c           |  8 +-
- 11 files changed, 134 insertions(+), 132 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index a10fa03ed10..455fa3867be 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -306,14 +306,14 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
-         /* XXX: LOAD_IMM */
- 
-         if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0)
-                process_mux_deps(state, n, inst->alu.add.a);
-+                process_mux_deps(state, n, inst->alu.add.a.mux);
-         if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1)
-                process_mux_deps(state, n, inst->alu.add.b);
-+                process_mux_deps(state, n, inst->alu.add.b.mux);
- 
-         if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0)
-                process_mux_deps(state, n, inst->alu.mul.a);
-+                process_mux_deps(state, n, inst->alu.mul.a.mux);
-         if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1)
-                process_mux_deps(state, n, inst->alu.mul.b);
-+                process_mux_deps(state, n, inst->alu.mul.b.mux);
- 
-         switch (inst->alu.add.op) {
-         case V3D_QPU_A_VPMSETUP:
-@@ -537,22 +537,22 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
- 
-         if (inst->alu.add.op != V3D_QPU_A_NOP) {
-                 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.a)) {
-+                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) {
-                         return true;
-                 }
-                 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.b)) {
-+                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) {
-                         return true;
-                 }
-         }
- 
-         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
-                 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a)) {
-+                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) {
-                         return true;
-                 }
-                 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b)) {
-+                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) {
-                         return true;
-                 }
-         }
-@@ -839,20 +839,20 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
-         if (!result->sig.small_imm_b) {
-                 if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
-                     raddr_a == add_instr->raddr_b) {
-                        if (add_instr->alu.add.a == V3D_QPU_MUX_B)
-                                result->alu.add.a = V3D_QPU_MUX_A;
-                        if (add_instr->alu.add.b == V3D_QPU_MUX_B &&
-+                        if (add_instr->alu.add.a.mux == V3D_QPU_MUX_B)
-+                                result->alu.add.a.mux = V3D_QPU_MUX_A;
-+                        if (add_instr->alu.add.b.mux == V3D_QPU_MUX_B &&
-                             v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
-                                result->alu.add.b = V3D_QPU_MUX_A;
-+                                result->alu.add.b.mux = V3D_QPU_MUX_A;
-                         }
-                 }
-                 if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_B) &&
-                     raddr_a == mul_instr->raddr_b) {
-                        if (mul_instr->alu.mul.a == V3D_QPU_MUX_B)
-                                result->alu.mul.a = V3D_QPU_MUX_A;
-                        if (mul_instr->alu.mul.b == V3D_QPU_MUX_B &&
-+                        if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_B)
-+                                result->alu.mul.a.mux = V3D_QPU_MUX_A;
-+                        if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_B &&
-                             v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
-                                result->alu.mul.b = V3D_QPU_MUX_A;
-+                                result->alu.mul.b.mux = V3D_QPU_MUX_A;
-                         }
-                 }
-         }
-@@ -863,20 +863,20 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
-         result->raddr_b = raddr_b;
-         if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_A) &&
-             raddr_b == add_instr->raddr_a) {
-                if (add_instr->alu.add.a == V3D_QPU_MUX_A)
-                        result->alu.add.a = V3D_QPU_MUX_B;
-                if (add_instr->alu.add.b == V3D_QPU_MUX_A &&
-+                if (add_instr->alu.add.a.mux == V3D_QPU_MUX_A)
-+                        result->alu.add.a.mux = V3D_QPU_MUX_B;
-+                if (add_instr->alu.add.b.mux == V3D_QPU_MUX_A &&
-                     v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
-                        result->alu.add.b = V3D_QPU_MUX_B;
-+                        result->alu.add.b.mux = V3D_QPU_MUX_B;
-                 }
-         }
-         if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_A) &&
-             raddr_b == mul_instr->raddr_a) {
-                if (mul_instr->alu.mul.a == V3D_QPU_MUX_A)
-                        result->alu.mul.a = V3D_QPU_MUX_B;
-                if (mul_instr->alu.mul.b == V3D_QPU_MUX_A &&
-+                if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_A)
-+                        result->alu.mul.a.mux = V3D_QPU_MUX_B;
-+                if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_A &&
-                     v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
-                        result->alu.mul.b = V3D_QPU_MUX_B;
-+                        result->alu.mul.b.mux = V3D_QPU_MUX_B;
-                 }
-         }
- 
-@@ -927,11 +927,12 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
-         inst->flags.auf = V3D_QPU_UF_NONE;
- 
-         inst->alu.mul.output_pack = inst->alu.add.output_pack;
-        inst->alu.mul.a_unpack = inst->alu.add.a_unpack;
-        inst->alu.mul.b_unpack = inst->alu.add.b_unpack;
-+
-+        inst->alu.mul.a.unpack = inst->alu.add.a.unpack;
-+        inst->alu.mul.b.unpack = inst->alu.add.b.unpack;
-         inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
-        inst->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
-        inst->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
-+        inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
-+        inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
- }
- 
- static bool
-@@ -2064,12 +2065,12 @@ alu_reads_register(struct v3d_qpu_instr *inst,
- 
-         if (add) {
-                 num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
-                mux_a = inst->alu.add.a;
-                mux_b = inst->alu.add.b;
-+                mux_a = inst->alu.add.a.mux;
-+                mux_b = inst->alu.add.b.mux;
-         } else {
-                 num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
-                mux_a = inst->alu.mul.a;
-                mux_b = inst->alu.mul.b;
-+                mux_a = inst->alu.mul.a.mux;
-+                mux_b = inst->alu.mul.b.mux;
-         }
- 
-         for (int i = 0; i < num_src; i++) {
-diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
-index 660b11b0577..007cb0a941b 100644
--- a/src/broadcom/compiler/vir.c
-+++ b/src/broadcom/compiler/vir.c
-@@ -113,10 +113,10 @@ vir_is_raw_mov(struct qinst *inst)
-                 return false;
-         }
- 
-        if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
-            inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
-            inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
-            inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
-+        if (inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
-+            inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE ||
-+            inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
-+            inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE) {
-                 return false;
-         }
- 
-@@ -209,15 +209,15 @@ vir_set_unpack(struct qinst *inst, int src,
- 
-         if (vir_is_add(inst)) {
-                 if (src == 0)
-                        inst->qpu.alu.add.a_unpack = unpack;
-+                        inst->qpu.alu.add.a.unpack = unpack;
-                 else
-                        inst->qpu.alu.add.b_unpack = unpack;
-+                        inst->qpu.alu.add.b.unpack = unpack;
-         } else {
-                 assert(vir_is_mul(inst));
-                 if (src == 0)
-                        inst->qpu.alu.mul.a_unpack = unpack;
-+                        inst->qpu.alu.mul.a.unpack = unpack;
-                 else
-                        inst->qpu.alu.mul.b_unpack = unpack;
-+                        inst->qpu.alu.mul.b.unpack = unpack;
-         }
- }
- 
-diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c
-index 5c47bbdc1b0..ab5d4043039 100644
--- a/src/broadcom/compiler/vir_dump.c
-+++ b/src/broadcom/compiler/vir_dump.c
-@@ -270,8 +270,8 @@ vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
-                 vir_print_reg(c, inst, inst->dst);
-                 fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.add.output_pack));
- 
-                unpack[0] = instr->alu.add.a_unpack;
-                unpack[1] = instr->alu.add.b_unpack;
-+                unpack[0] = instr->alu.add.a.unpack;
-+                unpack[1] = instr->alu.add.b.unpack;
-         } else {
-                 fprintf(stderr, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
-                 fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.mc));
-@@ -282,8 +282,8 @@ vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
-                 vir_print_reg(c, inst, inst->dst);
-                 fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.mul.output_pack));
- 
-                unpack[0] = instr->alu.mul.a_unpack;
-                unpack[1] = instr->alu.mul.b_unpack;
-+                unpack[0] = instr->alu.mul.a.unpack;
-+                unpack[1] = instr->alu.mul.b.unpack;
-         }
- 
-         for (int i = 0; i < nsrc; i++) {
-diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c
-index da121c2a5bd..c4aa7255a17 100644
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
-+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
-@@ -104,14 +104,14 @@ vir_has_unpack(struct qinst *inst, int chan)
- 
-         if (vir_is_add(inst)) {
-                 if (chan == 0)
-                        return inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE;
-+                        return inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE;
-                 else
-                        return inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE;
-+                        return inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE;
-         } else {
-                 if (chan == 0)
-                        return inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE;
-+                        return inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE;
-                 else
-                        return inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE;
-+                        return inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE;
-         }
- }
- 
-@@ -161,7 +161,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
-                                 continue;
- 
-                         /* these ops can't represent abs. */
-                        if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) {
-+                        if (mov->qpu.alu.mul.a.unpack == V3D_QPU_UNPACK_ABS) {
-                                 switch (inst->qpu.alu.add.op) {
-                                 case V3D_QPU_A_VFPACK:
-                                 case V3D_QPU_A_FROUND:
-@@ -189,7 +189,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
- 
-                 inst->src[i] = mov->src[0];
-                 if (vir_has_unpack(mov, 0)) {
-                        enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a_unpack;
-+                        enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a.unpack;
- 
-                         vir_set_unpack(inst, i, unpack);
-                 }
-diff --git a/src/broadcom/compiler/vir_opt_redundant_flags.c b/src/broadcom/compiler/vir_opt_redundant_flags.c
-index c7896d57f2b..6b61ed6a39a 100644
--- a/src/broadcom/compiler/vir_opt_redundant_flags.c
-+++ b/src/broadcom/compiler/vir_opt_redundant_flags.c
-@@ -81,11 +81,11 @@ vir_instr_flags_op_equal(struct qinst *a, struct qinst *b)
-             a->qpu.flags.mpf != b->qpu.flags.mpf ||
-             a->qpu.alu.add.op != b->qpu.alu.add.op ||
-             a->qpu.alu.mul.op != b->qpu.alu.mul.op ||
-            a->qpu.alu.add.a_unpack != b->qpu.alu.add.a_unpack ||
-            a->qpu.alu.add.b_unpack != b->qpu.alu.add.b_unpack ||
-+            a->qpu.alu.add.a.unpack != b->qpu.alu.add.a.unpack ||
-+            a->qpu.alu.add.b.unpack != b->qpu.alu.add.b.unpack ||
-             a->qpu.alu.add.output_pack != b->qpu.alu.add.output_pack ||
-            a->qpu.alu.mul.a_unpack != b->qpu.alu.mul.a_unpack ||
-            a->qpu.alu.mul.b_unpack != b->qpu.alu.mul.b_unpack ||
-+            a->qpu.alu.mul.a.unpack != b->qpu.alu.mul.a.unpack ||
-+            a->qpu.alu.mul.b.unpack != b->qpu.alu.mul.b.unpack ||
-             a->qpu.alu.mul.output_pack != b->qpu.alu.mul.output_pack) {
-                 return false;
-         }
-diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
-index 15c2e3674c2..c8b6e0a91a0 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
-+++ b/src/broadcom/compiler/vir_to_qpu.c
-@@ -106,20 +106,20 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
-                 return;
-         }
- 
-        if (instr->alu.add.a != V3D_QPU_MUX_A &&
-            instr->alu.add.b != V3D_QPU_MUX_A &&
-            instr->alu.mul.a != V3D_QPU_MUX_A &&
-            instr->alu.mul.b != V3D_QPU_MUX_A) {
-+        if (instr->alu.add.a.mux != V3D_QPU_MUX_A &&
-+            instr->alu.add.b.mux != V3D_QPU_MUX_A &&
-+            instr->alu.mul.a.mux != V3D_QPU_MUX_A &&
-+            instr->alu.mul.b.mux != V3D_QPU_MUX_A) {
-                 instr->raddr_a = src.index;
-                 *mux = V3D_QPU_MUX_A;
-         } else {
-                 if (instr->raddr_a == src.index) {
-                         *mux = V3D_QPU_MUX_A;
-                 } else {
-                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
-                                 instr->alu.add.b == V3D_QPU_MUX_B &&
-                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
-                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
-+                        assert(!(instr->alu.add.a.mux == V3D_QPU_MUX_B &&
-+                                 instr->alu.add.b.mux == V3D_QPU_MUX_B &&
-+                                 instr->alu.mul.a.mux == V3D_QPU_MUX_B &&
-+                                 instr->alu.mul.b.mux == V3D_QPU_MUX_B) ||
-                                src.index == instr->raddr_b);
- 
-                         instr->raddr_b = src.index;
-@@ -147,14 +147,14 @@ is_no_op_mov(struct qinst *qinst)
-                 if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
-                         return false;
- 
-                if (qinst->qpu.alu.mul.a !=
-+                if (qinst->qpu.alu.mul.a.mux !=
-                     V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
-                         return false;
-                 }
-         } else {
-                 int raddr;
- 
-                switch (qinst->qpu.alu.mul.a) {
-+                switch (qinst->qpu.alu.mul.a.mux) {
-                 case V3D_QPU_MUX_A:
-                         raddr = qinst->qpu.raddr_a;
-                         break;
-@@ -171,7 +171,7 @@ is_no_op_mov(struct qinst *qinst)
-         /* No packing or flags updates, or we need to execute the
-          * instruction.
-          */
-        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
-+        if (qinst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
-             qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
-             qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
-             qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
-@@ -302,11 +302,11 @@ v3d_generate_code_block(struct v3d_compile *c,
-                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
-                                 if (nsrc >= 1) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.add.a, src[0]);
-+                                                &qinst->qpu.alu.add.a.mux, src[0]);
-                                 }
-                                 if (nsrc >= 2) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.add.b, src[1]);
-+                                                &qinst->qpu.alu.add.b.mux, src[1]);
-                                 }
- 
-                                 qinst->qpu.alu.add.waddr = dst.index;
-@@ -314,11 +314,11 @@ v3d_generate_code_block(struct v3d_compile *c,
-                         } else {
-                                 if (nsrc >= 1) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.mul.a, src[0]);
-+                                                &qinst->qpu.alu.mul.a.mux, src[0]);
-                                 }
-                                 if (nsrc >= 2) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.mul.b, src[1]);
-+                                                &qinst->qpu.alu.mul.b.mux, src[1]);
-                                 }
- 
-                                 qinst->qpu.alu.mul.waddr = dst.index;
-diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
-index 6aca3c28e78..588a665f770 100644
--- a/src/broadcom/qpu/qpu_disasm.c
-+++ b/src/broadcom/qpu/qpu_disasm.c
-@@ -121,16 +121,16 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
-         if (num_src >= 1) {
-                 if (has_dst)
-                         append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a);
-+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a.mux);
-                 append(disasm, "%s",
-                       v3d_qpu_unpack_name(instr->alu.add.a_unpack));
-+                       v3d_qpu_unpack_name(instr->alu.add.a.unpack));
-         }
- 
-         if (num_src >= 2) {
-                 append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b);
-+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b.mux);
-                 append(disasm, "%s",
-                       v3d_qpu_unpack_name(instr->alu.add.b_unpack));
-+                       v3d_qpu_unpack_name(instr->alu.add.b.unpack));
-         }
- }
- 
-@@ -164,16 +164,16 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
-         if (num_src >= 1) {
-                 if (has_dst)
-                         append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a);
-+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a.mux);
-                 append(disasm, "%s",
-                       v3d_qpu_unpack_name(instr->alu.mul.a_unpack));
-+                       v3d_qpu_unpack_name(instr->alu.mul.a.unpack));
-         }
- 
-         if (num_src >= 2) {
-                 append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b);
-+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b.mux);
-                 append(disasm, "%s",
-                       v3d_qpu_unpack_name(instr->alu.mul.b_unpack));
-+                       v3d_qpu_unpack_name(instr->alu.mul.b.unpack));
-         }
- }
- 
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index 7759fb0efdf..7ece8b5e570 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -926,10 +926,10 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
-         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
-         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
- 
-        return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
-                (add_nsrc > 1 && inst->alu.add.b == mux) ||
-                (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
-                (mul_nsrc > 1 && inst->alu.mul.b == mux));
-+        return ((add_nsrc > 0 && inst->alu.add.a.mux == mux) ||
-+                (add_nsrc > 1 && inst->alu.add.b.mux == mux) ||
-+                (mul_nsrc > 0 && inst->alu.mul.a.mux == mux) ||
-+                (mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
- }
- 
- bool
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 13b3f37d43f..53a51bfb3e1 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -294,25 +294,26 @@ enum v3d_qpu_mux {
-         V3D_QPU_MUX_B,
- };
- 
-+struct v3d_qpu_input {
-+        enum v3d_qpu_mux mux;
-+        enum v3d_qpu_input_unpack unpack;
-+};
-+
- struct v3d_qpu_alu_instr {
-         struct {
-                 enum v3d_qpu_add_op op;
-                enum v3d_qpu_mux a, b;
-+                struct v3d_qpu_input a, b;
-                 uint8_t waddr;
-                 bool magic_write;
-                 enum v3d_qpu_output_pack output_pack;
-                enum v3d_qpu_input_unpack a_unpack;
-                enum v3d_qpu_input_unpack b_unpack;
-         } add;
- 
-         struct {
-                 enum v3d_qpu_mul_op op;
-                enum v3d_qpu_mux a, b;
-+                struct v3d_qpu_input a, b;
-                 uint8_t waddr;
-                 bool magic_write;
-                 enum v3d_qpu_output_pack output_pack;
-                enum v3d_qpu_input_unpack a_unpack;
-                enum v3d_qpu_input_unpack b_unpack;
-         } mul;
- };
- 
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 2820d9d4c56..6e975793fc0 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -853,12 +853,12 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-                         instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
- 
-                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
-                                                   &instr->alu.add.a_unpack)) {
-+                                                   &instr->alu.add.a.unpack)) {
-                         return false;
-                 }
- 
-                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
-                                                   &instr->alu.add.b_unpack)) {
-+                                                   &instr->alu.add.b.unpack)) {
-                         return false;
-                 }
-                 break;
-@@ -872,7 +872,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-                 instr->alu.add.output_pack = mux_b & 0x3;
- 
-                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
-                                                   &instr->alu.add.a_unpack)) {
-+                                                   &instr->alu.add.a.unpack)) {
-                         return false;
-                 }
-                 break;
-@@ -884,7 +884,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
- 
-                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
-                                                   &instr->alu.add.a_unpack)) {
-+                                                   &instr->alu.add.a.unpack)) {
-                         return false;
-                 }
-                 break;
-@@ -892,23 +892,23 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-         case V3D_QPU_A_VFMIN:
-         case V3D_QPU_A_VFMAX:
-                 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
-                                                   &instr->alu.add.a_unpack)) {
-+                                                   &instr->alu.add.a.unpack)) {
-                         return false;
-                 }
- 
-                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
-                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
-+                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
-                 break;
- 
-         default:
-                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
-                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
-                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
-+                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
-+                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
-                 break;
-         }
- 
-        instr->alu.add.a = mux_a;
-        instr->alu.add.b = mux_b;
-+        instr->alu.add.a.mux = mux_a;
-+        instr->alu.add.b.mux = mux_b;
-         instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
- 
-         instr->alu.add.magic_write = false;
-@@ -956,12 +956,12 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-                 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
- 
-                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
-                                                   &instr->alu.mul.a_unpack)) {
-+                                                   &instr->alu.mul.a.unpack)) {
-                         return false;
-                 }
- 
-                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
-                                                   &instr->alu.mul.b_unpack)) {
-+                                                   &instr->alu.mul.b.unpack)) {
-                         return false;
-                 }
- 
-@@ -972,7 +972,7 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-                                               ((mux_b >> 2) & 1));
- 
-                 if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
-                                                   &instr->alu.mul.a_unpack)) {
-+                                                   &instr->alu.mul.a.unpack)) {
-                         return false;
-                 }
- 
-@@ -982,23 +982,23 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
-                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
- 
-                 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
-                                                   &instr->alu.mul.a_unpack)) {
-+                                                   &instr->alu.mul.a.unpack)) {
-                         return false;
-                 }
- 
-                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
-+                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
- 
-                 break;
- 
-         default:
-                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
-                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
-                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
-+                instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
-+                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
-                 break;
-         }
- 
-        instr->alu.mul.a = mux_a;
-        instr->alu.mul.b = mux_b;
-+        instr->alu.mul.a.mux = mux_a;
-+        instr->alu.mul.b.mux = mux_b;
-         instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
-         instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
- 
-@@ -1030,8 +1030,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
- {
-         uint32_t waddr = instr->alu.add.waddr;
-        uint32_t mux_a = instr->alu.add.a;
-        uint32_t mux_b = instr->alu.add.b;
-+        uint32_t mux_a = instr->alu.add.a.mux;
-+        uint32_t mux_b = instr->alu.add.b.mux;
-         int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
-         const struct opcode_desc *desc =
-                 lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
-@@ -1102,12 +1102,12 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 }
-                 opcode |= output_pack << 4;
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
-                                                  &a_unpack)) {
-                         return false;
-                 }
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
-                                                  &b_unpack)) {
-                         return false;
-                 }
-@@ -1141,17 +1141,17 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 uint32_t a_unpack;
-                 uint32_t b_unpack;
- 
-                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
-                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
-+                if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
-+                    instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
-                         return false;
-                 }
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
-                                                  &a_unpack)) {
-                         return false;
-                 }
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
-                                                  &b_unpack)) {
-                         return false;
-                 }
-@@ -1176,7 +1176,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 }
-                 mux_b |= packed;
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-@@ -1194,7 +1194,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-                         return false;
- 
-                 uint32_t packed;
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-@@ -1207,11 +1207,11 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-         case V3D_QPU_A_VFMIN:
-         case V3D_QPU_A_VFMAX:
-                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
-                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
-+                    instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
-                         return false;
-                 }
- 
-                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
-+                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-@@ -1221,8 +1221,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
-         default:
-                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
-                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
-                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
-                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
-+                     instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
-+                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
-                         return false;
-                 }
-                 break;
-@@ -1242,8 +1242,8 @@ static bool
- v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
- {
-        uint32_t mux_a = instr->alu.mul.a;
-        uint32_t mux_b = instr->alu.mul.b;
-+        uint32_t mux_a = instr->alu.mul.a.mux;
-+        uint32_t mux_b = instr->alu.mul.b.mux;
-         int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
- 
-         const struct opcode_desc *desc =
-@@ -1277,13 +1277,13 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                  */
-                 opcode += packed << 4;
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-                 opcode |= packed << 2;
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-@@ -1301,7 +1301,7 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                 opcode |= (packed >> 1) & 1;
-                 mux_b = (packed & 1) << 2;
- 
-                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-@@ -1315,16 +1315,16 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
-                         return false;
- 
-                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
-+                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
-+                if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
-                         opcode = 8;
-                 else
-                         opcode |= (packed + 4) & 7;
- 
-                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
-+                if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
-                         return false;
- 
-                 break;
-diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c
-index 2f8e19c73fe..be7b78d5ef0 100644
--- a/src/broadcom/qpu/tests/qpu_disasm.c
-+++ b/src/broadcom/qpu/tests/qpu_disasm.c
-@@ -160,10 +160,10 @@ main(int argc, char **argv)
-                                 /* Swap the operands to be sure that we test
-                                  * how the QPUs distinguish between these ops.
-                                  */
-                                swap_mux(&instr.alu.add.a,
-                                         &instr.alu.add.b);
-                                swap_pack(&instr.alu.add.a_unpack,
-                                          &instr.alu.add.b_unpack);
-+                                swap_mux(&instr.alu.add.a.mux,
-+                                         &instr.alu.add.b.mux);
-+                                swap_pack(&instr.alu.add.a.unpack,
-+                                          &instr.alu.add.b.unpack);
-                                 break;
-                         default:
-                                 break;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0012-broadcom-qpu-add-raddr-on-v3d_qpu_input.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0012-broadcom-qpu-add-raddr-on-v3d_qpu_input.patch
@ -1,45 +0,0 @@
-From 52ea09792ff8a438ccdecac47b8415657be90098 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 6 Aug 2021 01:33:32 +0200
-Subject: [PATCH 012/142] broadcom/qpu: add raddr on v3d_qpu_input
-
-On V3D 7.x mux are not used, and raddr_a/b/c/d are used instead
-
-This is not perfect, as for v71 the raddr_a/b defined at qpu_instr
-becomes superfluous. But the alternative would be to define two
-different structs, or even have them defined based on version
-ifdefs, so this is a reasonable compromise.
---
- src/broadcom/qpu/qpu_instr.h | 9 ++++++---
- 1 file changed, 6 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 53a51bfb3e1..9e56e2d6a99 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -295,7 +295,10 @@ enum v3d_qpu_mux {
- };
- 
- struct v3d_qpu_input {
-        enum v3d_qpu_mux mux;
-+        union {
-+                enum v3d_qpu_mux mux; /* V3D 4.x */
-+                uint8_t raddr; /* V3D 7.x */
-+        };
-         enum v3d_qpu_input_unpack unpack;
- };
- 
-@@ -385,8 +388,8 @@ struct v3d_qpu_instr {
-         struct v3d_qpu_sig sig;
-         uint8_t sig_addr;
-         bool sig_magic; /* If the signal writes to a magic address */
-        uint8_t raddr_a;
-        uint8_t raddr_b;
-+        uint8_t raddr_a; /* V3D 4.x */
-+        uint8_t raddr_b; /* V3D 4.x*/
-         struct v3d_qpu_flags flags;
- 
-         union {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0013-broadcom-qpu-defining-shift-mask-for-raddr_c-d.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0013-broadcom-qpu-defining-shift-mask-for-raddr_c-d.patch
@ -1,37 +0,0 @@
-From 3e5ad0881c2789619cdf65f40a44d5481e28e800 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 12 Aug 2021 02:24:02 +0200
-Subject: [PATCH 013/142] broadcom/qpu: defining shift/mask for raddr_c/d
-
-On V3D 7.x they replace mul_a/b and add_a/b
---
- src/broadcom/qpu/qpu_pack.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 6e975793fc0..4f106909729 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -84,6 +84,9 @@
- #define V3D_QPU_MUL_A_SHIFT                 18
- #define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)
- 
-+#define V3D_QPU_RADDR_C_SHIFT               18
-+#define V3D_QPU_RADDR_C_MASK                QPU_MASK(23, 18)
-+
- #define V3D_QPU_ADD_B_SHIFT                 15
- #define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)
- 
-@@ -98,6 +101,9 @@
- #define V3D_QPU_BRANCH_BDI_SHIFT            12
- #define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)
- 
-+#define V3D_QPU_RADDR_D_SHIFT               12
-+#define V3D_QPU_RADDR_D_MASK                QPU_MASK(17, 12)
-+
- #define V3D_QPU_RADDR_A_SHIFT               6
- #define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0014-broadcom-commmon-add-has_accumulators-field-on-v3d_d.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0014-broadcom-commmon-add-has_accumulators-field-on-v3d_d.patch
@ -1,46 +0,0 @@
-From 81febf14fe05ad26e992275b911e8bc1e1416ebc Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 17 Sep 2021 01:04:31 +0200
-Subject: [PATCH 014/142] broadcom/commmon: add has_accumulators field on
- v3d_device_info
-
-Even though we could just check the version in the code, checking
-this field makes several places more readable. For example, in the
-register allocation code we don't assign an accumulator because we
-don't have accumulators on that hw, rather than because the hw version
-is a given one.
---
- src/broadcom/common/v3d_device_info.c | 2 ++
- src/broadcom/common/v3d_device_info.h | 3 +++
- 2 files changed, 5 insertions(+)
-
-diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
-index 7512fe3a06b..7bc2b662cfc 100644
--- a/src/broadcom/common/v3d_device_info.c
-+++ b/src/broadcom/common/v3d_device_info.c
-@@ -65,6 +65,8 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
-     int qups = (ident1.value >> 8) & 0xf;
-     devinfo->qpu_count = nslc * qups;
- 
-+    devinfo->has_accumulators = devinfo->ver < 71;
-+
-     switch (devinfo->ver) {
-         case 33:
-         case 41:
-diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
-index 32cb65cf81f..8dfc7858727 100644
--- a/src/broadcom/common/v3d_device_info.h
-+++ b/src/broadcom/common/v3d_device_info.h
-@@ -42,6 +42,9 @@ struct v3d_device_info {
- 
-         /* NSLC * QUPS from the core's IDENT registers. */
-         int qpu_count;
-+
-+        /* If the hw has accumulator registers */
-+        bool has_accumulators;
- };
- 
- typedef int (*v3d_ioctl_fun)(int fd, unsigned long request, void *arg);
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0015-broadcom-qpu-add-qpu_writes_rf0_implicitly-helper.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0015-broadcom-qpu-add-qpu_writes_rf0_implicitly-helper.patch
@ -1,52 +0,0 @@
-From 7d42eca87b6e144697810405308d99d200dca62a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 15 Sep 2021 10:56:43 +0200
-Subject: [PATCH 015/142] broadcom/qpu: add qpu_writes_rf0_implicitly helper
-
-On v71 rf0 replaces r5 as the register that gets updated implicitly
-with uniform loads, and gets the C coefficient with ldvary. This
-helper returns whether rf0 gets implicitly updated.
---
- src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++
- src/broadcom/qpu/qpu_instr.h |  2 ++
- 2 files changed, 14 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index 7ece8b5e570..8de99c611d5 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -920,6 +920,18 @@ v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
-         return false;
- }
- 
-+bool
-+v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
-+                              const struct v3d_qpu_instr *inst)
-+{
-+        if (devinfo->ver >= 71 &&
-+            (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
-+                return true;
-+        }
-+
-+        return false;
-+}
-+
- bool
- v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
- {
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 9e56e2d6a99..a25be8e0ee6 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -473,6 +473,8 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
-                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
- bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
-                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
-+bool v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
-+                                   const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
- bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
-                           const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0016-broadcom-qpu-add-pack-unpack-support-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0016-broadcom-qpu-add-pack-unpack-support-for-v71.patch
--- a/projects/RPi/devices/RPi5/patches/mesa/0017-broadcom-compiler-update-node-temp-translation-for-v.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0017-broadcom-compiler-update-node-temp-translation-for-v.patch
@ -1,261 +0,0 @@
-From ebba9019461083687f6afd23ff0d4646c1a667cb Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Sun, 29 Jan 2023 00:27:11 +0100
-Subject: [PATCH 017/142] broadcom/compiler: update node/temp translation for
- v71
-
-As the offset applied needs to take into account if we have
-accumulators or not.
---
- src/broadcom/compiler/vir_register_allocate.c | 68 +++++++++----------
- 1 file changed, 34 insertions(+), 34 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index b22f915d1df..aa9473d124b 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -39,30 +39,31 @@
-                            CLASS_BITS_R5)
- 
- static inline uint32_t
-temp_to_node(uint32_t temp)
-+temp_to_node(struct v3d_compile *c, uint32_t temp)
- {
-        return temp + ACC_COUNT;
-+        return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
- }
- 
- static inline uint32_t
-node_to_temp(uint32_t node)
-+node_to_temp(struct v3d_compile *c, uint32_t node)
- {
-        assert(node >= ACC_COUNT);
-        return node - ACC_COUNT;
-+        assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
-+               (!c->devinfo->has_accumulators && node >= 0));
-+        return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
- }
- 
- static inline uint8_t
-get_temp_class_bits(struct v3d_ra_node_info *nodes,
-+get_temp_class_bits(struct v3d_compile *c,
-                     uint32_t temp)
- {
-        return nodes->info[temp_to_node(temp)].class_bits;
-+        return c->nodes.info[temp_to_node(c, temp)].class_bits;
- }
- 
- static inline void
-set_temp_class_bits(struct v3d_ra_node_info *nodes,
-+set_temp_class_bits(struct v3d_compile *c,
-                     uint32_t temp, uint8_t class_bits)
- {
-        nodes->info[temp_to_node(temp)].class_bits = class_bits;
-+        c->nodes.info[temp_to_node(c, temp)].class_bits = class_bits;
- }
- 
- static struct ra_class *
-@@ -84,7 +85,7 @@ static inline struct ra_class *
- choose_reg_class_for_temp(struct v3d_compile *c, uint32_t temp)
- {
-         assert(temp < c->num_temps && temp < c->nodes.alloc_count);
-        return choose_reg_class(c, get_temp_class_bits(&c->nodes, temp));
-+        return choose_reg_class(c, get_temp_class_bits(c, temp));
- }
- 
- static inline bool
-@@ -313,7 +314,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
- 
-         for (unsigned i = 0; i < c->num_temps; i++) {
-                 if (BITSET_TEST(c->spillable, i)) {
-                        ra_set_node_spill_cost(c->g, temp_to_node(i),
-+                        ra_set_node_spill_cost(c->g, temp_to_node(c, i),
-                                                spill_costs[i]);
-                 }
-         }
-@@ -482,7 +483,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
-                         c->temp_start[i] < ip && c->temp_end[i] >= ip :
-                         c->temp_start[i] <= ip && c->temp_end[i] > ip;
-                 if (thrsw_cross) {
-                        ra_set_node_class(c->g, temp_to_node(i),
-+                        ra_set_node_class(c->g, temp_to_node(c, i),
-                                           choose_reg_class(c, CLASS_BITS_PHYS));
-                 }
-         }
-@@ -509,8 +510,7 @@ v3d_emit_tmu_spill(struct v3d_compile *c,
-          * same register class bits as the original.
-          */
-         if (inst == position) {
-                uint8_t class_bits = get_temp_class_bits(&c->nodes,
-                                                         inst->dst.index);
-+                uint8_t class_bits = get_temp_class_bits(c, inst->dst.index);
-                 inst->dst = vir_get_temp(c);
-                 add_node(c, inst->dst.index, class_bits);
-         } else {
-@@ -574,7 +574,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-                 reconstruct_op = orig_def->qpu.alu.add.op;
-         }
- 
-        uint32_t spill_node = temp_to_node(spill_temp);
-+        uint32_t spill_node = temp_to_node(c, spill_temp);
- 
-         /* We must disable the ldunif optimization if we are spilling uniforms */
-         bool had_disable_ldunif_opt = c->disable_ldunif_opt;
-@@ -739,12 +739,12 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-          * update node priorities based one new liveness data.
-          */
-         uint32_t sb_temp =c->spill_base.index;
-        uint32_t sb_node = temp_to_node(sb_temp);
-+        uint32_t sb_node = temp_to_node(c, sb_temp);
-         for (uint32_t i = 0; i < c->num_temps; i++) {
-                 if (c->temp_end[i] == -1)
-                         continue;
- 
-                uint32_t node_i = temp_to_node(i);
-+                uint32_t node_i = temp_to_node(c, i);
-                 c->nodes.info[node_i].priority =
-                         c->temp_end[i] - c->temp_start[i];
- 
-@@ -752,7 +752,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-                      j < c->num_temps; j++) {
-                         if (interferes(c->temp_start[i], c->temp_end[i],
-                                        c->temp_start[j], c->temp_end[j])) {
-                                uint32_t node_j = temp_to_node(j);
-+                                uint32_t node_j = temp_to_node(c, j);
-                                 ra_add_node_interference(c->g, node_i, node_j);
-                         }
-                 }
-@@ -958,7 +958,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                 for (int i = 0; i < c->num_temps; i++) {
-                         if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                 ra_add_node_interference(c->g,
-                                                         temp_to_node(i),
-+                                                         temp_to_node(c, i),
-                                                          acc_nodes[3]);
-                         }
-                 }
-@@ -968,7 +968,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                 for (int i = 0; i < c->num_temps; i++) {
-                         if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                 ra_add_node_interference(c->g,
-                                                         temp_to_node(i),
-+                                                         temp_to_node(c, i),
-                                                          acc_nodes[4]);
-                         }
-                 }
-@@ -987,7 +987,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                          * decides whether the LDVPM is in or out)
-                          */
-                         assert(inst->dst.file == QFILE_TEMP);
-                        set_temp_class_bits(&c->nodes, inst->dst.index,
-+                        set_temp_class_bits(c, inst->dst.index,
-                                             CLASS_BITS_PHYS);
-                         break;
-                 }
-@@ -1002,7 +1002,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                          * phys regfile.
-                          */
-                         assert(inst->dst.file == QFILE_TEMP);
-                        set_temp_class_bits(&c->nodes, inst->dst.index,
-+                        set_temp_class_bits(c, inst->dst.index,
-                                             CLASS_BITS_PHYS);
-                         break;
-                 }
-@@ -1024,7 +1024,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                          */
-                         assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
-                         assert(inst->dst.file == QFILE_TEMP);
-                        uint32_t node = temp_to_node(inst->dst.index);
-+                        uint32_t node = temp_to_node(c, inst->dst.index);
-                         ra_set_node_reg(c->g, node,
-                                         PHYS_INDEX + inst->src[0].index);
-                         break;
-@@ -1043,9 +1043,9 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                  */
-                 if (!inst->qpu.sig.ldunif) {
-                         uint8_t class_bits =
-                                get_temp_class_bits(&c->nodes, inst->dst.index) &
-+                                get_temp_class_bits(c, inst->dst.index) &
-                                 ~CLASS_BITS_R5;
-                        set_temp_class_bits(&c->nodes, inst->dst.index,
-+                        set_temp_class_bits(c, inst->dst.index,
-                                             class_bits);
- 
-                 } else {
-@@ -1054,7 +1054,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                          * loads interfere with each other.
-                          */
-                         if (c->devinfo->ver < 40) {
-                                set_temp_class_bits(&c->nodes, inst->dst.index,
-+                                set_temp_class_bits(c, inst->dst.index,
-                                                     CLASS_BITS_R5);
-                         }
-                 }
-@@ -1064,7 +1064,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-         if (inst->qpu.sig.thrsw) {
-                 for (int i = 0; i < c->num_temps; i++) {
-                         if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                set_temp_class_bits(&c->nodes, i,
-+                                set_temp_class_bits(c, i,
-                                                     CLASS_BITS_PHYS);
-                         }
-                 }
-@@ -1125,7 +1125,7 @@ v3d_register_allocate(struct v3d_compile *c)
-                         c->nodes.info[i].priority = 0;
-                         c->nodes.info[i].class_bits = 0;
-                 } else {
-                        uint32_t t = node_to_temp(i);
-+                        uint32_t t = node_to_temp(c, i);
-                         c->nodes.info[i].priority =
-                                 c->temp_end[t] - c->temp_start[t];
-                         c->nodes.info[i].class_bits = CLASS_BITS_ANY;
-@@ -1143,7 +1143,7 @@ v3d_register_allocate(struct v3d_compile *c)
- 
-         /* Set the register classes for all our temporaries in the graph */
-         for (uint32_t i = 0; i < c->num_temps; i++) {
-                ra_set_node_class(c->g, temp_to_node(i),
-+                ra_set_node_class(c->g, temp_to_node(c, i),
-                                   choose_reg_class_for_temp(c, i));
-         }
- 
-@@ -1153,8 +1153,8 @@ v3d_register_allocate(struct v3d_compile *c)
-                         if (interferes(c->temp_start[i], c->temp_end[i],
-                                        c->temp_start[j], c->temp_end[j])) {
-                                 ra_add_node_interference(c->g,
-                                                         temp_to_node(i),
-                                                         temp_to_node(j));
-+                                                         temp_to_node(c, i),
-+                                                         temp_to_node(c, j));
-                         }
-                 }
-         }
-@@ -1171,7 +1171,7 @@ v3d_register_allocate(struct v3d_compile *c)
-                 if (c->spill_size <
-                     V3D_CHANNELS * sizeof(uint32_t) * force_register_spills) {
-                         int node = v3d_choose_spill_node(c);
-                        uint32_t temp = node_to_temp(node);
-+                        uint32_t temp = node_to_temp(c, node);
-                         if (node != -1) {
-                                 v3d_spill_reg(c, acc_nodes, temp);
-                                 continue;
-@@ -1186,7 +1186,7 @@ v3d_register_allocate(struct v3d_compile *c)
-                 if (node == -1)
-                         goto spill_fail;
- 
-                uint32_t temp = node_to_temp(node);
-+                uint32_t temp = node_to_temp(c, node);
-                 enum temp_spill_type spill_type =
-                         get_spill_type_for_temp(c, temp);
-                 if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
-@@ -1201,7 +1201,7 @@ v3d_register_allocate(struct v3d_compile *c)
-         /* Allocation was successful, build the 'temp -> reg' map */
-         temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
-         for (uint32_t i = 0; i < c->num_temps; i++) {
-                int ra_reg = ra_get_node_reg(c->g, temp_to_node(i));
-+                int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
-                 if (ra_reg < PHYS_INDEX) {
-                         temp_registers[i].magic = true;
-                         temp_registers[i].index = (V3D_QPU_WADDR_R0 +
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0018-broadcom-compiler-phys-index-depends-on-hw-version.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0018-broadcom-compiler-phys-index-depends-on-hw-version.patch
@ -1,144 +0,0 @@
-From 9b2dfe0286212aba3687a06023cc5b4ce9944ee0 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Mon, 23 Aug 2021 02:18:43 +0200
-Subject: [PATCH 018/142] broadcom/compiler: phys index depends on hw version
-
-For 7.1 there are not accumulators. So we replace the macro with a
-function call.
---
- src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++-----
- 1 file changed, 29 insertions(+), 10 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index aa9473d124b..a358b616e13 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -28,9 +28,19 @@
- 
- #define ACC_INDEX     0
- #define ACC_COUNT     6
-#define PHYS_INDEX    (ACC_INDEX + ACC_COUNT)
-#define PHYS_COUNT    64
- 
-+#define PHYS_COUNT 64
-+
-+static uint8_t
-+get_phys_index(const struct v3d_device_info *devinfo)
-+{
-+        if (devinfo->has_accumulators)
-+                return ACC_INDEX + ACC_COUNT;
-+        else
-+                return 0;
-+}
-+
-+/* ACC as accumulator */
- #define CLASS_BITS_PHYS   (1 << 0)
- #define CLASS_BITS_ACC    (1 << 1)
- #define CLASS_BITS_R5     (1 << 4)
-@@ -771,9 +781,11 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
- }
- 
- struct v3d_ra_select_callback_data {
-+        uint32_t phys_index;
-         uint32_t next_acc;
-         uint32_t next_phys;
-         struct v3d_ra_node_info *nodes;
-+        const struct v3d_device_info *devinfo;
- };
- 
- /* Choosing accumulators improves chances of merging QPU instructions
-@@ -794,7 +806,7 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
-         static const int available_rf_threshold = 5;
-         int available_rf = 0 ;
-         for (int i = 0; i < PHYS_COUNT; i++) {
-                if (BITSET_TEST(regs, PHYS_INDEX + i))
-+                if (BITSET_TEST(regs, v3d_ra->phys_index + i))
-                         available_rf++;
-                 if (available_rf >= available_rf_threshold)
-                         break;
-@@ -854,7 +866,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
- {
-         for (int i = 0; i < PHYS_COUNT; i++) {
-                 int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
-                int phys = PHYS_INDEX + phys_off;
-+                int phys = v3d_ra->phys_index + phys_off;
- 
-                 if (BITSET_TEST(regs, phys)) {
-                         v3d_ra->next_phys = phys_off + 1;
-@@ -896,8 +908,9 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
-          * register file can be divided up for fragment shader threading.
-          */
-         int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
-+        uint8_t phys_index = get_phys_index(compiler->devinfo);
- 
-        compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
-+        compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
-                                           false);
-         if (!compiler->regs)
-                 return false;
-@@ -912,8 +925,8 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
-                 compiler->reg_class_phys[threads] =
-                         ra_alloc_contig_reg_class(compiler->regs, 1);
- 
-                for (int i = PHYS_INDEX;
-                     i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
-+                for (int i = phys_index;
-+                     i < phys_index + (PHYS_COUNT >> threads); i++) {
-                         ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
-                         ra_class_add_reg(compiler->reg_class_phys[threads], i);
-                         ra_class_add_reg(compiler->reg_class_any[threads], i);
-@@ -1026,7 +1039,8 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                         assert(inst->dst.file == QFILE_TEMP);
-                         uint32_t node = temp_to_node(c, inst->dst.index);
-                         ra_set_node_reg(c->g, node,
-                                        PHYS_INDEX + inst->src[0].index);
-+                                        get_phys_index(c->devinfo) +
-+                                        inst->src[0].index);
-                         break;
-                 }
-                 }
-@@ -1086,13 +1100,17 @@ v3d_register_allocate(struct v3d_compile *c)
-                                           c->num_temps + ACC_COUNT),
-         };
- 
-+        uint32_t phys_index = get_phys_index(c->devinfo);
-+
-         struct v3d_ra_select_callback_data callback_data = {
-+                .phys_index = phys_index,
-                 .next_acc = 0,
-                 /* Start at RF3, to try to keep the TLB writes from using
-                  * RF0-2.
-                  */
-                 .next_phys = 3,
-                 .nodes = &c->nodes,
-+                .devinfo = c->devinfo,
-         };
- 
-         vir_calculate_live_intervals(c);
-@@ -1139,6 +1157,7 @@ v3d_register_allocate(struct v3d_compile *c)
-         vir_for_each_inst_inorder(inst, c) {
-                 inst->ip = ip++;
-                 update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
-+
-         }
- 
-         /* Set the register classes for all our temporaries in the graph */
-@@ -1202,13 +1221,13 @@ v3d_register_allocate(struct v3d_compile *c)
-         temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
-         for (uint32_t i = 0; i < c->num_temps; i++) {
-                 int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
-                if (ra_reg < PHYS_INDEX) {
-+                if (ra_reg < phys_index) {
-                         temp_registers[i].magic = true;
-                         temp_registers[i].index = (V3D_QPU_WADDR_R0 +
-                                                    ra_reg - ACC_INDEX);
-                 } else {
-                         temp_registers[i].magic = false;
-                        temp_registers[i].index = ra_reg - PHYS_INDEX;
-+                        temp_registers[i].index = ra_reg - phys_index;
-                 }
-         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0019-broadcom-compiler-don-t-favor-select-accum-registers.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0019-broadcom-compiler-don-t-favor-select-accum-registers.patch
@ -1,40 +0,0 @@
-From da0a3deadf86a46c8323267d3f6a49e442835608 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 17 Sep 2021 01:07:06 +0200
-Subject: [PATCH 019/142] broadcom/compiler: don't favor/select accum registers
- for hw not supporting it
-
-Note that what we do is to just return false on the favor/select accum
-methods. We could just avoid to call them, but as the select is called
-more than once, it is just easier this way.
---
- src/broadcom/compiler/vir_register_allocate.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index a358b616e13..1f495180784 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -797,6 +797,9 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
-                    BITSET_WORD *regs,
-                    int priority)
- {
-+        if (!v3d_ra->devinfo->has_accumulators)
-+                return false;
-+
-         /* Favor accumulators if we have less that this number of physical
-          * registers. Accumulators have more restrictions (like being
-          * invalidated through thrsw), so running out of physical registers
-@@ -832,6 +835,9 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
-                     BITSET_WORD *regs,
-                     unsigned int *out)
- {
-+        if (!v3d_ra->devinfo->has_accumulators)
-+                return false;
-+
-         /* Choose r5 for our ldunifs if possible (nobody else can load to that
-          * reg, and it keeps the QPU cond field free from being occupied by
-          * ldunifrf).
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0020-broadcom-vir-implement-is_no_op_mov-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0020-broadcom-vir-implement-is_no_op_mov-for-v71.patch
@ -1,105 +0,0 @@
-From 6c04d7c917da6b38f8b2b4306ab03ed2ab7e6ce0 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 9 Sep 2021 00:28:53 +0200
-Subject: [PATCH 020/142] broadcom/vir: implement is_no_op_mov for v71
-
-Did some refactoring/splitting.
---
- src/broadcom/compiler/vir_to_qpu.c | 66 ++++++++++++++++++++++++------
- 1 file changed, 53 insertions(+), 13 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
-index c8b6e0a91a0..08970d52954 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
-+++ b/src/broadcom/compiler/vir_to_qpu.c
-@@ -129,19 +129,8 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
- }
- 
- static bool
-is_no_op_mov(struct qinst *qinst)
-+v3d33_mov_src_and_dst_equal(struct qinst *qinst)
- {
-        static const struct v3d_qpu_sig no_sig = {0};
-
-        /* Make sure it's just a lone MOV. */
-        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
-            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
-            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
-            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
-                return false;
-        }
-
-        /* Check if it's a MOV from a register to itself. */
-         enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
-         if (qinst->qpu.alu.mul.magic_write) {
-                 if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
-@@ -168,6 +157,57 @@ is_no_op_mov(struct qinst *qinst)
-                         return false;
-         }
- 
-+        return true;
-+}
-+
-+static bool
-+v3d71_mov_src_and_dst_equal(struct qinst *qinst)
-+{
-+        if (qinst->qpu.alu.mul.magic_write)
-+                return false;
-+
-+        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
-+        int raddr;
-+
-+        raddr = qinst->qpu.alu.mul.a.raddr;
-+        if (raddr != waddr)
-+                return false;
-+
-+        return true;
-+}
-+
-+static bool
-+mov_src_and_dst_equal(struct qinst *qinst,
-+                      const struct v3d_device_info *devinfo)
-+{
-+        if (devinfo->ver < 71)
-+                return v3d33_mov_src_and_dst_equal(qinst);
-+        else
-+                return v3d71_mov_src_and_dst_equal(qinst);
-+}
-+
-+
-+static bool
-+is_no_op_mov(struct qinst *qinst,
-+             const struct v3d_device_info *devinfo)
-+{
-+        static const struct v3d_qpu_sig no_sig = {0};
-+
-+        /* Make sure it's just a lone MOV. We only check for M_MOV. Although
-+         * for V3D 7.x there is also A_MOV, we don't need to check for it as
-+         * we always emit using M_MOV. We could use A_MOV later on the
-+         * squedule to improve performance
-+         */
-+        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
-+            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
-+            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
-+            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
-+                return false;
-+        }
-+
-+        if (!mov_src_and_dst_equal(qinst, devinfo))
-+                return false;
-+
-         /* No packing or flags updates, or we need to execute the
-          * instruction.
-          */
-@@ -324,7 +364,7 @@ v3d_generate_code_block(struct v3d_compile *c,
-                                 qinst->qpu.alu.mul.waddr = dst.index;
-                                 qinst->qpu.alu.mul.magic_write = dst.magic;
- 
-                                if (is_no_op_mov(qinst)) {
-+                                if (is_no_op_mov(qinst, c->devinfo)) {
-                                         vir_remove_instruction(c, qinst);
-                                         continue;
-                                 }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0021-broadcom-compiler-update-vir_to_qpu-set_src-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0021-broadcom-compiler-update-vir_to_qpu-set_src-for-v71.patch
@ -1,104 +0,0 @@
-From 7b5be2d9b178a45c34c22db2744639a6a8a216d1 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 9 Sep 2021 01:18:54 +0200
-Subject: [PATCH 021/142] broadcom/compiler: update vir_to_qpu::set_src for v71
-
---
- src/broadcom/compiler/vir_to_qpu.c | 47 ++++++++++++++++++++++++++----
- 1 file changed, 42 insertions(+), 5 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
-index 08970d52954..afc4941fdb1 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
-+++ b/src/broadcom/compiler/vir_to_qpu.c
-@@ -86,12 +86,22 @@ new_qpu_nop_before(struct qinst *inst)
-         return q;
- }
- 
-+static void
-+v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
-+{
-+        if (src.smimm)
-+                unreachable("v3d71_set_src: pending handling small immediates");
-+
-+        assert(!src.magic);
-+        *raddr = src.index;
-+}
-+
- /**
-  * Allocates the src register (accumulator or register file) into the RADDR
-  * fields of the instruction.
-  */
- static void
-set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
-+v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
- {
-         if (src.smimm) {
-                 assert(instr->sig.small_imm_b);
-@@ -128,6 +138,24 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
-         }
- }
- 
-+/*
-+ * The main purpose of the following wrapper is to make calling set_src
-+ * cleaner. This is the reason it receives both mux and raddr pointers. Those
-+ * will be filled or not based on the device version.
-+ */
-+static void
-+set_src(struct v3d_qpu_instr *instr,
-+        enum v3d_qpu_mux *mux,
-+        uint8_t *raddr,
-+        struct qpu_reg src,
-+        const struct v3d_device_info *devinfo)
-+{
-+        if (devinfo->ver < 71)
-+                return v3d33_set_src(instr, mux, src);
-+        else
-+                return v3d71_set_src(instr, raddr, src);
-+}
-+
- static bool
- v3d33_mov_src_and_dst_equal(struct qinst *qinst)
- {
-@@ -340,13 +368,18 @@ v3d_generate_code_block(struct v3d_compile *c,
-                                 qinst->qpu.sig_magic = dst.magic;
-                         } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
-                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
-+
-                                 if (nsrc >= 1) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.add.a.mux, src[0]);
-+                                                &qinst->qpu.alu.add.a.mux,
-+                                                &qinst->qpu.alu.add.a.raddr,
-+                                                src[0], c->devinfo);
-                                 }
-                                 if (nsrc >= 2) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.add.b.mux, src[1]);
-+                                                &qinst->qpu.alu.add.b.mux,
-+                                                &qinst->qpu.alu.add.b.raddr,
-+                                                src[1], c->devinfo);
-                                 }
- 
-                                 qinst->qpu.alu.add.waddr = dst.index;
-@@ -354,11 +387,15 @@ v3d_generate_code_block(struct v3d_compile *c,
-                         } else {
-                                 if (nsrc >= 1) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.mul.a.mux, src[0]);
-+                                                &qinst->qpu.alu.mul.a.mux,
-+                                                &qinst->qpu.alu.mul.a.raddr,
-+                                                src[0], c->devinfo);
-                                 }
-                                 if (nsrc >= 2) {
-                                         set_src(&qinst->qpu,
-                                                &qinst->qpu.alu.mul.b.mux, src[1]);
-+                                                &qinst->qpu.alu.mul.b.mux,
-+                                                &qinst->qpu.alu.mul.b.raddr,
-+                                                src[1], c->devinfo);
-                                 }
- 
-                                 qinst->qpu.alu.mul.waddr = dst.index;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0022-broadcom-qpu_schedule-add-process_raddr_deps.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0022-broadcom-qpu_schedule-add-process_raddr_deps.patch
@ -1,92 +0,0 @@
-From fe89703008f2a3d6bfe6e260791f712013be5e48 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 9 Sep 2021 23:59:28 +0200
-Subject: [PATCH 022/142] broadcom/qpu_schedule: add process_raddr_deps
-
-On v71 we don't have muxes, but more raddr. Adding a equivalent add
-deps function.
---
- src/broadcom/compiler/qpu_schedule.c | 52 +++++++++++++++++++++++-----
- 1 file changed, 44 insertions(+), 8 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 455fa3867be..89254643c90 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -155,6 +155,7 @@ static void
- process_mux_deps(struct schedule_state *state, struct schedule_node *n,
-                  enum v3d_qpu_mux mux)
- {
-+        assert(state->devinfo->ver < 71);
-         switch (mux) {
-         case V3D_QPU_MUX_A:
-                 add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
-@@ -171,6 +172,17 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
-         }
- }
- 
-+
-+static void
-+process_raddr_deps(struct schedule_state *state, struct schedule_node *n,
-+                   uint8_t raddr, bool is_small_imm)
-+{
-+        assert(state->devinfo->ver >= 71);
-+
-+        if (!is_small_imm)
-+                add_read_dep(state, state->last_rf[raddr], n);
-+}
-+
- static bool
- tmu_write_is_sequence_terminator(uint32_t waddr)
- {
-@@ -305,15 +317,39 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
- 
-         /* XXX: LOAD_IMM */
- 
-        if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0)
-                process_mux_deps(state, n, inst->alu.add.a.mux);
-        if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1)
-                process_mux_deps(state, n, inst->alu.add.b.mux);
-+        if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
-+                if (devinfo->ver < 71) {
-+                        process_mux_deps(state, n, inst->alu.add.a.mux);
-+                } else {
-+                        process_raddr_deps(state, n, inst->alu.add.a.raddr,
-+                                           inst->sig.small_imm_a);
-+                }
-+        }
-+        if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
-+                if (devinfo->ver < 71) {
-+                        process_mux_deps(state, n, inst->alu.add.b.mux);
-+                } else {
-+                        process_raddr_deps(state, n, inst->alu.add.b.raddr,
-+                                           inst->sig.small_imm_b);
-+                }
-+        }
- 
-        if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0)
-                process_mux_deps(state, n, inst->alu.mul.a.mux);
-        if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1)
-                process_mux_deps(state, n, inst->alu.mul.b.mux);
-+        if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
-+                if (devinfo->ver < 71) {
-+                        process_mux_deps(state, n, inst->alu.mul.a.mux);
-+                } else {
-+                        process_raddr_deps(state, n, inst->alu.mul.a.raddr,
-+                                           inst->sig.small_imm_c);
-+                }
-+        }
-+        if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
-+                if (devinfo->ver < 71) {
-+                        process_mux_deps(state, n, inst->alu.mul.b.mux);
-+                } else {
-+                        process_raddr_deps(state, n, inst->alu.mul.b.raddr,
-+                                           inst->sig.small_imm_d);
-+                }
-+        }
- 
-         switch (inst->alu.add.op) {
-         case V3D_QPU_A_VPMSETUP:
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0023-broadcom-qpu-update-disasm_raddr-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0023-broadcom-qpu-update-disasm_raddr-for-v71.patch
@ -1,128 +0,0 @@
-From 20ce426df1ab2546332141f4bc4531ada754cdea Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 10 Sep 2021 01:20:44 +0200
-Subject: [PATCH 023/142] broadcom/qpu: update disasm_raddr for v71
-
---
- src/broadcom/qpu/qpu_disasm.c | 72 ++++++++++++++++++++++++++++++++---
- 1 file changed, 66 insertions(+), 6 deletions(-)
-
-diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
-index 588a665f770..b613de781dc 100644
--- a/src/broadcom/qpu/qpu_disasm.c
-+++ b/src/broadcom/qpu/qpu_disasm.c
-@@ -56,8 +56,9 @@ pad_to(struct disasm_state *disasm, int n)
- 
- 
- static void
-v3d_qpu_disasm_raddr(struct disasm_state *disasm,
-                     const struct v3d_qpu_instr *instr, uint8_t mux)
-+v3d33_qpu_disasm_raddr(struct disasm_state *disasm,
-+                       const struct v3d_qpu_instr *instr,
-+                       enum v3d_qpu_mux mux)
- {
-         if (mux == V3D_QPU_MUX_A) {
-                 append(disasm, "rf%d", instr->raddr_a);
-@@ -82,6 +83,65 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm,
-         }
- }
- 
-+enum v3d_qpu_input_class {
-+        V3D_QPU_ADD_A,
-+        V3D_QPU_ADD_B,
-+        V3D_QPU_MUL_A,
-+        V3D_QPU_MUL_B
-+};
-+
-+static void
-+v3d71_qpu_disasm_raddr(struct disasm_state *disasm,
-+                       const struct v3d_qpu_instr *instr,
-+                       uint8_t raddr,
-+                       enum v3d_qpu_input_class input_class)
-+{
-+        bool is_small_imm = false;
-+        switch(input_class) {
-+        case V3D_QPU_ADD_A:
-+                is_small_imm = instr->sig.small_imm_a;
-+                break;
-+        case V3D_QPU_ADD_B:
-+                is_small_imm = instr->sig.small_imm_b;
-+                break;
-+        case V3D_QPU_MUL_A:
-+                is_small_imm = instr->sig.small_imm_c;
-+                break;
-+        case V3D_QPU_MUL_B:
-+                is_small_imm = instr->sig.small_imm_d;
-+                break;
-+        }
-+
-+        if (is_small_imm) {
-+                unreachable("Pending handling small immediates");
-+                uint32_t val;
-+                ASSERTED bool ok =
-+                        v3d_qpu_small_imm_unpack(disasm->devinfo,
-+                                                 raddr,
-+                                                 &val);
-+
-+                if ((int)val >= -16 && (int)val <= 15)
-+                        append(disasm, "%d", val);
-+                else
-+                        append(disasm, "0x%08x", val);
-+                assert(ok);
-+        } else {
-+                append(disasm, "rf%d", raddr);
-+        }
-+}
-+
-+static void
-+v3d_qpu_disasm_raddr(struct disasm_state *disasm,
-+                     const struct v3d_qpu_instr *instr,
-+                     const struct v3d_qpu_input *input,
-+                     enum v3d_qpu_input_class input_class)
-+{
-+        if (disasm->devinfo->ver < 71)
-+                v3d33_qpu_disasm_raddr(disasm, instr, input->mux);
-+        else
-+                v3d71_qpu_disasm_raddr(disasm, instr, input->raddr, input_class);
-+}
-+
- static void
- v3d_qpu_disasm_waddr(struct disasm_state *disasm, uint32_t waddr, bool magic)
- {
-@@ -121,14 +181,14 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
-         if (num_src >= 1) {
-                 if (has_dst)
-                         append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a.mux);
-+                v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.add.a, V3D_QPU_ADD_A);
-                 append(disasm, "%s",
-                        v3d_qpu_unpack_name(instr->alu.add.a.unpack));
-         }
- 
-         if (num_src >= 2) {
-                 append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b.mux);
-+                v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.add.b, V3D_QPU_ADD_B);
-                 append(disasm, "%s",
-                        v3d_qpu_unpack_name(instr->alu.add.b.unpack));
-         }
-@@ -164,14 +224,14 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
-         if (num_src >= 1) {
-                 if (has_dst)
-                         append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a.mux);
-+                v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.mul.a, V3D_QPU_MUL_A);
-                 append(disasm, "%s",
-                        v3d_qpu_unpack_name(instr->alu.mul.a.unpack));
-         }
- 
-         if (num_src >= 2) {
-                 append(disasm, ", ");
-                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b.mux);
-+                v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.mul.b, V3D_QPU_MUL_B);
-                 append(disasm, "%s",
-                        v3d_qpu_unpack_name(instr->alu.mul.b.unpack));
-         }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0024-broadcom-qpu-return-false-on-qpu_writes_accumulatorX.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0024-broadcom-qpu-return-false-on-qpu_writes_accumulatorX.patch
@ -1,59 +0,0 @@
-From 7263fa24a3c57b1dcd4d870670cda86ae89aa28c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 15 Sep 2021 10:55:49 +0200
-Subject: [PATCH 024/142] broadcom/qpu: return false on
- qpu_writes_accumulatorXX helpers for v71
-
-As for v71 doesn't have accumulators (devinfo->has_accumulators set to
-false), those methods would always return false.
---
- src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index 8de99c611d5..7ec3c867260 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -854,6 +854,9 @@ bool
- v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
-                   const struct v3d_qpu_instr *inst)
- {
-+        if(!devinfo->has_accumulators)
-+                return false;
-+
-         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
-                 return true;
- 
-@@ -864,6 +867,9 @@ bool
- v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
-                   const struct v3d_qpu_instr *inst)
- {
-+        if (!devinfo->has_accumulators)
-+                return false;
-+
-         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
-                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
-                     inst->alu.add.magic_write &&
-@@ -894,6 +900,9 @@ bool
- v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
-                   const struct v3d_qpu_instr *inst)
- {
-+        if (!devinfo->has_accumulators)
-+                return false;
-+
-         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
-                 return true;
- 
-@@ -904,6 +913,9 @@ bool
- v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
-                      const struct v3d_qpu_instr *inst)
- {
-+        if (!devinfo->has_accumulators)
-+                return false;
-+
-         if (v3d_qpu_writes_r5(devinfo, inst))
-                 return true;
-         if (v3d_qpu_writes_r4(devinfo, inst))
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0025-broadcom-compiler-add-support-for-varyings-on-nir-to.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0025-broadcom-compiler-add-support-for-varyings-on-nir-to.patch
@ -1,116 +0,0 @@
-From 6a9611c5a22218388bba419174d3343e0cdf773b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 14 Sep 2021 10:42:55 +0200
-Subject: [PATCH 025/142] broadcom/compiler: add support for varyings on nir to
- vir generation for v71
-
-Needs update as v71 doesn't have accumulators anymore, and ldvary uses
-now rf0 to return the value.
---
- src/broadcom/compiler/nir_to_vir.c | 34 +++++++++++++++++-------------
- 1 file changed, 19 insertions(+), 15 deletions(-)
-
-diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
-index ca072971f01..79a22c3bd08 100644
--- a/src/broadcom/compiler/nir_to_vir.c
-+++ b/src/broadcom/compiler/nir_to_vir.c
-@@ -1005,32 +1005,36 @@ emit_fragcoord_input(struct v3d_compile *c, int attr)
- 
- static struct qreg
- emit_smooth_varying(struct v3d_compile *c,
-                    struct qreg vary, struct qreg w, struct qreg r5)
-+                    struct qreg vary, struct qreg w, struct qreg c_reg)
- {
-        return vir_FADD(c, vir_FMUL(c, vary, w), r5);
-+        return vir_FADD(c, vir_FMUL(c, vary, w), c_reg);
- }
- 
- static struct qreg
- emit_noperspective_varying(struct v3d_compile *c,
-                           struct qreg vary, struct qreg r5)
-+                           struct qreg vary, struct qreg c_reg)
- {
-        return vir_FADD(c, vir_MOV(c, vary), r5);
-+        return vir_FADD(c, vir_MOV(c, vary), c_reg);
- }
- 
- static struct qreg
- emit_flat_varying(struct v3d_compile *c,
-                  struct qreg vary, struct qreg r5)
-+                  struct qreg vary, struct qreg c_reg)
- {
-         vir_MOV_dest(c, c->undef, vary);
-        return vir_MOV(c, r5);
-+        return vir_MOV(c, c_reg);
- }
- 
- static struct qreg
- emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
-                       int8_t input_idx, uint8_t swizzle, int array_index)
- {
-        struct qreg r3 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
-        struct qreg r5 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5);
-+        struct qreg c_reg; /* C coefficient */
-+
-+        if (c->devinfo->has_accumulators)
-+                c_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5);
-+        else
-+                c_reg = vir_reg(QFILE_REG, 0);
- 
-         struct qinst *ldvary = NULL;
-         struct qreg vary;
-@@ -1041,7 +1045,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
-                 vary = vir_emit_def(c, ldvary);
-         } else {
-                 vir_NOP(c)->qpu.sig.ldvary = true;
-                vary = r3;
-+                vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
-         }
- 
-         /* Store the input value before interpolation so we can implement
-@@ -1050,7 +1054,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
-         if (input_idx >= 0) {
-                 assert(var);
-                 c->interp[input_idx].vp = vary;
-                c->interp[input_idx].C = vir_MOV(c, r5);
-+                c->interp[input_idx].C = vir_MOV(c, c_reg);
-                 c->interp[input_idx].mode = var->data.interpolation;
-         }
- 
-@@ -1060,7 +1064,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
-          */
-         if (!var) {
-                 assert(input_idx < 0);
-                return emit_smooth_varying(c, vary, c->payload_w, r5);
-+                return emit_smooth_varying(c, vary, c->payload_w, c_reg);
-         }
- 
-         int i = c->num_inputs++;
-@@ -1075,20 +1079,20 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
-                 if (var->data.centroid) {
-                         BITSET_SET(c->centroid_flags, i);
-                         result = emit_smooth_varying(c, vary,
-                                                     c->payload_w_centroid, r5);
-+                                                     c->payload_w_centroid, c_reg);
-                 } else {
-                        result = emit_smooth_varying(c, vary, c->payload_w, r5);
-+                        result = emit_smooth_varying(c, vary, c->payload_w, c_reg);
-                 }
-                 break;
- 
-         case INTERP_MODE_NOPERSPECTIVE:
-                 BITSET_SET(c->noperspective_flags, i);
-                result = emit_noperspective_varying(c, vary, r5);
-+                result = emit_noperspective_varying(c, vary, c_reg);
-                 break;
- 
-         case INTERP_MODE_FLAT:
-                 BITSET_SET(c->flat_shade_flags, i);
-                result = emit_flat_varying(c, vary, r5);
-+                result = emit_flat_varying(c, vary, c_reg);
-                 break;
- 
-         default:
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0026-broadcom-compiler-payload_w-is-loaded-on-rf3-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0026-broadcom-compiler-payload_w-is-loaded-on-rf3-for-v71.patch
@ -1,55 +0,0 @@
-From 06af15a60f7a9c135893e5f8934b8030c1da95f9 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 15 Sep 2021 01:14:15 +0200
-Subject: [PATCH 026/142] broadcom/compiler: payload_w is loaded on rf3 for v71
-
-And in general rf0 is now used for other needs.
---
- src/broadcom/compiler/nir_to_vir.c            | 6 +++++-
- src/broadcom/compiler/vir_register_allocate.c | 6 +++++-
- 2 files changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
-index 79a22c3bd08..1a05b279a2d 100644
--- a/src/broadcom/compiler/nir_to_vir.c
-+++ b/src/broadcom/compiler/nir_to_vir.c
-@@ -4325,7 +4325,11 @@ nir_to_vir(struct v3d_compile *c)
- {
-         switch (c->s->info.stage) {
-         case MESA_SHADER_FRAGMENT:
-                c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
-+                if (c->devinfo->ver < 71)
-+                        c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
-+                else
-+                        c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 3));
-+
-                 c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
-                 c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
- 
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 1f495180784..eca9a6751a6 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -1034,6 +1034,11 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-         if (inst->src[0].file == QFILE_REG) {
-                 switch (inst->src[0].index) {
-                 case 0:
-+                        /* V3D 7.x doesn't use rf0 for thread payload */
-+                        if (c->devinfo->ver >= 71)
-+                                break;
-+                        else
-+                                FALLTHROUGH;
-                 case 1:
-                 case 2:
-                 case 3: {
-@@ -1163,7 +1168,6 @@ v3d_register_allocate(struct v3d_compile *c)
-         vir_for_each_inst_inorder(inst, c) {
-                 inst->ip = ip++;
-                 update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
-
-         }
- 
-         /* Set the register classes for all our temporaries in the graph */
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0027-broadcom-qpu_schedule-update-write-deps-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0027-broadcom-qpu_schedule-update-write-deps-for-v71.patch
@ -1,30 +0,0 @@
-From d38d8056903b9a4f96ab56261ac3b3c3be0af4fb Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 15 Sep 2021 11:12:59 +0200
-Subject: [PATCH 027/142] broadcom/qpu_schedule: update write deps for v71
-
-We just need to add a write dep if rf0 is written implicitly.
-
-Note that we don't need to check if we have accumulators when checking
-for r3/r4/r5, as v3d_qpu_writes_rX would return false for hw version
-that doesn't have accumulators.
---
- src/broadcom/compiler/qpu_schedule.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 89254643c90..2fa9031d7b6 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -422,6 +422,8 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
-                 add_write_dep(state, &state->last_r[4], n);
-         if (v3d_qpu_writes_r5(devinfo, inst))
-                 add_write_dep(state, &state->last_r[5], n);
-+        if (v3d_qpu_writes_rf0_implicitly(devinfo, inst))
-+                add_write_dep(state, &state->last_rf[0], n);
- 
-         /* If we add any more dependencies here we should consider whether we
-          * also need to update qpu_inst_after_thrsw_valid_in_delay_slot.
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0028-broadcom-compiler-update-register-classes-to-not-inc.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0028-broadcom-compiler-update-register-classes-to-not-inc.patch
@ -1,140 +0,0 @@
-From 7e2a2be830b1672ab846389a46b5d09bad0f7a98 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 16 Sep 2021 00:49:25 +0200
-Subject: [PATCH 028/142] broadcom/compiler: update register classes to not
- include accumulators on v71
-
---
- src/broadcom/compiler/vir_register_allocate.c | 56 ++++++++++++-------
- 1 file changed, 36 insertions(+), 20 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index eca9a6751a6..7b3f6c41934 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -44,10 +44,15 @@ get_phys_index(const struct v3d_device_info *devinfo)
- #define CLASS_BITS_PHYS   (1 << 0)
- #define CLASS_BITS_ACC    (1 << 1)
- #define CLASS_BITS_R5     (1 << 4)
-#define CLASS_BITS_ANY    (CLASS_BITS_PHYS | \
-                           CLASS_BITS_ACC | \
-                           CLASS_BITS_R5)
- 
-+static uint8_t
-+get_class_bit_any(const struct v3d_device_info *devinfo)
-+{
-+        if (devinfo->has_accumulators)
-+                return (CLASS_BITS_PHYS | CLASS_BITS_ACC | CLASS_BITS_R5);
-+        else
-+                return CLASS_BITS_PHYS;
-+}
- static inline uint32_t
- temp_to_node(struct v3d_compile *c, uint32_t temp)
- {
-@@ -82,11 +87,13 @@ choose_reg_class(struct v3d_compile *c, uint8_t class_bits)
-         if (class_bits == CLASS_BITS_PHYS) {
-                 return c->compiler->reg_class_phys[c->thread_index];
-         } else if (class_bits == (CLASS_BITS_R5)) {
-+                assert(c->devinfo->has_accumulators);
-                 return c->compiler->reg_class_r5[c->thread_index];
-         } else if (class_bits == (CLASS_BITS_PHYS | CLASS_BITS_ACC)) {
-+                assert(c->devinfo->has_accumulators);
-                 return c->compiler->reg_class_phys_or_acc[c->thread_index];
-         } else {
-                assert(class_bits == CLASS_BITS_ANY);
-+                assert(class_bits == get_class_bit_any(c->devinfo));
-                 return c->compiler->reg_class_any[c->thread_index];
-         }
- }
-@@ -447,7 +454,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
-          */
-         assert(c->disable_ldunif_opt);
-         struct qreg offset = vir_uniform_ui(c, spill_offset);
-        add_node(c, offset.index, CLASS_BITS_ANY);
-+        add_node(c, offset.index, get_class_bit_any(c->devinfo));
- 
-         /* We always enable per-quad on spills/fills to ensure we spill
-          * any channels involved with helper invocations.
-@@ -645,7 +652,8 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-                                          * instruction immediately after, so
-                                          * we can use any register class for it.
-                                          */
-                                        add_node(c, unif.index, CLASS_BITS_ANY);
-+                                        add_node(c, unif.index,
-+                                                 get_class_bit_any(c->devinfo));
-                                 } else if (spill_type == SPILL_TYPE_RECONSTRUCT) {
-                                         struct qreg temp =
-                                                 reconstruct_temp(c, reconstruct_op);
-@@ -924,31 +932,38 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
-         for (int threads = 0; threads < max_thread_index; threads++) {
-                 compiler->reg_class_any[threads] =
-                         ra_alloc_contig_reg_class(compiler->regs, 1);
-                compiler->reg_class_r5[threads] =
-                        ra_alloc_contig_reg_class(compiler->regs, 1);
-                compiler->reg_class_phys_or_acc[threads] =
-                        ra_alloc_contig_reg_class(compiler->regs, 1);
-+                if (compiler->devinfo->has_accumulators) {
-+                        compiler->reg_class_r5[threads] =
-+                                ra_alloc_contig_reg_class(compiler->regs, 1);
-+                        compiler->reg_class_phys_or_acc[threads] =
-+                                ra_alloc_contig_reg_class(compiler->regs, 1);
-+                }
-                 compiler->reg_class_phys[threads] =
-                         ra_alloc_contig_reg_class(compiler->regs, 1);
- 
-                 for (int i = phys_index;
-                      i < phys_index + (PHYS_COUNT >> threads); i++) {
-                        ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
-+                        if (compiler->devinfo->has_accumulators)
-+                                ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
-                         ra_class_add_reg(compiler->reg_class_phys[threads], i);
-                         ra_class_add_reg(compiler->reg_class_any[threads], i);
-                 }
- 
-                for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
-                        ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
-                        ra_class_add_reg(compiler->reg_class_any[threads], i);
-+                if (compiler->devinfo->has_accumulators) {
-+                        for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
-+                                ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
-+                                ra_class_add_reg(compiler->reg_class_any[threads], i);
-+                        }
-                 }
-                 /* r5 can only store a single 32-bit value, so not much can
-                  * use it.
-                  */
-                ra_class_add_reg(compiler->reg_class_r5[threads],
-                                 ACC_INDEX + 5);
-                ra_class_add_reg(compiler->reg_class_any[threads],
-                                 ACC_INDEX + 5);
-+                if (compiler->devinfo->has_accumulators) {
-+                        ra_class_add_reg(compiler->reg_class_r5[threads],
-+                                         ACC_INDEX + 5);
-+                        ra_class_add_reg(compiler->reg_class_any[threads],
-+                                         ACC_INDEX + 5);
-+                }
-         }
- 
-         ra_set_finalize(compiler->regs, NULL);
-@@ -1086,7 +1101,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-         }
- 
-         /* All accumulators are invalidated across a thread switch. */
-        if (inst->qpu.sig.thrsw) {
-+        if (inst->qpu.sig.thrsw && c->devinfo->has_accumulators) {
-                 for (int i = 0; i < c->num_temps; i++) {
-                         if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                 set_temp_class_bits(c, i,
-@@ -1157,7 +1172,8 @@ v3d_register_allocate(struct v3d_compile *c)
-                         uint32_t t = node_to_temp(c, i);
-                         c->nodes.info[i].priority =
-                                 c->temp_end[t] - c->temp_start[t];
-                        c->nodes.info[i].class_bits = CLASS_BITS_ANY;
-+                        c->nodes.info[i].class_bits =
-+                                get_class_bit_any(c->devinfo);
-                 }
-         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0029-broadcom-compiler-implement-reads-writes-too-soon-ch.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0029-broadcom-compiler-implement-reads-writes-too-soon-ch.patch
@ -1,109 +0,0 @@
-From 0157228c729b8812dc4900fa24db63b7d27aa342 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 23 Sep 2021 11:19:58 +0200
-Subject: [PATCH 029/142] broadcom/compiler: implement "reads/writes too soon"
- checks for v71
-
---
- src/broadcom/compiler/qpu_schedule.c | 65 ++++++++++++++++++++++------
- 1 file changed, 51 insertions(+), 14 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 2fa9031d7b6..4db0c2e72da 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -562,7 +562,24 @@ mux_reads_too_soon(struct choose_scoreboard *scoreboard,
- }
- 
- static bool
-reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
-+reads_too_soon(struct choose_scoreboard *scoreboard,
-+               const struct v3d_qpu_instr *inst, uint8_t raddr)
-+{
-+        switch (raddr) {
-+        case 0: /* ldvary delayed write of C coefficient to rf0 */
-+                if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1)
-+                        return true;
-+                break;
-+        default:
-+                break;
-+        }
-+
-+        return false;
-+}
-+
-+static bool
-+reads_too_soon_after_write(const struct v3d_device_info *devinfo,
-+                           struct choose_scoreboard *scoreboard,
-                            struct qinst *qinst)
- {
-         const struct v3d_qpu_instr *inst = &qinst->qpu;
-@@ -574,24 +591,44 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
-         assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
- 
-         if (inst->alu.add.op != V3D_QPU_A_NOP) {
-                if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) {
-                        return true;
-+                if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
-+                        if (devinfo->ver < 71) {
-+                                if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux))
-+                                        return true;
-+                        } else {
-+                                if (reads_too_soon(scoreboard, inst, inst->alu.add.a.raddr))
-+                                        return true;
-+                        }
-                 }
-                if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) {
-                        return true;
-+                if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
-+                        if (devinfo->ver < 71) {
-+                                if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux))
-+                                        return true;
-+                        } else {
-+                                if (reads_too_soon(scoreboard, inst, inst->alu.add.b.raddr))
-+                                        return true;
-+                        }
-                 }
-         }
- 
-         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
-                if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) {
-                        return true;
-+                if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
-+                        if (devinfo->ver < 71) {
-+                                if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux))
-+                                        return true;
-+                        } else {
-+                                if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr))
-+                                        return true;
-+                        }
-                 }
-                if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 &&
-                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) {
-                        return true;
-+                if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
-+                        if (devinfo->ver < 71) {
-+                                if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux))
-+                                        return true;
-+                        } else {
-+                                if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr))
-+                                        return true;
-+                        }
-                 }
-         }
- 
-@@ -1147,7 +1184,7 @@ retry:
-                  *  regfile A or B that was written to by the previous
-                  *  instruction."
-                  */
-                if (reads_too_soon_after_write(scoreboard, n->inst))
-+                if (reads_too_soon_after_write(c->devinfo, scoreboard, n->inst))
-                         continue;
- 
-                 if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0030-broadcom-compiler-implement-read-stall-check-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0030-broadcom-compiler-implement-read-stall-check-for-v71.patch
@ -1,118 +0,0 @@
-From 3fb3333bdf9699157cf0a2bd46ba4c25058bc5c1 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 23 Sep 2021 11:44:59 +0200
-Subject: [PATCH 030/142] broadcom/compiler: implement read stall check for v71
-
---
- src/broadcom/compiler/qpu_schedule.c | 32 +++++++++++++++++-----------
- src/broadcom/qpu/qpu_instr.c         | 12 +++++++++++
- src/broadcom/qpu/qpu_instr.h         |  2 ++
- 3 files changed, 34 insertions(+), 12 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 4db0c2e72da..b78abe003e9 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -679,29 +679,37 @@ pixel_scoreboard_too_soon(struct v3d_compile *c,
- }
- 
- static bool
-qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
-+qpu_instruction_uses_rf(const struct v3d_device_info *devinfo,
-+                        const struct v3d_qpu_instr *inst,
-                         uint32_t waddr) {
- 
-         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
-            return false;
- 
-        if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
-            inst->raddr_a == waddr)
-              return true;
-+        if (devinfo->ver < 71) {
-+                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
-+                    inst->raddr_a == waddr)
-+                        return true;
- 
-        if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
-            !inst->sig.small_imm_b && (inst->raddr_b == waddr))
-              return true;
-+                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
-+                    !inst->sig.small_imm_b && (inst->raddr_b == waddr))
-+                        return true;
-+        } else {
-+                /* FIXME: skip if small immediate */
-+                if (v3d71_qpu_reads_raddr(inst, waddr))
-+                        return true;
-+        }
- 
-         return false;
- }
- 
- static bool
-mux_read_stalls(struct choose_scoreboard *scoreboard,
-                const struct v3d_qpu_instr *inst)
-+read_stalls(const struct v3d_device_info *devinfo,
-+            struct choose_scoreboard *scoreboard,
-+            const struct v3d_qpu_instr *inst)
- {
-         return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
-                qpu_instruction_uses_rf(inst,
-+                qpu_instruction_uses_rf(devinfo, inst,
-                                         scoreboard->last_stallable_sfu_reg);
- }
- 
-@@ -1319,7 +1327,7 @@ retry:
- 
-                 int prio = get_instruction_priority(c->devinfo, inst);
- 
-                if (mux_read_stalls(scoreboard, inst)) {
-+                if (read_stalls(c->devinfo, scoreboard, inst)) {
-                         /* Don't merge an instruction that stalls */
-                         if (prev_inst)
-                                 continue;
-@@ -2389,7 +2397,7 @@ schedule_instructions(struct v3d_compile *c,
-                                         }
-                                 }
-                         }
-                        if (mux_read_stalls(scoreboard, inst))
-+                        if (read_stalls(c->devinfo, scoreboard, inst))
-                                 c->qpu_inst_stalled_count++;
-                 }
- 
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index 7ec3c867260..e8bbb2141b0 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -956,6 +956,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
-                 (mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
- }
- 
-+bool
-+v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
-+{
-+        int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
-+        int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
-+
-+        return (add_nsrc > 0 && inst->alu.add.a.raddr == raddr) ||
-+               (add_nsrc > 1 && inst->alu.add.b.raddr == raddr) ||
-+               (mul_nsrc > 0 && inst->alu.mul.a.raddr == raddr) ||
-+               (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr);
-+}
-+
- bool
- v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
-                            const struct v3d_qpu_sig *sig)
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index a25be8e0ee6..9f7582ab06d 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -494,4 +494,6 @@ bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- 
- bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
-+
-+bool v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr);
- #endif
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0031-broadcom-compiler-add-a-v3d71_qpu_writes_waddr_expli.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0031-broadcom-compiler-add-a-v3d71_qpu_writes_waddr_expli.patch
@ -1,65 +0,0 @@
-From cbe0a7a06a5fb9b3f28acba8c9cac362a6bc5324 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 6 Oct 2021 13:58:00 +0200
-Subject: [PATCH 031/142] broadcom/compiler: add a
- v3d71_qpu_writes_waddr_explicitly helper
-
---
- src/broadcom/qpu/qpu_instr.c | 28 ++++++++++++++++++++++++++++
- src/broadcom/qpu/qpu_instr.h |  3 +++
- 2 files changed, 31 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index e8bbb2141b0..feb6b343c1c 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -968,6 +968,34 @@ v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
-                (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr);
- }
- 
-+bool
-+v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
-+                                  const struct v3d_qpu_instr *inst,
-+                                  uint8_t waddr)
-+{
-+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
-+                return false;
-+
-+        if (v3d_qpu_add_op_has_dst(inst->alu.add.op) &&
-+            !inst->alu.add.magic_write &&
-+            inst->alu.add.waddr == waddr) {
-+                return true;
-+        }
-+
-+        if (v3d_qpu_mul_op_has_dst(inst->alu.mul.op) &&
-+            !inst->alu.mul.magic_write &&
-+            inst->alu.mul.waddr == waddr) {
-+                return true;
-+        }
-+
-+        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
-+            !inst->sig_magic && inst->sig_addr == waddr) {
-+                return true;
-+        }
-+
-+        return false;
-+}
-+
- bool
- v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
-                            const struct v3d_qpu_sig *sig)
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 9f7582ab06d..50a69ce8c3a 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -496,4 +496,7 @@ bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- 
- bool v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr);
-+bool v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
-+                                       const struct v3d_qpu_instr *inst,
-+                                       uint8_t waddr);
- #endif
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0032-broadcom-compiler-prevent-rf2-3-usage-in-thread-end-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0032-broadcom-compiler-prevent-rf2-3-usage-in-thread-end-.patch
@ -1,67 +0,0 @@
-From 92e91a9b22ae61dc9f39880e8fdaa7714789efdb Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 27 Sep 2021 11:49:24 +0200
-Subject: [PATCH 032/142] broadcom/compiler: prevent rf2-3 usage in thread end
- delay slots for v71
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
-Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
---
- src/broadcom/compiler/qpu_schedule.c | 37 +++++++++++++++++++++-------
- 1 file changed, 28 insertions(+), 9 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index b78abe003e9..839c0c62315 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1691,16 +1691,35 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
-                 if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
-                         return false;
- 
-                /* RF0-2 might be overwritten during the delay slots by
-                 * fragment shader setup.
-                 */
-                if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A))
-                        return false;
-+                if (c->devinfo->ver <= 42) {
-+                        /* RF0-2 might be overwritten during the delay slots by
-+                         * fragment shader setup.
-+                         */
-+                        if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A))
-+                                return false;
- 
-                if (inst->raddr_b < 3 &&
-                    !inst->sig.small_imm_b &&
-                    v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
-                        return false;
-+                        if (inst->raddr_b < 3 &&
-+                            !inst->sig.small_imm_b &&
-+                            v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
-+                                return false;
-+                        }
-+                }
-+
-+                if (c->devinfo->ver >= 71) {
-+                        /* RF2-3 might be overwritten during the delay slots by
-+                         * fragment shader setup.
-+                         *
-+                         * FIXME: handle small immediate cases
-+                         */
-+                        if (v3d71_qpu_reads_raddr(inst, 2) ||
-+                            v3d71_qpu_reads_raddr(inst, 3)) {
-+                                return false;
-+                        }
-+
-+                        if (v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 2) ||
-+                            v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 3)) {
-+                                return false;
-+                        }
-                 }
-         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0033-broadcom-qpu-add-new-ADD-opcodes-for-FMOV-MOV-in-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0033-broadcom-qpu-add-new-ADD-opcodes-for-FMOV-MOV-in-v71.patch
@ -1,78 +0,0 @@
-From 68a1545eb973e41608534ff05a9e84a86c046453 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 27 Sep 2021 13:26:04 +0200
-Subject: [PATCH 033/142] broadcom/qpu: add new ADD opcodes for FMOV/MOV in v71
-
---
- src/broadcom/qpu/qpu_instr.c |  5 +++++
- src/broadcom/qpu/qpu_instr.h |  4 ++++
- src/broadcom/qpu/qpu_pack.c  | 15 +++++++++++++++
- 3 files changed, 24 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index feb6b343c1c..195a0dcd232 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -177,6 +177,8 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
-                 [V3D_QPU_A_ITOF] = "itof",
-                 [V3D_QPU_A_CLZ] = "clz",
-                 [V3D_QPU_A_UTOF] = "utof",
-+                [V3D_QPU_A_MOV] = "mov",
-+                [V3D_QPU_A_FMOV] = "fmov",
-         };
- 
-         if (op >= ARRAY_SIZE(op_names))
-@@ -458,6 +460,9 @@ static const uint8_t add_op_args[] = {
-         [V3D_QPU_A_ITOF] = D | A,
-         [V3D_QPU_A_CLZ] = D | A,
-         [V3D_QPU_A_UTOF] = D | A,
-+
-+        [V3D_QPU_A_MOV] = D | A,
-+        [V3D_QPU_A_FMOV] = D | A,
- };
- 
- static const uint8_t mul_op_args[] = {
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 50a69ce8c3a..c86a4119c54 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -227,6 +227,10 @@ enum v3d_qpu_add_op {
-         V3D_QPU_A_ITOF,
-         V3D_QPU_A_CLZ,
-         V3D_QPU_A_UTOF,
-+
-+        /* V3D 7.x */
-+        V3D_QPU_A_FMOV,
-+        V3D_QPU_A_MOV,
- };
- 
- enum v3d_qpu_mul_op {
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 4045275cb9a..0e504e65fbf 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -776,6 +776,21 @@ static const struct opcode_desc add_ops_v71[] = {
- 
-         { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
-         { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
-+
-+        { 249, 249, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FMOV, 71 },
-+        { 249, 249, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FMOV, 71 },
-+        { 249, 249, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FMOV, 71 },
-+        { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
-+        { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
-+        { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
-+        { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },
-+
-+        { 249, 249, .raddr_mask = OP_MASK(3),  V3D_QPU_A_MOV, 71 },
-+        { 249, 249, .raddr_mask = OP_MASK(7),  V3D_QPU_A_MOV, 71 },
-+        { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
-+        { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
-+        { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
-+
- };
- 
- static const struct opcode_desc mul_ops_v71[] = {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0034-broadcom-qpu-fix-packing-unpacking-of-fmov-variants-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0034-broadcom-qpu-fix-packing-unpacking-of-fmov-variants-.patch
@ -1,46 +0,0 @@
-From 8dbbb7e22b694fdc62376d112b3dc6105d556c63 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 4 Oct 2021 13:07:35 +0200
-Subject: [PATCH 034/142] broadcom/qpu: fix packing/unpacking of fmov variants
- for v71
-
---
- src/broadcom/qpu/qpu_pack.c | 9 ++++-----
- 1 file changed, 4 insertions(+), 5 deletions(-)
-
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 0e504e65fbf..0eb820b3f10 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -1405,9 +1405,9 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
-                 break;
- 
-         case V3D_QPU_M_FMOV:
-                instr->alu.mul.output_pack = (raddr_d >> 2) & 1;
-+                instr->alu.mul.output_pack = raddr_d & 0x3;
- 
-                if (!v3d_qpu_float32_unpack_unpack(raddr_d & 0x3,
-+                if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7,
-                                                    &instr->alu.mul.a.unpack)) {
-                         return false;
-                 }
-@@ -2046,14 +2046,13 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                                                &packed)) {
-                         return false;
-                 }
-                opcode |= (packed >> 1) & 1;
-                raddr_d = (packed & 1) << 2;
-+                raddr_d |= packed;
- 
-                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
-                                                  &packed)) {
-                         return false;
-                 }
-                raddr_d |= packed;
-+                raddr_d |= packed << 2;
-                 break;
-         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0035-broadcom-qpu-implement-switch-rules-for-fmin-fmax-fa.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0035-broadcom-qpu-implement-switch-rules-for-fmin-fmax-fa.patch
@ -1,107 +0,0 @@
-From 63d0059ebef288afb0e2e746dadda8c2238bdfcb Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 28 Sep 2021 01:17:08 +0200
-Subject: [PATCH 035/142] broadcom/qpu: implement switch rules for fmin/fmax
- fadd/faddnf for v71
-
-They use the same opcodes, and switch between one and the other based
-on raddr.
-
-Note that the rule rule includes also if small_imm_a/b are used. That
-is still not in place so that part is hardcode. Would be updated later
-when small immediates support for v71 gets implemented.
---
- src/broadcom/qpu/qpu_pack.c | 48 +++++++++++++++++++++++++++++++++++++
- 1 file changed, 48 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 0eb820b3f10..7a262f18ac3 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -651,7 +651,9 @@ static const struct opcode_desc mul_ops_v33[] = {
-  * opcodes that changed on v71
-  */
- static const struct opcode_desc add_ops_v71[] = {
-+        /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */
-         { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
-+        { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
-         { 53,  55,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
-         { 56,  56,  .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
-         { 57,  59,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
-@@ -666,6 +668,10 @@ static const struct opcode_desc add_ops_v71[] = {
-         { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
-         { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
-         { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
-+        /* FMIN is instead FMAX depending on the raddr_a/b order. */
-+        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
-+        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
-+        { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },
- 
-         { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
-         { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
-@@ -1162,6 +1168,22 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
- 
-         instr->alu.add.op = desc->op;
- 
-+        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
-+         * operands.
-+         */
-+        /* FIXME: for now hardcoded values, until we got the small_imm support
-+         * in place
-+         */
-+        uint32_t small_imm_a = 0;
-+        uint32_t small_imm_b = 0;
-+        if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
-+            small_imm_b *256 + (op & 3) * 64 + raddr_b) {
-+                if (instr->alu.add.op == V3D_QPU_A_FMIN)
-+                        instr->alu.add.op = V3D_QPU_A_FMAX;
-+                if (instr->alu.add.op == V3D_QPU_A_FADD)
-+                        instr->alu.add.op = V3D_QPU_A_FADDNF;
-+        }
-+
-         /* Some QPU ops require a bit more than just basic opcode and mux a/b
-          * comparisons to distinguish them.
-          */
-@@ -1754,6 +1776,11 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 uint32_t output_pack;
-                 uint32_t a_unpack;
-                 uint32_t b_unpack;
-+                /* FIXME: for now hardcoded values, until we got the small_imm
-+                 * support in place
-+                 */
-+                uint32_t small_imm_a = 0;
-+                uint32_t small_imm_b = 0;
- 
-                 if (instr->alu.add.op != V3D_QPU_A_FCMP) {
-                         if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
-@@ -1773,6 +1800,27 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                         return false;
-                 }
- 
-+                /* These operations with commutative operands are
-+                 * distinguished by which order their operands come in.
-+                 */
-+                bool ordering =
-+                        small_imm_a * 256 + a_unpack * 64 + raddr_a >
-+                        small_imm_b * 256 + b_unpack * 64 + raddr_b;
-+                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
-+                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
-+                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
-+                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
-+                        uint32_t temp;
-+
-+                        temp = a_unpack;
-+                        a_unpack = b_unpack;
-+                        b_unpack = temp;
-+
-+                        temp = raddr_a;
-+                        raddr_a = raddr_b;
-+                        raddr_b = temp;
-+                }
-+
-                 opcode |= a_unpack << 2;
-                 opcode |= b_unpack << 0;
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0036-broadcom-compiler-make-vir_write_rX-return-false-on-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0036-broadcom-compiler-make-vir_write_rX-return-false-on-.patch
@ -1,37 +0,0 @@
-From c9f6faa3ddc91024b3d9dc67ce2221187daac128 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 29 Sep 2021 11:54:18 +0200
-Subject: [PATCH 036/142] broadcom/compiler: make vir_write_rX return false on
- platforms without accums
-
---
- src/broadcom/compiler/vir.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
-index 007cb0a941b..d75cd777b6d 100644
--- a/src/broadcom/compiler/vir.c
-+++ b/src/broadcom/compiler/vir.c
-@@ -158,6 +158,9 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
- bool
- vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
- {
-+        if (!devinfo->has_accumulators)
-+                return false;
-+
-         for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                 switch (inst->src[i].file) {
-                 case QFILE_VPM:
-@@ -180,6 +183,9 @@ vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
- bool
- vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
- {
-+        if (!devinfo->has_accumulators)
-+                return false;
-+
-         switch (inst->dst.file) {
-         case QFILE_MAGIC:
-                 switch (inst->dst.index) {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0037-broadcom-compiler-rename-vir_writes_rX-to-vir_writes.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0037-broadcom-compiler-rename-vir_writes_rX-to-vir_writes.patch
@ -1,77 +0,0 @@
-From 3d16229743e26b58735ed049ee982073f6034342 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 29 Sep 2021 12:03:50 +0200
-Subject: [PATCH 037/142] broadcom/compiler: rename vir_writes_rX to
- vir_writes_rX_implicitly
-
-Since that represents more accurately what they check..
---
- src/broadcom/compiler/v3d_compiler.h          | 4 ++--
- src/broadcom/compiler/vir.c                   | 6 ++++--
- src/broadcom/compiler/vir_register_allocate.c | 4 ++--
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
-index eb4e692464b..7e8f3bfc1a7 100644
--- a/src/broadcom/compiler/v3d_compiler.h
-+++ b/src/broadcom/compiler/v3d_compiler.h
-@@ -1149,8 +1149,8 @@ bool vir_is_raw_mov(struct qinst *inst);
- bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst);
- bool vir_is_add(struct qinst *inst);
- bool vir_is_mul(struct qinst *inst);
-bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
-bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
-+bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
-+bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
- struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
- uint8_t vir_channels_written(struct qinst *inst);
- struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i);
-diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
-index d75cd777b6d..aea113f050e 100644
--- a/src/broadcom/compiler/vir.c
-+++ b/src/broadcom/compiler/vir.c
-@@ -156,7 +156,8 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
- }
- 
- bool
-vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
-+vir_writes_r3_implicitly(const struct v3d_device_info *devinfo,
-+                         struct qinst *inst)
- {
-         if (!devinfo->has_accumulators)
-                 return false;
-@@ -181,7 +182,8 @@ vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
- }
- 
- bool
-vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
-+vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
-+                         struct qinst *inst)
- {
-         if (!devinfo->has_accumulators)
-                 return false;
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 7b3f6c41934..f2df35cd458 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -988,7 +988,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-          * result to a temp), nothing else can be stored in r3/r4 across
-          * it.
-          */
-        if (vir_writes_r3(c->devinfo, inst)) {
-+        if (vir_writes_r3_implicitly(c->devinfo, inst)) {
-                 for (int i = 0; i < c->num_temps; i++) {
-                         if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                 ra_add_node_interference(c->g,
-@@ -998,7 +998,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                 }
-         }
- 
-        if (vir_writes_r4(c->devinfo, inst)) {
-+        if (vir_writes_r4_implicitly(c->devinfo, inst)) {
-                 for (int i = 0; i < c->num_temps; i++) {
-                         if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                 ra_add_node_interference(c->g,
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0038-broadcom-compiler-only-handle-accumulator-classes-if.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0038-broadcom-compiler-only-handle-accumulator-classes-if.patch
@ -1,170 +0,0 @@
-From 83fae160491737e8568b8fb5eaa5be4d2c8bf3c8 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 29 Sep 2021 12:10:31 +0200
-Subject: [PATCH 038/142] broadcom/compiler: only handle accumulator classes if
- present
-
---
- src/broadcom/compiler/vir_register_allocate.c | 77 ++++++++++++-------
- 1 file changed, 49 insertions(+), 28 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index f2df35cd458..e78ccb7c6aa 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -53,6 +53,17 @@ get_class_bit_any(const struct v3d_device_info *devinfo)
-         else
-                 return CLASS_BITS_PHYS;
- }
-+
-+static uint8_t
-+filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
-+{
-+   if (!devinfo->has_accumulators) {
-+      assert(class_bits & CLASS_BITS_PHYS);
-+      class_bits = CLASS_BITS_PHYS;
-+   }
-+   return class_bits;
-+}
-+
- static inline uint32_t
- temp_to_node(struct v3d_compile *c, uint32_t temp)
- {
-@@ -413,8 +424,10 @@ v3d_setup_spill_base(struct v3d_compile *c)
-                  */
-                 if (c->spilling) {
-                         int temp_class = CLASS_BITS_PHYS;
-                        if (i != c->spill_base.index)
-+                        if (c->devinfo->has_accumulators &&
-+                            i != c->spill_base.index) {
-                                 temp_class |= CLASS_BITS_ACC;
-+                        }
-                         add_node(c, i, temp_class);
-                 }
-         }
-@@ -473,14 +486,16 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
-          * temp will be used immediately so just like the uniform above we
-          * can allow accumulators.
-          */
-+        int temp_class =
-+                filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC);
-         if (!fill_dst) {
-                 struct qreg dst = vir_TMUWT(c);
-                 assert(dst.file == QFILE_TEMP);
-                add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
-+                add_node(c, dst.index, temp_class);
-         } else {
-                 *fill_dst = vir_LDTMU(c);
-                 assert(fill_dst->file == QFILE_TEMP);
-                add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
-+                add_node(c, fill_dst->index, temp_class);
-         }
- 
-         /* Temps across the thread switch we injected can't be assigned to
-@@ -662,8 +677,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-                                          * instruction immediately after so we
-                                          * can use ACC.
-                                          */
-                                        add_node(c, temp.index, CLASS_BITS_PHYS |
-                                                                CLASS_BITS_ACC);
-+                                        int temp_class =
-+                                                filter_class_bits(c->devinfo, CLASS_BITS_PHYS |
-+                                                                              CLASS_BITS_ACC);
-+                                        add_node(c, temp.index, temp_class);
-                                 } else {
-                                         /* If we have a postponed spill, we
-                                          * don't need a fill as the temp would
-@@ -941,6 +958,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
-                 compiler->reg_class_phys[threads] =
-                         ra_alloc_contig_reg_class(compiler->regs, 1);
- 
-+                /* Init physical regs */
-                 for (int i = phys_index;
-                      i < phys_index + (PHYS_COUNT >> threads); i++) {
-                         if (compiler->devinfo->has_accumulators)
-@@ -949,16 +967,15 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
-                         ra_class_add_reg(compiler->reg_class_any[threads], i);
-                 }
- 
-+                /* Init accumulator regs */
-                 if (compiler->devinfo->has_accumulators) {
-                         for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
-                                 ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
-                                 ra_class_add_reg(compiler->reg_class_any[threads], i);
-                         }
-                }
-                /* r5 can only store a single 32-bit value, so not much can
-                 * use it.
-                 */
-                if (compiler->devinfo->has_accumulators) {
-+                        /* r5 can only store a single 32-bit value, so not much can
-+                         * use it.
-+                         */
-                         ra_class_add_reg(compiler->reg_class_r5[threads],
-                                          ACC_INDEX + 5);
-                         ra_class_add_reg(compiler->reg_class_any[threads],
-@@ -1081,21 +1098,23 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                  * because ldunif has usually a shorter lifespan, allowing for
-                  * more accumulator reuse and QPU merges.
-                  */
-                if (!inst->qpu.sig.ldunif) {
-                        uint8_t class_bits =
-                                get_temp_class_bits(c, inst->dst.index) &
-                                ~CLASS_BITS_R5;
-                        set_temp_class_bits(c, inst->dst.index,
-                                            class_bits);
-
-                } else {
-                        /* Until V3D 4.x, we could only load a uniform
-                         * to r5, so we'll need to spill if uniform
-                         * loads interfere with each other.
-                         */
-                        if (c->devinfo->ver < 40) {
-+                if (c->devinfo->has_accumulators) {
-+                        if (!inst->qpu.sig.ldunif) {
-+                                uint8_t class_bits =
-+                                        get_temp_class_bits(c, inst->dst.index) &
-+                                        ~CLASS_BITS_R5;
-                                 set_temp_class_bits(c, inst->dst.index,
-                                                    CLASS_BITS_R5);
-+                                                    class_bits);
-+
-+                        } else {
-+                                /* Until V3D 4.x, we could only load a uniform
-+                                 * to r5, so we'll need to spill if uniform
-+                                 * loads interfere with each other.
-+                                 */
-+                                if (c->devinfo->ver < 40) {
-+                                        set_temp_class_bits(c, inst->dst.index,
-+                                                            CLASS_BITS_R5);
-+                                }
-                         }
-                 }
-         }
-@@ -1152,8 +1171,10 @@ v3d_register_allocate(struct v3d_compile *c)
-                         c->thread_index--;
-         }
- 
-        c->g = ra_alloc_interference_graph(c->compiler->regs,
-                                           c->num_temps + ARRAY_SIZE(acc_nodes));
-+        unsigned num_ra_nodes = c->num_temps;
-+        if (c->devinfo->has_accumulators)
-+                num_ra_nodes += ARRAY_SIZE(acc_nodes);
-+        c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
-         ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
- 
-         /* Make some fixed nodes for the accumulators, which we will need to
-@@ -1162,8 +1183,8 @@ v3d_register_allocate(struct v3d_compile *c)
-          * live in, but the classes take up a lot of memory to set up, so we
-          * don't want to make too many.
-          */
-        for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) {
-                if (i < ACC_COUNT) {
-+        for (uint32_t i = 0; i < num_ra_nodes; i++) {
-+                if (c->devinfo->has_accumulators && i < ACC_COUNT) {
-                         acc_nodes[i] = i;
-                         ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
-                         c->nodes.info[i].priority = 0;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0039-broadcom-compiler-don-t-assign-rf0-to-temps-across-i.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0039-broadcom-compiler-don-t-assign-rf0-to-temps-across-i.patch
@ -1,187 +0,0 @@
-From fd77cc3204e7c69927f97ce2a1d55d2a47d77a27 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 29 Sep 2021 12:14:04 +0200
-Subject: [PATCH 039/142] broadcom/compiler: don't assign rf0 to temps across
- implicit rf0 writes
-
-In platforms that don't have accumulators and have implicit writes to
-the register file we need to be careful and avoid assigning a physical
-register to a temp that lives across an implicit write to that same
-physical register.
-
-For now, we have the case of implicit writes to rf0 from various
-signals, but it should be easy to extend this to include additional
-registers if needed.
---
- src/broadcom/compiler/vir_register_allocate.c | 69 +++++++++++++++----
- 1 file changed, 57 insertions(+), 12 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index e78ccb7c6aa..e0adc1de7a4 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -29,6 +29,9 @@
- #define ACC_INDEX     0
- #define ACC_COUNT     6
- 
-+/* RA nodes used to track RF registers with implicit writes */
-+#define IMPLICIT_RF_COUNT 1
-+
- #define PHYS_COUNT 64
- 
- static uint8_t
-@@ -67,15 +70,17 @@ filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
- static inline uint32_t
- temp_to_node(struct v3d_compile *c, uint32_t temp)
- {
-        return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
-+        return temp + (c->devinfo->has_accumulators ? ACC_COUNT :
-+                                                      IMPLICIT_RF_COUNT);
- }
- 
- static inline uint32_t
- node_to_temp(struct v3d_compile *c, uint32_t node)
- {
-         assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
-               (!c->devinfo->has_accumulators && node >= 0));
-        return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
-+               (!c->devinfo->has_accumulators && node >= IMPLICIT_RF_COUNT));
-+        return node - (c->devinfo->has_accumulators ? ACC_COUNT :
-+                                                      IMPLICIT_RF_COUNT);
- }
- 
- static inline uint8_t
-@@ -360,7 +365,8 @@ ensure_nodes(struct v3d_compile *c)
-         c->nodes.info = reralloc_array_size(c,
-                                             c->nodes.info,
-                                             sizeof(c->nodes.info[0]),
-                                            c->nodes.alloc_count + ACC_COUNT);
-+                                            c->nodes.alloc_count +
-+                                            MAX2(ACC_COUNT, IMPLICIT_RF_COUNT));
- }
- 
- /* Creates the interference node for a new temp. We use this to keep the node
-@@ -372,7 +378,8 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
-         ensure_nodes(c);
- 
-         int node = ra_add_node(c->g, choose_reg_class(c, class_bits));
-        assert(node == temp + ACC_COUNT);
-+        assert(c->devinfo->has_accumulators ? node == temp + ACC_COUNT :
-+                                              node == temp + IMPLICIT_RF_COUNT);
- 
-         /* We fill the node priority after we are done inserting spills */
-         c->nodes.info[node].class_bits = class_bits;
-@@ -995,7 +1002,9 @@ tmu_spilling_allowed(struct v3d_compile *c)
- }
- 
- static void
-update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-+update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
-+                                      int *acc_nodes,
-+                                      int *implicit_rf_nodes,
-                                       struct qinst *inst)
- {
-         int32_t ip = inst->ip;
-@@ -1025,6 +1034,19 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                 }
-         }
- 
-+        /* If any instruction writes to a physical register implicitly
-+         * nothing else can write the same register across it.
-+         */
-+        if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
-+                for (int i = 0; i < c->num_temps; i++) {
-+                        if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-+                                ra_add_node_interference(c->g,
-+                                                         temp_to_node(c, i),
-+                                                         implicit_rf_nodes[0]);
-+                        }
-+                }
-+        }
-+
-         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
-                 switch (inst->qpu.alu.add.op) {
-                 case V3D_QPU_A_LDVPMV_IN:
-@@ -1116,6 +1138,16 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
-                                                             CLASS_BITS_R5);
-                                 }
-                         }
-+                } else {
-+                        /* If the instruction has an implicit write
-+                         * we can't allocate its dest to the same
-+                         * register.
-+                         */
-+                        if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
-+                                ra_add_node_interference(c->g,
-+                                                         temp_to_node(c, inst->dst.index),
-+                                                         implicit_rf_nodes[0]);
-+                        }
-                 }
-         }
- 
-@@ -1139,10 +1171,18 @@ struct qpu_reg *
- v3d_register_allocate(struct v3d_compile *c)
- {
-         int acc_nodes[ACC_COUNT];
-+        int implicit_rf_nodes[IMPLICIT_RF_COUNT];
-+
-+        unsigned num_ra_nodes = c->num_temps;
-+        if (c->devinfo->has_accumulators)
-+                num_ra_nodes += ARRAY_SIZE(acc_nodes);
-+        else
-+                num_ra_nodes += ARRAY_SIZE(implicit_rf_nodes);
-+
-         c->nodes = (struct v3d_ra_node_info) {
-                 .alloc_count = c->num_temps,
-                 .info = ralloc_array_size(c, sizeof(c->nodes.info[0]),
-                                          c->num_temps + ACC_COUNT),
-+                                          num_ra_nodes),
-         };
- 
-         uint32_t phys_index = get_phys_index(c->devinfo);
-@@ -1171,9 +1211,6 @@ v3d_register_allocate(struct v3d_compile *c)
-                         c->thread_index--;
-         }
- 
-        unsigned num_ra_nodes = c->num_temps;
-        if (c->devinfo->has_accumulators)
-                num_ra_nodes += ARRAY_SIZE(acc_nodes);
-         c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
-         ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
- 
-@@ -1181,7 +1218,8 @@ v3d_register_allocate(struct v3d_compile *c)
-          * interfere with when ops have implied r3/r4 writes or for the thread
-          * switches.  We could represent these as classes for the nodes to
-          * live in, but the classes take up a lot of memory to set up, so we
-         * don't want to make too many.
-+         * don't want to make too many. We use the same mechanism on platforms
-+         * without accumulators that can have implicit writes to phys regs.
-          */
-         for (uint32_t i = 0; i < num_ra_nodes; i++) {
-                 if (c->devinfo->has_accumulators && i < ACC_COUNT) {
-@@ -1189,6 +1227,12 @@ v3d_register_allocate(struct v3d_compile *c)
-                         ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
-                         c->nodes.info[i].priority = 0;
-                         c->nodes.info[i].class_bits = 0;
-+                } else if (!c->devinfo->has_accumulators &&
-+                           i < ARRAY_SIZE(implicit_rf_nodes)) {
-+                        implicit_rf_nodes[i] = i;
-+                        ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i);
-+                        c->nodes.info[i].priority = 0;
-+                        c->nodes.info[i].class_bits = 0;
-                 } else {
-                         uint32_t t = node_to_temp(c, i);
-                         c->nodes.info[i].priority =
-@@ -1204,7 +1248,8 @@ v3d_register_allocate(struct v3d_compile *c)
-         int ip = 0;
-         vir_for_each_inst_inorder(inst, c) {
-                 inst->ip = ip++;
-                update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
-+                update_graph_and_reg_classes_for_inst(c, acc_nodes,
-+                                                      implicit_rf_nodes, inst);
-         }
- 
-         /* Set the register classes for all our temporaries in the graph */
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0040-broadcom-compiler-CS-payload-registers-have-changed-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0040-broadcom-compiler-CS-payload-registers-have-changed-.patch
@ -1,33 +0,0 @@
-From 9a08ae9f354a6da6d9d71b87800aca8b3df49e29 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 28 Sep 2021 13:37:28 +0200
-Subject: [PATCH 040/142] broadcom/compiler: CS payload registers have changed
- in v71
-
---
- src/broadcom/compiler/nir_to_vir.c | 9 +++++++--
- 1 file changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
-index 1a05b279a2d..220ff6bcd49 100644
--- a/src/broadcom/compiler/nir_to_vir.c
-+++ b/src/broadcom/compiler/nir_to_vir.c
-@@ -4362,8 +4362,13 @@ nir_to_vir(struct v3d_compile *c)
-                                                       V3D_QPU_WADDR_SYNC));
-                 }
- 
-                c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
-                c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
-+                if (c->devinfo->ver <= 42) {
-+                        c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
-+                        c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
-+                } else if (c->devinfo->ver >= 71) {
-+                        c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 3));
-+                        c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
-+                }
- 
-                 /* Set up the division between gl_LocalInvocationIndex and
-                  * wg_in_mem in the payload reg.
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0041-broadcom-compiler-don-t-schedule-rf0-writes-right-af.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0041-broadcom-compiler-don-t-schedule-rf0-writes-right-af.patch
@ -1,46 +0,0 @@
-From 5477884196cb54a71f54fa6cad42c6d3326bde88 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Fri, 22 Oct 2021 13:39:48 +0200
-Subject: [PATCH 041/142] broadcom/compiler: don't schedule rf0 writes right
- after ldvary
-
-ldvary writes rf0 implicitly on the next cycle so they would clash.
-This case is not handled correctly by our normal dependency tracking,
-which doesn't know anything about delayed writes from instructions
-and thinks the rf0 write happens on the same cycle ldvary is emitted.
-
-Fixes (v71):
-dEQP-VK.glsl.conversions.matrix_to_matrix.mat2x3_to_mat4x2_fragment
---
- src/broadcom/compiler/qpu_schedule.c | 15 +++++++++++++++
- 1 file changed, 15 insertions(+)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 839c0c62315..870823fd2b1 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -652,6 +652,21 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
-             v3d_qpu_writes_r4(devinfo, inst))
-                 return true;
- 
-+        if (devinfo->ver <= 42)
-+           return false;
-+
-+        /* Don't schedule anything that writes rf0 right after ldvary, since
-+         * that would clash with the ldvary's delayed rf0 write (the exception
-+         * is another ldvary, since its implicit rf0 write would also have
-+         * one cycle of delay and would not clash).
-+         */
-+        if (scoreboard->last_ldvary_tick + 1 == scoreboard->tick &&
-+            (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
-+             (v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
-+              !inst->sig.ldvary))) {
-+            return true;
-+       }
-+
-         return false;
- }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0042-broadcom-compiler-allow-instruction-merges-in-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0042-broadcom-compiler-allow-instruction-merges-in-v71.patch
@ -1,60 +0,0 @@
-From 31623712c2f741d393767641f32d56c35150eda5 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 30 Sep 2021 13:22:48 +0200
-Subject: [PATCH 042/142] broadcom/compiler: allow instruction merges in v71
-
-In v3d 4.x there were restrictions based on the number of raddrs used
-by the combined instructions, but we don't have these restrictions in
-v3d 7.x.
-
-It should be noted that while there are no restrictions on the number
-of raddrs addressed, a QPU instruction can only address a single small
-immediate, so we should be careful about that when we add support for
-small immediates.
---
- src/broadcom/compiler/qpu_schedule.c | 21 +++++++++++++++++----
- 1 file changed, 17 insertions(+), 4 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 870823fd2b1..ff544fb3c1c 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -906,8 +906,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
- static bool
- qpu_merge_raddrs(struct v3d_qpu_instr *result,
-                  const struct v3d_qpu_instr *add_instr,
-                 const struct v3d_qpu_instr *mul_instr)
-+                 const struct v3d_qpu_instr *mul_instr,
-+                 const struct v3d_device_info *devinfo)
- {
-+        assert(devinfo->ver <= 42);
-+
-         uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
-         int naddrs = util_bitcount64(raddrs_used);
- 
-@@ -1111,9 +1114,19 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
-                 add_instr = a;
-         }
- 
-        if (add_instr && mul_instr &&
-            !qpu_merge_raddrs(&merge, add_instr, mul_instr)) {
-                        return false;
-+        /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
-+         * they have restrictions on the number of raddrs that can be addressed
-+         * in a single instruction.
-+         *
-+         * FIXME: for V3D 7.x we can't merge instructions if they address more
-+         * than one small immediate. For now, we don't support small immediates,
-+         * so it is not a problem.
-+         */
-+        if (devinfo->ver <= 42) {
-+                if (add_instr && mul_instr &&
-+                    !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
-+                                return false;
-+                }
-         }
- 
-         merge.sig.thrsw |= b->sig.thrsw;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0043-broadcom-qpu-add-MOV-integer-packing-unpacking-varia.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0043-broadcom-qpu-add-MOV-integer-packing-unpacking-varia.patch
@ -1,172 +0,0 @@
-From 959a0128654c94d84fda53ffc108971d3b3a817a Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 6 Oct 2021 09:27:43 +0200
-Subject: [PATCH 043/142] broadcom/qpu: add MOV integer packing/unpacking
- variants
-
-These are new in v71 and cover MOV on both the ADD and the MUL alus.
---
- src/broadcom/qpu/qpu_instr.h |  9 ++++
- src/broadcom/qpu/qpu_pack.c  | 98 ++++++++++++++++++++++++++++++++++++
- 2 files changed, 107 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index c86a4119c54..4b34d17bd4c 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -285,6 +285,15 @@ enum v3d_qpu_input_unpack {
- 
-         /** Swap high and low 16 bits */
-         V3D_QPU_UNPACK_SWAP_16,
-+
-+        /** Convert low 16 bits from 16-bit integer to unsigned 32-bit int */
-+        V3D_QPU_UNPACK_UL,
-+        /** Convert high 16 bits from 16-bit integer to unsigned 32-bit int */
-+        V3D_QPU_UNPACK_UH,
-+        /** Convert low 16 bits from 16-bit integer to signed 32-bit int */
-+        V3D_QPU_UNPACK_IL,
-+        /** Convert high 16 bits from 16-bit integer to signed 32-bit int */
-+        V3D_QPU_UNPACK_IH,
- };
- 
- enum v3d_qpu_mux {
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 7a262f18ac3..4d677894755 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -922,6 +922,56 @@ v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
-         }
- }
- 
-+static bool
-+v3d_qpu_int32_unpack_unpack(uint32_t packed,
-+                            enum v3d_qpu_input_unpack *unpacked)
-+{
-+        switch (packed) {
-+        case 0:
-+                *unpacked = V3D_QPU_UNPACK_NONE;
-+                return true;
-+        case 1:
-+                *unpacked = V3D_QPU_UNPACK_UL;
-+                return true;
-+        case 2:
-+                *unpacked = V3D_QPU_UNPACK_UH;
-+                return true;
-+        case 3:
-+                *unpacked = V3D_QPU_UNPACK_IL;
-+                return true;
-+        case 4:
-+                *unpacked = V3D_QPU_UNPACK_IH;
-+                return true;
-+        default:
-+                return false;
-+        }
-+}
-+
-+static bool
-+v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
-+                          uint32_t *packed)
-+{
-+        switch (unpacked) {
-+        case V3D_QPU_UNPACK_NONE:
-+                *packed = 0;
-+                return true;
-+        case V3D_QPU_UNPACK_UL:
-+                *packed = 1;
-+                return true;
-+        case V3D_QPU_UNPACK_UH:
-+                *packed = 2;
-+                return true;
-+        case V3D_QPU_UNPACK_IL:
-+                *packed = 3;
-+                return true;
-+        case V3D_QPU_UNPACK_IH:
-+                *packed = 4;
-+                return true;
-+        default:
-+                return false;
-+        }
-+}
-+
- static bool
- v3d_qpu_float16_unpack_unpack(uint32_t packed,
-                               enum v3d_qpu_input_unpack *unpacked)
-@@ -1273,6 +1323,15 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
-                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
-                 break;
- 
-+        case V3D_QPU_A_MOV:
-+                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
-+
-+                if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
-+                                                 &instr->alu.add.a.unpack)) {
-+                        return false;
-+                }
-+                break;
-+
-         default:
-                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
-                 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
-@@ -1449,6 +1508,15 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
- 
-                 break;
- 
-+        case V3D_QPU_M_MOV:
-+                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
-+
-+                if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
-+                                                 &instr->alu.mul.a.unpack)) {
-+                        return false;
-+                }
-+                break;
-+
-         default:
-                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
-                 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
-@@ -1909,6 +1977,21 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 opcode |= packed;
-                 break;
- 
-+        case V3D_QPU_A_MOV: {
-+                uint32_t packed;
-+
-+                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
-+                        return false;
-+
-+                if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
-+                                               &packed)) {
-+                        return false;
-+                }
-+
-+                raddr_b |= packed << 2;
-+                break;
-+        }
-+
-         default:
-                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
-                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
-@@ -2126,6 +2209,21 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                 break;
-         }
- 
-+        case V3D_QPU_M_MOV: {
-+                uint32_t packed;
-+
-+                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
-+                        return false;
-+
-+                if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
-+                                               &packed)) {
-+                        return false;
-+                }
-+
-+                raddr_d |= packed << 2;
-+                break;
-+        }
-+
-         default:
-                 break;
-         }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0044-broadcom-qpu-fail-packing-on-unhandled-mul-pack-unpa.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0044-broadcom-qpu-fail-packing-on-unhandled-mul-pack-unpa.patch
@ -1,47 +0,0 @@
-From 2e86dd0c357d7b432ce6794ae22fbfae89ad186b Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 6 Oct 2021 12:01:10 +0200
-Subject: [PATCH 044/142] broadcom/qpu: fail packing on unhandled mul
- pack/unpack
-
-We are doing this for the ADD alu already and it may be helpful to
-identify cases where we have QPU code with pack/unpack modifiers on
-MUL opcodes that we then are not packing into the actual QPU
-instructions.
---
- src/broadcom/qpu/qpu_pack.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 4d677894755..180d7ab08a3 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -2106,6 +2106,12 @@ v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo,
-         }
- 
-         default:
-+                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
-+                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
-+                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
-+                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
-+                        return false;
-+                }
-                 break;
-         }
- 
-@@ -2225,6 +2231,12 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
-         }
- 
-         default:
-+                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
-+                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
-+                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
-+                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
-+                        return false;
-+                }
-                 break;
-         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0045-broadcom-compiler-generalize-check-for-shaders-using.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0045-broadcom-compiler-generalize-check-for-shaders-using.patch
@ -1,30 +0,0 @@
-From ed6bfa29d43b5a89ff070961454f1e82e23b4f45 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Fri, 8 Oct 2021 15:10:24 +0200
-Subject: [PATCH 045/142] broadcom/compiler: generalize check for shaders using
- pixel center W
-
-V3D 4.x has pixel center W in rf0 and V3D 7.x has it in rf3. We already
-account for this when we setup the c->payload_w, so use that.
---
- src/broadcom/compiler/nir_to_vir.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
-index 220ff6bcd49..90fe1d1e7f0 100644
--- a/src/broadcom/compiler/nir_to_vir.c
-+++ b/src/broadcom/compiler/nir_to_vir.c
-@@ -4547,8 +4547,8 @@ vir_check_payload_w(struct v3d_compile *c)
- 
-         vir_for_each_inst_inorder(inst, c) {
-                 for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                        if (inst->src[i].file == QFILE_REG &&
-                            inst->src[i].index == 0) {
-+                        if (inst->src[i].file == c->payload_w.file &&
-+                            inst->src[i].index == c->payload_w.index) {
-                                 c->uses_center_w = true;
-                                 return;
-                         }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0046-broadcom-compiler-v71-isn-t-affected-by-double-round.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0046-broadcom-compiler-v71-isn-t-affected-by-double-round.patch
@ -1,34 +0,0 @@
-From e1a0fa2c2010ef29b8cec798cd0fc99cf44f3a2d Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 14 Oct 2021 14:16:40 +0200
-Subject: [PATCH 046/142] broadcom/compiler: v71 isn't affected by
- double-rounding of viewport X,Y coords
-
---
- src/broadcom/compiler/v3d_nir_lower_io.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
-index 3ef0e398228..4cdba3748a1 100644
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
-+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
-@@ -600,9 +600,13 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
-                          * The correct fix for this as recommended by Broadcom
-                          * is to convert to .8 fixed-point with ffloor().
-                          */
-                        pos = nir_f2i32(b, nir_ffloor(b, pos));
-                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
-                                             offset_reg, pos);
-+                        if (c->devinfo->ver <= 42)
-+                                 pos = nir_f2i32(b, nir_ffloor(b, pos));
-+                        else
-+                                 pos = nir_f2i32(b, nir_fround_even(b, pos));
-+
-+                       v3d_nir_store_output(b, state->vp_vpm_offset + i,
-+                                            offset_reg, pos);
-                 }
-         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0047-broadcom-compiler-update-one-TMUWT-restriction-for-v.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0047-broadcom-compiler-update-one-TMUWT-restriction-for-v.patch
@ -1,31 +0,0 @@
-From 697e6cf01b781b244404872f331a778b6d4e67da Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 19 Oct 2021 11:16:43 +0200
-Subject: [PATCH 047/142] broadcom/compiler: update one TMUWT restriction for
- v71
-
-The "TMUWT not allowed in the final instruction" restriction doesn't
-apply to v71.
---
- src/broadcom/compiler/qpu_schedule.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index ff544fb3c1c..25f79aa6f46 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1700,8 +1700,10 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
- 
-         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
-                 /* GFXH-1625: TMUWT not allowed in the final instruction. */
-                if (slot == 2 && inst->alu.add.op == V3D_QPU_A_TMUWT)
-+                if (c->devinfo->ver <= 42 && slot == 2 &&
-+                    inst->alu.add.op == V3D_QPU_A_TMUWT) {
-                         return false;
-+                }
- 
-                 /* No writing physical registers at the end. */
-                 bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0048-broadcom-compiler-update-ldunif-ldvary-comment-for-v.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0048-broadcom-compiler-update-ldunif-ldvary-comment-for-v.patch
@ -1,37 +0,0 @@
-From 26fea727a9f34b75a3fe3f6a806accaddcc317f6 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 19 Oct 2021 11:51:32 +0200
-Subject: [PATCH 048/142] broadcom/compiler: update ldunif/ldvary comment for
- v71
-
-For v42 and below, ldunif and ldvary both write to r5, but with a
-different delay, so we need to take that into account when scheduling both.
-
-For v71 the register used is rf0, but the behaviour is the same. So
-the scheduling code can be the same, but the comment needs updating.
---
- src/broadcom/compiler/qpu_schedule.c | 7 ++++---
- 1 file changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 25f79aa6f46..e8197661f89 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1234,10 +1234,11 @@ retry:
-                 if (pixel_scoreboard_too_soon(c, scoreboard, inst))
-                         continue;
- 
-                /* ldunif and ldvary both write r5, but ldunif does so a tick
-                 * sooner.  If the ldvary's r5 wasn't used, then ldunif might
-+                /* ldunif and ldvary both write the same register (r5 for v42
-+                 * and below, rf0 for v71), but ldunif does so a tick sooner.
-+                 * If the ldvary's register wasn't used, then ldunif might
-                  * otherwise get scheduled so ldunif and ldvary try to update
-                 * r5 in the same tick.
-+                 * the register in the same tick.
-                  */
-                 if ((inst->sig.ldunif || inst->sig.ldunifa) &&
-                     scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0049-broadcom-compiler-update-payload-registers-handling-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0049-broadcom-compiler-update-payload-registers-handling-.patch
@ -1,52 +0,0 @@
-From 70456e27b039174f767010f96d9b649e5e42d84f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 19 Oct 2021 23:52:30 +0200
-Subject: [PATCH 049/142] broadcom/compiler: update payload registers handling
- when computing live intervals
-
-For v71 the payload registers are not the same. Specifically, rf3 is
-now used as a payload register, so this is needed to avoid rf3 being
-selected as an instruction dst by the register allocator, overwriting
-a payload value that could still be in use.
---
- src/broadcom/compiler/vir_live_variables.c | 21 +++++++++++++--------
- 1 file changed, 13 insertions(+), 8 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_live_variables.c b/src/broadcom/compiler/vir_live_variables.c
-index 575b0481dc8..87a7e2b5b81 100644
--- a/src/broadcom/compiler/vir_live_variables.c
-+++ b/src/broadcom/compiler/vir_live_variables.c
-@@ -179,17 +179,22 @@ vir_setup_def_use(struct v3d_compile *c)
-                                 flags_inst = NULL;
-                         }
- 
-                        /* Payload registers: r0/1/2 contain W, centroid W,
-                         * and Z at program start.  Register allocation will
-                         * force their nodes to R0/1/2.
-+                        /* Payload registers: for fragment shaders, W,
-+                         * centroid W, and Z will be initialized at r0/1/2
-+                         * until v42, or r1/r2/r3 from v71.
-+                         *
-+                         * For compute shaders, payload would be r0/r2 until
-+                         * v42, r3/r2 from v71
-+                         *
-+                         * Register allocation will force their nodes to those
-+                         * registers.
-                          */
-                         if (inst->src[0].file == QFILE_REG) {
-                                switch (inst->src[0].index) {
-                                case 0:
-                                case 1:
-                                case 2:
-+                                uint32_t min_payload_r = c->devinfo->ver >= 71 ? 1 : 0;
-+                                uint32_t max_payload_r = c->devinfo->ver >= 71 ? 3 : 2;
-+                                if (inst->src[0].index >= min_payload_r ||
-+                                    inst->src[0].index <= max_payload_r) {
-                                         c->temp_start[inst->dst.index] = 0;
-                                        break;
-                                 }
-                         }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0050-broadcom-compiler-update-peripheral-access-restricti.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0050-broadcom-compiler-update-peripheral-access-restricti.patch
@ -1,235 +0,0 @@
-From f9a76b3a1e316e5ed6387819b87eaaf60f989a2b Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 26 Oct 2021 11:43:02 +0200
-Subject: [PATCH 050/142] broadcom/compiler: update peripheral access
- restrictions for v71
-
-In V3D 4.x only a couple of simultaneous accesses were allowed, but
-V3D 7.x is a bit more flexible, so rather than trying to check for all
-the allowed combinations it is easier to check if we are one of the
-disallowed ones.
-
-Shader-db (pi5):
-
-total instructions in shared programs: 11338883 -> 11307386 (-0.28%)
-instructions in affected programs: 2727201 -> 2695704 (-1.15%)
-helped: 12555
-HURT: 289
-Instructions are helped.
-
-total max-temps in shared programs: 2230199 -> 2229260 (-0.04%)
-max-temps in affected programs: 20508 -> 19569 (-4.58%)
-helped: 608
-HURT: 4
-Max-temps are helped.
-
-total sfu-stalls in shared programs: 15236 -> 15293 (0.37%)
-sfu-stalls in affected programs: 148 -> 205 (38.51%)
-helped: 38
-HURT: 64
-Inconclusive result (%-change mean confidence interval includes 0).
-
-total inst-and-stalls in shared programs: 11354119 -> 11322679 (-0.28%)
-inst-and-stalls in affected programs: 2732262 -> 2700822 (-1.15%)
-helped: 12550
-HURT: 304
-Inst-and-stalls are helped.
-
-total nops in shared programs: 273711 -> 274095 (0.14%)
-nops in affected programs: 9626 -> 10010 (3.99%)
-helped: 186
-HURT: 397
-Nops are HURT.
---
- src/broadcom/compiler/qpu_schedule.c | 88 +++++++++++++++++++++-------
- src/broadcom/compiler/qpu_validate.c |  2 +-
- src/broadcom/qpu/qpu_instr.c         | 16 +++--
- src/broadcom/qpu/qpu_instr.h         |  2 +
- 4 files changed, 82 insertions(+), 26 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index e8197661f89..adb501e85ce 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -790,7 +790,8 @@ enum {
-         V3D_PERIPHERAL_TMU_WAIT           = (1 << 6),
-         V3D_PERIPHERAL_TMU_WRTMUC_SIG     = (1 << 7),
-         V3D_PERIPHERAL_TSY                = (1 << 8),
-        V3D_PERIPHERAL_TLB                = (1 << 9),
-+        V3D_PERIPHERAL_TLB_READ           = (1 << 9),
-+        V3D_PERIPHERAL_TLB_WRITE          = (1 << 10),
- };
- 
- static uint32_t
-@@ -815,8 +816,10 @@ qpu_peripherals(const struct v3d_device_info *devinfo,
-         if (v3d_qpu_uses_sfu(inst))
-                 result |= V3D_PERIPHERAL_SFU;
- 
-        if (v3d_qpu_uses_tlb(inst))
-                result |= V3D_PERIPHERAL_TLB;
-+        if (v3d_qpu_reads_tlb(inst))
-+                result |= V3D_PERIPHERAL_TLB_READ;
-+        if (v3d_qpu_writes_tlb(inst))
-+                result |= V3D_PERIPHERAL_TLB_WRITE;
- 
-         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
-                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
-@@ -847,32 +850,75 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
-         if (devinfo->ver < 41)
-                 return false;
- 
-        /* V3D 4.1+ allow WRTMUC signal with TMU register write (other than
-         * tmuc).
-+        /* V3D 4.x can't do more than one peripheral access except in a
-+         * few cases:
-          */
-        if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
-            b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
-                return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
-+        if (devinfo->ver <= 42) {
-+                /* WRTMUC signal with TMU register write (other than tmuc). */
-+                if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
-+                    b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
-+                        return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
-+                }
-+                if (b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
-+                    a_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
-+                        return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
-+                }
-+
-+                /* TMU read with VPM read/write. */
-+                if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
-+                    (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
-+                     b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
-+                        return true;
-+                }
-+                if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
-+                    (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
-+                     a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
-+                        return true;
-+                }
-+
-+                return false;
-         }
- 
-        if (a_peripherals == V3D_PERIPHERAL_TMU_WRITE &&
-            b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG) {
-                return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
-+        /* V3D 7.x can't have more than one of these restricted peripherals */
-+        const uint32_t restricted = V3D_PERIPHERAL_TMU_WRITE |
-+                                    V3D_PERIPHERAL_TMU_WRTMUC_SIG |
-+                                    V3D_PERIPHERAL_TSY |
-+                                    V3D_PERIPHERAL_TLB_READ |
-+                                    V3D_PERIPHERAL_SFU |
-+                                    V3D_PERIPHERAL_VPM_READ |
-+                                    V3D_PERIPHERAL_VPM_WRITE;
-+
-+        const uint32_t a_restricted = a_peripherals & restricted;
-+        const uint32_t b_restricted = b_peripherals & restricted;
-+        if (a_restricted && b_restricted) {
-+                /* WRTMUC signal with TMU register write (other than tmuc) is
-+                 * allowed though.
-+                 */
-+                if (!((a_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
-+                       b_restricted == V3D_PERIPHERAL_TMU_WRITE &&
-+                       v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
-+                      (b_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
-+                       a_restricted == V3D_PERIPHERAL_TMU_WRITE &&
-+                       v3d_qpu_writes_tmu_not_tmuc(devinfo, a)))) {
-+                        return false;
-+                }
-         }
- 
-        /* V3D 4.1+ allows TMU read with VPM read/write. */
-        if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
-            (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
-             b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
-                return true;
-+        /* Only one TMU read per instruction */
-+        if ((a_peripherals & V3D_PERIPHERAL_TMU_READ) &&
-+            (b_peripherals & V3D_PERIPHERAL_TMU_READ)) {
-+                return false;
-         }
-        if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
-            (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
-             a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
-                return true;
-+
-+        /* Only one TLB access per instruction */
-+        if ((a_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
-+                              V3D_PERIPHERAL_TLB_READ)) &&
-+            (b_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
-+                              V3D_PERIPHERAL_TLB_READ))) {
-+                return false;
-         }
- 
-        return false;
-+        return true;
- }
- 
- /* Compute a bitmask of which rf registers are used between
-diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
-index 12788692432..fde6695d59b 100644
--- a/src/broadcom/compiler/qpu_validate.c
-+++ b/src/broadcom/compiler/qpu_validate.c
-@@ -227,7 +227,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
-             vpm_writes +
-             tlb_writes +
-             tsy_writes +
-            inst->sig.ldtmu +
-+            (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) +
-             inst->sig.ldtlb +
-             inst->sig.ldvpm +
-             inst->sig.ldtlbu > 1) {
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index 195a0dcd232..f54ce7210fb 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -649,12 +649,14 @@ v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
- }
- 
- bool
-v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
-+v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst)
- {
-        if (inst->sig.ldtlb ||
-            inst->sig.ldtlbu)
-                return true;
-+        return inst->sig.ldtlb || inst->sig.ldtlbu;
-+}
- 
-+bool
-+v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst)
-+{
-         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
-                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
-                     inst->alu.add.magic_write &&
-@@ -672,6 +674,12 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
-         return false;
- }
- 
-+bool
-+v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
-+{
-+        return  v3d_qpu_writes_tlb(inst) || v3d_qpu_reads_tlb(inst);
-+}
-+
- bool
- v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
- {
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index 4b34d17bd4c..dece45c5c54 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -472,6 +472,8 @@ bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
- bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
- bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
- bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
-+bool v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
-+bool v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
- bool v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0051-broadcom-qpu-add-packing-for-fmov-on-ADD-alu.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0051-broadcom-qpu-add-packing-for-fmov-on-ADD-alu.patch
@ -1,61 +0,0 @@
-From 3520cceb87fb2f9765ba7dbe2771fbd0cadca78d Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 26 Oct 2021 08:37:54 +0200
-Subject: [PATCH 051/142] broadcom/qpu: add packing for fmov on ADD alu
-
---
- src/broadcom/qpu/qpu_pack.c | 31 +++++++++++++++++++++++++++++++
- 1 file changed, 31 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 180d7ab08a3..ed5a8bc667d 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -1332,6 +1332,20 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
-                 }
-                 break;
- 
-+        case V3D_QPU_A_FMOV:
-+                instr->alu.add.output_pack = raddr_b & 0x3;
-+
-+                /* Mul alu FMOV has one additional variant */
-+                int32_t unpack = (raddr_b >> 2) & 0x7;
-+                if (unpack == 7)
-+                        return false;
-+
-+                if (!v3d_qpu_float32_unpack_unpack(unpack,
-+                                                   &instr->alu.add.a.unpack)) {
-+                        return false;
-+                }
-+                break;
-+
-         default:
-                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
-                 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
-@@ -1992,6 +2006,23 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 break;
-         }
- 
-+        case V3D_QPU_A_FMOV: {
-+                uint32_t packed;
-+
-+                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
-+                                               &packed)) {
-+                        return false;
-+                }
-+                raddr_b = packed;
-+
-+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
-+                                                 &packed)) {
-+                        return false;
-+                }
-+                raddr_b |= packed << 2;
-+                break;
-+        }
-+
-         default:
-                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
-                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0052-broadcom-compiler-handle-rf0-flops-storage-restricti.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0052-broadcom-compiler-handle-rf0-flops-storage-restricti.patch
@ -1,155 +0,0 @@
-From 7c7ab15b3c9def4bc3bb5be492228a933c325f8a Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 6 Oct 2021 13:58:27 +0200
-Subject: [PATCH 052/142] broadcom/compiler: handle rf0 flops storage
- restriction in v71
-
---
- src/broadcom/compiler/qpu_schedule.c | 81 +++++++++++++++++++++++++++-
- 1 file changed, 79 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index adb501e85ce..7048d9257b6 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -538,6 +538,10 @@ struct choose_scoreboard {
-         int ldvary_count;
-         int pending_ldtmu_count;
-         bool first_ldtmu_after_thrsw;
-+
-+        /* V3D 7.x */
-+        int last_implicit_rf0_write_tick;
-+        bool has_rf0_flops_conflict;
- };
- 
- static bool
-@@ -1499,6 +1503,62 @@ update_scoreboard_tmu_tracking(struct choose_scoreboard *scoreboard,
-         }
- }
- 
-+static void
-+set_has_rf0_flops_conflict(struct choose_scoreboard *scoreboard,
-+                           const struct v3d_qpu_instr *inst,
-+                           const struct v3d_device_info *devinfo)
-+{
-+        if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick &&
-+            v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
-+            !inst->sig_magic) {
-+                scoreboard->has_rf0_flops_conflict = true;
-+        }
-+}
-+
-+static void
-+update_scoreboard_for_rf0_flops(struct choose_scoreboard *scoreboard,
-+                                const struct v3d_qpu_instr *inst,
-+                                const struct v3d_device_info *devinfo)
-+{
-+        if (devinfo->ver < 71)
-+                return;
-+
-+        /* Thread switch restrictions:
-+         *
-+         * At the point of a thread switch or thread end (when the actual
-+         * thread switch or thread end happens, not when the signalling
-+         * instruction is processed):
-+         *
-+         *    - If the most recent write to rf0 was from a ldunif, ldunifa, or
-+         *      ldvary instruction in which another signal also wrote to the
-+         *      register file, and the final instruction of the thread section
-+         *      contained a signal which wrote to the register file, then the
-+         *      value of rf0 is undefined at the start of the new section
-+         *
-+         * Here we use the scoreboard to track if our last rf0 implicit write
-+         * happens at the same time that another signal writes the register
-+         * file (has_rf0_flops_conflict). We will use that information when
-+         * scheduling thrsw instructions to avoid putting anything in their
-+         * last delay slot which has a signal that writes to the register file.
-+         */
-+
-+        /* Reset tracking if we have an explicit rf0 write or we are starting
-+         * a new thread section.
-+         */
-+        if (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
-+            scoreboard->tick - scoreboard->last_thrsw_tick == 3) {
-+                scoreboard->last_implicit_rf0_write_tick = -10;
-+                scoreboard->has_rf0_flops_conflict = false;
-+        }
-+
-+        if (v3d_qpu_writes_rf0_implicitly(devinfo, inst)) {
-+                scoreboard->last_implicit_rf0_write_tick = inst->sig.ldvary ?
-+                        scoreboard->tick + 1 : scoreboard->tick;
-+        }
-+
-+        set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
-+}
-+
- static void
- update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
-                              const struct qinst *qinst,
-@@ -1542,6 +1602,8 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
-         if (inst->sig.ldvary)
-                 scoreboard->last_ldvary_tick = scoreboard->tick;
- 
-+        update_scoreboard_for_rf0_flops(scoreboard, inst, devinfo);
-+
-         update_scoreboard_tmu_tracking(scoreboard, qinst);
- }
- 
-@@ -1812,6 +1874,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
-  */
- static bool
- qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
-+                                          struct choose_scoreboard *scoreboard,
-                                           const struct qinst *qinst,
-                                           uint32_t slot)
- {
-@@ -1842,6 +1905,17 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
-         if (v3d_qpu_writes_unifa(c->devinfo, &qinst->qpu))
-                 return false;
- 
-+        /* See comment when we set has_rf0_flops_conflict for details */
-+        if (c->devinfo->ver >= 71 &&
-+            slot == 2 &&
-+            v3d_qpu_sig_writes_address(c->devinfo, &qinst->qpu.sig) &&
-+            !qinst->qpu.sig_magic) {
-+                if (scoreboard->has_rf0_flops_conflict)
-+                        return false;
-+                if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick)
-+                        return false;
-+        }
-+
-         return true;
- }
- 
-@@ -1874,7 +1948,7 @@ qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c,
-          * also apply to instructions scheduled after the thrsw that we want
-          * to place in its delay slots.
-          */
-        if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot))
-+        if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard, qinst, slot))
-                 return false;
- 
-         /* TLB access is disallowed until scoreboard wait is executed, which
-@@ -1947,8 +2021,10 @@ valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard
-                      bool is_thrend)
- {
-         for (int slot = 0; slot < instructions_in_sequence; slot++) {
-                if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot))
-+                if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard,
-+                                                               qinst, slot)) {
-                         return false;
-+                }
- 
-                 if (is_thrend &&
-                     !qpu_inst_valid_in_thrend_slot(c, qinst, slot)) {
-@@ -2718,6 +2794,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
-         scoreboard.last_setmsf_tick = -10;
-         scoreboard.last_stallable_sfu_tick = -10;
-         scoreboard.first_ldtmu_after_thrsw = true;
-+        scoreboard.last_implicit_rf0_write_tick = - 10;
- 
-         if (debug) {
-                 fprintf(stderr, "Pre-schedule instructions\n");
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0053-broadcom-compiler-enable-ldvary-pipelining-on-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0053-broadcom-compiler-enable-ldvary-pipelining-on-v71.patch
@ -1,189 +0,0 @@
-From 0c6910721eb50b38b3388c2d2344b6ecfe0fee58 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 27 Oct 2021 11:35:12 +0200
-Subject: [PATCH 053/142] broadcom/compiler: enable ldvary pipelining on v71
-
---
- src/broadcom/compiler/qpu_schedule.c | 121 ++++++++++++++++++---------
- 1 file changed, 80 insertions(+), 41 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 7048d9257b6..334ffdc6d58 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -2312,46 +2312,72 @@ emit_branch(struct v3d_compile *c,
- }
- 
- static bool
-alu_reads_register(struct v3d_qpu_instr *inst,
-+alu_reads_register(const struct v3d_device_info *devinfo,
-+                   struct v3d_qpu_instr *inst,
-                    bool add, bool magic, uint32_t index)
- {
-         uint32_t num_src;
-        enum v3d_qpu_mux mux_a, mux_b;
-
-        if (add) {
-+        if (add)
-                 num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
-                mux_a = inst->alu.add.a.mux;
-                mux_b = inst->alu.add.b.mux;
-        } else {
-+        else
-                 num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
-                mux_a = inst->alu.mul.a.mux;
-                mux_b = inst->alu.mul.b.mux;
-        }
- 
-        for (int i = 0; i < num_src; i++) {
-                if (magic) {
-                        if (i == 0 && mux_a == index)
-                                return true;
-                        if (i == 1 && mux_b == index)
-                                return true;
-+        if (devinfo->ver <= 42) {
-+                enum v3d_qpu_mux mux_a, mux_b;
-+                if (add) {
-+                        mux_a = inst->alu.add.a.mux;
-+                        mux_b = inst->alu.add.b.mux;
-                 } else {
-                        if (i == 0 && mux_a == V3D_QPU_MUX_A &&
-                            inst->raddr_a == index) {
-                                return true;
-                        }
-                        if (i == 0 && mux_a == V3D_QPU_MUX_B &&
-                            inst->raddr_b == index) {
-                                return true;
-                        }
-                        if (i == 1 && mux_b == V3D_QPU_MUX_A &&
-                            inst->raddr_a == index) {
-                                return true;
-                        }
-                        if (i == 1 && mux_b == V3D_QPU_MUX_B &&
-                            inst->raddr_b == index) {
-                                return true;
-+                        mux_a = inst->alu.mul.a.mux;
-+                        mux_b = inst->alu.mul.b.mux;
-+                }
-+
-+                for (int i = 0; i < num_src; i++) {
-+                        if (magic) {
-+                                if (i == 0 && mux_a == index)
-+                                        return true;
-+                                if (i == 1 && mux_b == index)
-+                                        return true;
-+                        } else {
-+                                if (i == 0 && mux_a == V3D_QPU_MUX_A &&
-+                                    inst->raddr_a == index) {
-+                                        return true;
-+                                }
-+                                if (i == 0 && mux_a == V3D_QPU_MUX_B &&
-+                                    inst->raddr_b == index) {
-+                                        return true;
-+                                }
-+                                if (i == 1 && mux_b == V3D_QPU_MUX_A &&
-+                                    inst->raddr_a == index) {
-+                                        return true;
-+                                }
-+                                if (i == 1 && mux_b == V3D_QPU_MUX_B &&
-+                                    inst->raddr_b == index) {
-+                                        return true;
-+                                }
-                         }
-                 }
-+
-+                return false;
-+        }
-+
-+        assert(devinfo->ver >= 71);
-+        assert(!magic);
-+
-+        uint32_t raddr_a, raddr_b;
-+        if (add) {
-+                raddr_a = inst->alu.add.a.raddr;
-+                raddr_b = inst->alu.add.b.raddr;
-+        } else {
-+                raddr_a = inst->alu.mul.a.raddr;
-+                raddr_b = inst->alu.mul.b.raddr;
-+        }
-+
-+        for (int i = 0; i < num_src; i++) {
-+                if (i == 0 && raddr_a == index)
-+                        return true;
-+                if (i == 1 && raddr_b == index)
-+                        return true;
-         }
- 
-         return false;
-@@ -2386,6 +2412,8 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
-                        struct qblock *block,
-                        struct v3d_qpu_instr *inst)
- {
-+        const struct v3d_device_info *devinfo = c->devinfo;
-+
-         /* We only call this if we have successfully merged an ldvary into a
-          * previous instruction.
-          */
-@@ -2398,9 +2426,9 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
-          * the ldvary destination, if it does, then moving the ldvary before
-          * it would overwrite it.
-          */
-        if (alu_reads_register(inst, true, ldvary_magic, ldvary_index))
-+        if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
-                 return false;
-        if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
-+        if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
-                 return false;
- 
-         /* The implicit ldvary destination may not be written to by a signal
-@@ -2436,13 +2464,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
-         }
- 
-         /* The previous instruction cannot have a conflicting signal */
-        if (v3d_qpu_sig_writes_address(c->devinfo, &prev->qpu.sig))
-+        if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
-                 return false;
- 
-         uint32_t sig;
-         struct v3d_qpu_sig new_sig = prev->qpu.sig;
-         new_sig.ldvary = true;
-        if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig))
-+        if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
-                 return false;
- 
-         /* The previous instruction cannot use flags since ldvary uses the
-@@ -2471,14 +2499,25 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
-         inst->sig_magic = false;
-         inst->sig_addr = 0;
- 
-        /* By moving ldvary to the previous instruction we make it update
-         * r5 in the current one, so nothing else in it should write r5.
-         * This should've been prevented by our dependency tracking, which
-+        /* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
-+        if (devinfo->ver >= 71) {
-+                scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
-+                set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
-+        }
-+
-+        /* By moving ldvary to the previous instruction we make it update r5
-+         * (rf0 for ver >= 71) in the current one, so nothing else in it
-+         * should write this register.
-+         *
-+         * This should've been prevented by our dependency tracking, which
-          * would not allow ldvary to be paired up with an instruction that
-         * writes r5 (since our dependency tracking doesn't know that the
-         * ldvary write r5 happens in the next instruction).
-+         * writes r5/rf0 (since our dependency tracking doesn't know that the
-+         * ldvary write to r5/rf0 happens in the next instruction).
-          */
-        assert(!v3d_qpu_writes_r5(c->devinfo, inst));
-+        assert(!v3d_qpu_writes_r5(devinfo, inst));
-+        assert(devinfo->ver <= 42 ||
-+               (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
-+                !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
- 
-         return true;
- }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0054-broadcom-compiler-try-to-use-ldunif-a-instead-of-ldu.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0054-broadcom-compiler-try-to-use-ldunif-a-instead-of-ldu.patch
@ -1,144 +0,0 @@
-From 0670d642bb91fc68ce73f2d9fb88c482295a446d Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 28 Oct 2021 14:13:29 +0200
-Subject: [PATCH 054/142] broadcom/compiler: try to use ldunif(a) instead of
- ldunif(a)rf in v71
-
-The rf variants need to encode the destination in the cond bits, which
-prevents them from being merged with any other instruction that needs them.
-
-In 4.x, ldunif(a) writes to r5, which is a special register that only
-ldunif(a) and ldvary can write, so we have a special register class for
-it and only allow it for them. Then when we need to choose a register
-for a node, if this register is available we always use it.
-
-In 7.x these instructions write to rf0, which can be used by any
-instruction, so instead of restricting rf0, we track the temps that
-are used as ldunif(a) destinations and use that information to favor
-rf0 for them.
---
- src/broadcom/compiler/v3d_compiler.h          |  3 ++
- src/broadcom/compiler/vir_register_allocate.c | 34 ++++++++++++++++---
- src/broadcom/compiler/vir_to_qpu.c            | 11 ++++--
- 3 files changed, 41 insertions(+), 7 deletions(-)
-
-diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
-index 7e8f3bfc1a7..36adf8830b5 100644
--- a/src/broadcom/compiler/v3d_compiler.h
-+++ b/src/broadcom/compiler/v3d_compiler.h
-@@ -613,6 +613,9 @@ struct v3d_ra_node_info {
-         struct {
-                 uint32_t priority;
-                 uint8_t class_bits;
-+
-+                /* V3D 7.x */
-+                bool is_ldunif_dst;
-         } *info;
-         uint32_t alloc_count;
- };
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index e0adc1de7a4..1be091f8518 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -384,6 +384,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
-         /* We fill the node priority after we are done inserting spills */
-         c->nodes.info[node].class_bits = class_bits;
-         c->nodes.info[node].priority = 0;
-+        c->nodes.info[node].is_ldunif_dst = false;
- }
- 
- /* The spill offset for this thread takes a bit of setup, so do it once at
-@@ -899,9 +900,22 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
- 
- static bool
- v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
-+                 unsigned int node,
-                  BITSET_WORD *regs,
-                  unsigned int *out)
- {
-+        /* In V3D 7.x, try to assign rf0 to temps used as ldunif's dst
-+         * so we can avoid turning them into ldunifrf (which uses the
-+         * cond field to encode the dst and would prevent merge with
-+         * instructions that use cond flags).
-+         */
-+        if (v3d_ra->nodes->info[node].is_ldunif_dst &&
-+            BITSET_TEST(regs, v3d_ra->phys_index)) {
-+                assert(v3d_ra->devinfo->ver >= 71);
-+                *out = v3d_ra->phys_index;
-+                return true;
-+        }
-+
-         for (int i = 0; i < PHYS_COUNT; i++) {
-                 int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
-                 int phys = v3d_ra->phys_index + phys_off;
-@@ -927,7 +941,7 @@ v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data)
-                 return reg;
-         }
- 
-        if (v3d_ra_select_rf(v3d_ra, regs, &reg))
-+        if (v3d_ra_select_rf(v3d_ra, n, regs, &reg))
-                 return reg;
- 
-         /* If we ran out of physical registers try to assign an accumulator
-@@ -1139,15 +1153,24 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
-                                 }
-                         }
-                 } else {
-                        /* If the instruction has an implicit write
-                         * we can't allocate its dest to the same
-                         * register.
-+                        /* Make sure we don't allocate the ldvary's
-+                         * destination to rf0, since it would clash
-+                         * with its implicit write to that register.
-                          */
-                        if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
-+                        if (inst->qpu.sig.ldvary) {
-                                 ra_add_node_interference(c->g,
-                                                          temp_to_node(c, inst->dst.index),
-                                                          implicit_rf_nodes[0]);
-                         }
-+                        /* Flag dst temps from ldunif(a) instructions
-+                         * so we can try to assign rf0 to them and avoid
-+                         * converting these to ldunif(a)rf.
-+                         */
-+                        if (inst->qpu.sig.ldunif || inst->qpu.sig.ldunifa) {
-+                                const uint32_t dst_n =
-+                                        temp_to_node(c, inst->dst.index);
-+                                c->nodes.info[dst_n].is_ldunif_dst = true;
-+                        }
-                 }
-         }
- 
-@@ -1222,6 +1245,7 @@ v3d_register_allocate(struct v3d_compile *c)
-          * without accumulators that can have implicit writes to phys regs.
-          */
-         for (uint32_t i = 0; i < num_ra_nodes; i++) {
-+                c->nodes.info[i].is_ldunif_dst = false;
-                 if (c->devinfo->has_accumulators && i < ACC_COUNT) {
-                         acc_nodes[i] = i;
-                         ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
-diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
-index afc4941fdb1..cbbb495592b 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
-+++ b/src/broadcom/compiler/vir_to_qpu.c
-@@ -345,8 +345,15 @@ v3d_generate_code_block(struct v3d_compile *c,
-                                 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
-                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
- 
-                                if (!dst.magic ||
-                                    dst.index != V3D_QPU_WADDR_R5) {
-+                                bool use_rf;
-+                                if (c->devinfo->has_accumulators) {
-+                                        use_rf = !dst.magic ||
-+                                                 dst.index != V3D_QPU_WADDR_R5;
-+                                } else {
-+                                        use_rf = dst.magic || dst.index != 0;
-+                                }
-+
-+                                if (use_rf) {
-                                         assert(c->devinfo->ver >= 40);
- 
-                                         if (qinst->qpu.sig.ldunif) {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0055-broadcom-compiler-don-t-assign-rf0-to-temps-that-con.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0055-broadcom-compiler-don-t-assign-rf0-to-temps-that-con.patch
@ -1,82 +0,0 @@
-From cbed3b97394da09c9ae644c79e098e3ba8b5c3e8 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Fri, 29 Oct 2021 13:00:56 +0200
-Subject: [PATCH 055/142] broadcom/compiler: don't assign rf0 to temps that
- conflict with ldvary
-
-ldvary writes to rf0 implicitly, so we don't want to allocate rf0 to
-any temps that are live across ldvary's rf0 live ranges.
---
- src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++++++-
- 1 file changed, 38 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 1be091f8518..6f7b1ca0589 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -1019,6 +1019,7 @@ static void
- update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
-                                       int *acc_nodes,
-                                       int *implicit_rf_nodes,
-+                                      int last_ldvary_ip,
-                                       struct qinst *inst)
- {
-         int32_t ip = inst->ip;
-@@ -1125,6 +1126,25 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
-                 }
-         }
- 
-+        /* Don't allocate rf0 to temps that cross ranges where we have
-+         * live implicit rf0 writes from ldvary. We can identify these
-+         * by tracking the last ldvary instruction and explicit reads
-+         * of rf0.
-+         */
-+        if (c->devinfo->ver >= 71 &&
-+            ((inst->src[0].file == QFILE_REG && inst->src[0].index == 0) ||
-+              (vir_get_nsrc(inst) > 1 &&
-+               inst->src[1].file == QFILE_REG && inst->src[1].index == 0))) {
-+                for (int i = 0; i < c->num_temps; i++) {
-+                        if (c->temp_start[i] < ip &&
-+                            c->temp_end[i] > last_ldvary_ip) {
-+                                        ra_add_node_interference(c->g,
-+                                                                 temp_to_node(c, i),
-+                                                                 implicit_rf_nodes[0]);
-+                        }
-+                }
-+        }
-+
-         if (inst->dst.file == QFILE_TEMP) {
-                 /* Only a ldunif gets to write to R5, which only has a
-                  * single 32-bit channel of storage.
-@@ -1270,10 +1290,27 @@ v3d_register_allocate(struct v3d_compile *c)
-          * interferences.
-          */
-         int ip = 0;
-+        int last_ldvary_ip = -1;
-         vir_for_each_inst_inorder(inst, c) {
-                 inst->ip = ip++;
-+
-+                /* ldunif(a) always write to a temporary, so we have
-+                 * liveness info available to decide if rf0 is
-+                 * available for them, however, ldvary is different:
-+                 * it always writes to rf0 directly so we don't have
-+                 * liveness information for its implicit rf0 write.
-+                 *
-+                 * That means the allocator may assign rf0 to a temp
-+                 * that is defined while an implicit rf0 write from
-+                 * ldvary is still live. We fix that by manually
-+                 * tracking rf0 live ranges from ldvary instructions.
-+                 */
-+                if (inst->qpu.sig.ldvary)
-+                        last_ldvary_ip = ip;
-+
-                 update_graph_and_reg_classes_for_inst(c, acc_nodes,
-                                                      implicit_rf_nodes, inst);
-+                                                      implicit_rf_nodes,
-+                                                      last_ldvary_ip, inst);
-         }
- 
-         /* Set the register classes for all our temporaries in the graph */
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0056-broadcom-compiler-convert-mul-to-add-when-needed-to-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0056-broadcom-compiler-convert-mul-to-add-when-needed-to-.patch
@ -1,139 +0,0 @@
-From cbaa469c09974c1574b16f559173694904fe1bb0 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 25 Oct 2021 09:38:57 +0200
-Subject: [PATCH 056/142] broadcom/compiler: convert mul to add when needed to
- allow merge
-
-V3D 7.x added 'mov' opcodes to the ADD alu, so now it is possible to
-move these to the ADD alu to facilitate merging them with other MUL
-instructions.
---
- src/broadcom/compiler/qpu_schedule.c | 102 ++++++++++++++++++++++++---
- 1 file changed, 94 insertions(+), 8 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 334ffdc6d58..caa84254998 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1086,6 +1086,57 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
-         inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
- }
- 
-+static bool
-+can_do_mul_as_add(const struct v3d_device_info *devinfo, enum v3d_qpu_mul_op op)
-+{
-+        switch (op) {
-+        case V3D_QPU_M_MOV:
-+        case V3D_QPU_M_FMOV:
-+                return devinfo->ver >= 71;
-+        default:
-+                return false;
-+        }
-+}
-+
-+static enum v3d_qpu_mul_op
-+mul_op_as_add_op(enum v3d_qpu_mul_op op)
-+{
-+        switch (op) {
-+        case V3D_QPU_M_MOV:
-+                return V3D_QPU_A_MOV;
-+        case V3D_QPU_M_FMOV:
-+                return V3D_QPU_A_FMOV;
-+        default:
-+                unreachable("unexpected mov opcode");
-+        }
-+}
-+
-+static void
-+qpu_convert_mul_to_add(struct v3d_qpu_instr *inst)
-+{
-+        STATIC_ASSERT(sizeof(inst->alu.add) == sizeof(inst->alu.mul));
-+        assert(inst->alu.mul.op != V3D_QPU_M_NOP);
-+        assert(inst->alu.add.op == V3D_QPU_A_NOP);
-+
-+        memcpy(&inst->alu.add, &inst->alu.mul, sizeof(inst->alu.add));
-+        inst->alu.add.op = mul_op_as_add_op(inst->alu.mul.op);
-+        inst->alu.mul.op = V3D_QPU_M_NOP;
-+
-+        inst->flags.ac = inst->flags.mc;
-+        inst->flags.apf = inst->flags.mpf;
-+        inst->flags.auf = inst->flags.muf;
-+        inst->flags.mc = V3D_QPU_COND_NONE;
-+        inst->flags.mpf = V3D_QPU_PF_NONE;
-+        inst->flags.muf = V3D_QPU_UF_NONE;
-+
-+        inst->alu.add.output_pack = inst->alu.mul.output_pack;
-+        inst->alu.add.a.unpack = inst->alu.mul.a.unpack;
-+        inst->alu.add.b.unpack = inst->alu.mul.b.unpack;
-+        inst->alu.mul.output_pack = V3D_QPU_PACK_NONE;
-+        inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
-+        inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
-+}
-+
- static bool
- qpu_merge_inst(const struct v3d_device_info *devinfo,
-                struct v3d_qpu_instr *result,
-@@ -1151,17 +1202,52 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
-                 }
-         }
- 
-+        struct v3d_qpu_instr add_inst;
-         if (b->alu.mul.op != V3D_QPU_M_NOP) {
-                if (a->alu.mul.op != V3D_QPU_M_NOP)
-                        return false;
-                merge.alu.mul = b->alu.mul;
-+                if (a->alu.mul.op == V3D_QPU_M_NOP) {
-+                        merge.alu.mul = b->alu.mul;
-+
-+                        merge.flags.mc = b->flags.mc;
-+                        merge.flags.mpf = b->flags.mpf;
-+                        merge.flags.muf = b->flags.muf;
-+
-+                        mul_instr = b;
-+                        add_instr = a;
-+                }
-+                /* If a's mul op is used but its add op is not, then see if we
-+                 * can convert either a's mul op or b's mul op to an add op
-+                 * so we can merge.
-+                 */
-+                else if (a->alu.add.op == V3D_QPU_A_NOP &&
-+                         can_do_mul_as_add(devinfo, b->alu.mul.op)) {
-+                        add_inst = *b;
-+                        qpu_convert_mul_to_add(&add_inst);
- 
-                merge.flags.mc = b->flags.mc;
-                merge.flags.mpf = b->flags.mpf;
-                merge.flags.muf = b->flags.muf;
-+                        merge.alu.add = add_inst.alu.add;
- 
-                mul_instr = b;
-                add_instr = a;
-+                        merge.flags.ac = b->flags.mc;
-+                        merge.flags.apf = b->flags.mpf;
-+                        merge.flags.auf = b->flags.muf;
-+
-+                        mul_instr = a;
-+                        add_instr = &add_inst;
-+                } else if (a->alu.add.op == V3D_QPU_A_NOP &&
-+                           can_do_mul_as_add(devinfo, a->alu.mul.op)) {
-+                        add_inst = *a;
-+                        qpu_convert_mul_to_add(&add_inst);
-+
-+                        merge = add_inst;
-+                        merge.alu.mul = b->alu.mul;
-+
-+                        merge.flags.mc = b->flags.mc;
-+                        merge.flags.mpf = b->flags.mpf;
-+                        merge.flags.muf = b->flags.muf;
-+
-+                        mul_instr = b;
-+                        add_instr = &add_inst;
-+                } else {
-+                        return false;
-+                }
-         }
- 
-         /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0057-broadcom-compiler-implement-small-immediates-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0057-broadcom-compiler-implement-small-immediates-for-v71.patch
@ -1,418 +0,0 @@
-From b59b3725fb16f4ab1ac0db86a5452a4ed6176074 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 3 Nov 2021 10:34:19 +0100
-Subject: [PATCH 057/142] broadcom/compiler: implement small immediates for v71
-
---
- src/broadcom/compiler/qpu_schedule.c          | 90 +++++++++++++------
- src/broadcom/compiler/qpu_validate.c          | 20 ++++-
- .../compiler/vir_opt_small_immediates.c       | 26 +++++-
- src/broadcom/compiler/vir_to_qpu.c            | 11 ++-
- src/broadcom/qpu/qpu_disasm.c                 |  1 -
- src/broadcom/qpu/qpu_instr.c                  |  8 +-
- src/broadcom/qpu/qpu_instr.h                  |  2 +-
- src/broadcom/qpu/qpu_pack.c                   | 36 ++++----
- 8 files changed, 139 insertions(+), 55 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index caa84254998..bd1c920848a 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -714,7 +714,6 @@ qpu_instruction_uses_rf(const struct v3d_device_info *devinfo,
-                     !inst->sig.small_imm_b && (inst->raddr_b == waddr))
-                         return true;
-         } else {
-                /* FIXME: skip if small immediate */
-                 if (v3d71_qpu_reads_raddr(inst, waddr))
-                         return true;
-         }
-@@ -948,10 +947,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
-         return raddrs_used;
- }
- 
-/* Take two instructions and attempt to merge their raddr fields
- * into one merged instruction. Returns false if the two instructions
- * access more than two different rf registers between them, or more
- * than one rf register and one small immediate.
-+/* Takes two instructions and attempts to merge their raddr fields (including
-+ * small immediates) into one merged instruction. For V3D 4.x, returns false
-+ * if the two instructions access more than two different rf registers between
-+ * them, or more than one rf register and one small immediate. For 7.x returns
-+ * false if both instructions use small immediates.
-  */
- static bool
- qpu_merge_raddrs(struct v3d_qpu_instr *result,
-@@ -959,6 +959,27 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
-                  const struct v3d_qpu_instr *mul_instr,
-                  const struct v3d_device_info *devinfo)
- {
-+        if (devinfo->ver >= 71) {
-+                assert(add_instr->sig.small_imm_a +
-+                       add_instr->sig.small_imm_b <= 1);
-+                assert(add_instr->sig.small_imm_c +
-+                       add_instr->sig.small_imm_d == 0);
-+                assert(mul_instr->sig.small_imm_a +
-+                       mul_instr->sig.small_imm_b == 0);
-+                assert(mul_instr->sig.small_imm_c +
-+                       mul_instr->sig.small_imm_d <= 1);
-+
-+                result->sig.small_imm_a = add_instr->sig.small_imm_a;
-+                result->sig.small_imm_b = add_instr->sig.small_imm_b;
-+                result->sig.small_imm_c = mul_instr->sig.small_imm_c;
-+                result->sig.small_imm_d = mul_instr->sig.small_imm_d;
-+
-+                return (result->sig.small_imm_a +
-+                        result->sig.small_imm_b +
-+                        result->sig.small_imm_c +
-+                        result->sig.small_imm_d) <= 1;
-+        }
-+
-         assert(devinfo->ver <= 42);
- 
-         uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
-@@ -1060,7 +1081,8 @@ add_op_as_mul_op(enum v3d_qpu_add_op op)
- }
- 
- static void
-qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
-+qpu_convert_add_to_mul(const struct v3d_device_info *devinfo,
-+                       struct v3d_qpu_instr *inst)
- {
-         STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add));
-         assert(inst->alu.add.op != V3D_QPU_A_NOP);
-@@ -1084,6 +1106,18 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
-         inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
-         inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
-         inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
-+
-+        if (devinfo->ver >= 71) {
-+                assert(!inst->sig.small_imm_c && !inst->sig.small_imm_d);
-+                assert(inst->sig.small_imm_a + inst->sig.small_imm_b <= 1);
-+                if (inst->sig.small_imm_a) {
-+                        inst->sig.small_imm_c = true;
-+                        inst->sig.small_imm_a = false;
-+                } else if (inst->sig.small_imm_b) {
-+                        inst->sig.small_imm_d = true;
-+                        inst->sig.small_imm_b = false;
-+                }
-+        }
- }
- 
- static bool
-@@ -1135,6 +1169,16 @@ qpu_convert_mul_to_add(struct v3d_qpu_instr *inst)
-         inst->alu.mul.output_pack = V3D_QPU_PACK_NONE;
-         inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
-         inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
-+
-+        assert(!inst->sig.small_imm_a && !inst->sig.small_imm_b);
-+        assert(inst->sig.small_imm_c + inst->sig.small_imm_d <= 1);
-+        if (inst->sig.small_imm_c) {
-+                inst->sig.small_imm_a = true;
-+                inst->sig.small_imm_c = false;
-+        } else if (inst->sig.small_imm_d) {
-+                inst->sig.small_imm_b = true;
-+                inst->sig.small_imm_d = false;
-+        }
- }
- 
- static bool
-@@ -1173,20 +1217,20 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
-                 else if (a->alu.mul.op == V3D_QPU_M_NOP &&
-                          can_do_add_as_mul(b->alu.add.op)) {
-                         mul_inst = *b;
-                        qpu_convert_add_to_mul(&mul_inst);
-+                        qpu_convert_add_to_mul(devinfo, &mul_inst);
- 
-                         merge.alu.mul = mul_inst.alu.mul;
- 
-                        merge.flags.mc = b->flags.ac;
-                        merge.flags.mpf = b->flags.apf;
-                        merge.flags.muf = b->flags.auf;
-+                        merge.flags.mc = mul_inst.flags.mc;
-+                        merge.flags.mpf = mul_inst.flags.mpf;
-+                        merge.flags.muf = mul_inst.flags.muf;
- 
-                         add_instr = a;
-                         mul_instr = &mul_inst;
-                 } else if (a->alu.mul.op == V3D_QPU_M_NOP &&
-                            can_do_add_as_mul(a->alu.add.op)) {
-                         mul_inst = *a;
-                        qpu_convert_add_to_mul(&mul_inst);
-+                        qpu_convert_add_to_mul(devinfo, &mul_inst);
- 
-                         merge = mul_inst;
-                         merge.alu.add = b->alu.add;
-@@ -1225,9 +1269,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
- 
-                         merge.alu.add = add_inst.alu.add;
- 
-                        merge.flags.ac = b->flags.mc;
-                        merge.flags.apf = b->flags.mpf;
-                        merge.flags.auf = b->flags.muf;
-+                        merge.flags.ac = add_inst.flags.ac;
-+                        merge.flags.apf = add_inst.flags.apf;
-+                        merge.flags.auf = add_inst.flags.auf;
- 
-                         mul_instr = a;
-                         add_instr = &add_inst;
-@@ -1252,17 +1296,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
- 
-         /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
-          * they have restrictions on the number of raddrs that can be adressed
-         * in a single instruction.
-         *
-         * FIXME: for V3D 7.x we can't merge instructions if they address more
-         * than one small immediate. For now, we don't support small immediates,
-         * so it is not a problem.
-+         * in a single instruction. In V3D 7.x, we don't have that restriction,
-+         * but we are still limited to a single small immediate per instruction.
-          */
-        if (devinfo->ver <= 42) {
-                if (add_instr && mul_instr &&
-                    !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
-                                return false;
-                }
-+        if (add_instr && mul_instr &&
-+            !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
-+                return false;
-         }
- 
-         merge.sig.thrsw |= b->sig.thrsw;
-@@ -1273,7 +1312,6 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
-         merge.sig.ldtmu |= b->sig.ldtmu;
-         merge.sig.ldvary |= b->sig.ldvary;
-         merge.sig.ldvpm |= b->sig.ldvpm;
-        merge.sig.small_imm_b |= b->sig.small_imm_b;
-         merge.sig.ldtlb |= b->sig.ldtlb;
-         merge.sig.ldtlbu |= b->sig.ldtlbu;
-         merge.sig.ucb |= b->sig.ucb;
-@@ -1933,8 +1971,6 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
-                 if (c->devinfo->ver >= 71) {
-                         /* RF2-3 might be overwritten during the delay slots by
-                          * fragment shader setup.
-                         *
-                         * FIXME: handle small immediate cases
-                          */
-                         if (v3d71_qpu_reads_raddr(inst, 2) ||
-                             v3d71_qpu_reads_raddr(inst, 3)) {
-diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
-index fde6695d59b..41070484286 100644
--- a/src/broadcom/compiler/qpu_validate.c
-+++ b/src/broadcom/compiler/qpu_validate.c
-@@ -116,8 +116,24 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
-                 return;
- 
-         if (devinfo->ver < 71) {
-           if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d)
-              fail_instr(state, "small imm a/c/d added after V3D 7.1");
-+                if (inst->sig.small_imm_a || inst->sig.small_imm_c ||
-+                    inst->sig.small_imm_d) {
-+                        fail_instr(state, "small imm a/c/d added after V3D 7.1");
-+                }
-+        } else {
-+                if ((inst->sig.small_imm_a || inst->sig.small_imm_b) &&
-+                    !vir_is_add(qinst)) {
-+                        fail_instr(state, "small imm a/b used but no ADD inst");
-+                }
-+                if ((inst->sig.small_imm_c || inst->sig.small_imm_d) &&
-+                    !vir_is_mul(qinst)) {
-+                        fail_instr(state, "small imm c/d used but no MUL inst");
-+                }
-+                if (inst->sig.small_imm_a + inst->sig.small_imm_b +
-+                    inst->sig.small_imm_c + inst->sig.small_imm_d > 1) {
-+                        fail_instr(state, "only one small immediate can be "
-+                                   "enabled per instruction");
-+                }
-         }
- 
-         /* LDVARY writes r5 two instructions later and LDUNIF writes
-diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c
-index df0d6c36c9b..ed5bc011964 100644
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
-+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
-@@ -44,7 +44,9 @@ vir_opt_small_immediates(struct v3d_compile *c)
-                 /* The small immediate value sits in the raddr B field, so we
-                  * can't have 2 small immediates in one instruction (unless
-                  * they're the same value, but that should be optimized away
-                 * elsewhere).
-+                 * elsewhere). Since 7.x we can encode small immediates in
-+                 * any raddr field, but each instruction can still only use
-+                 * one.
-                  */
-                 bool uses_small_imm = false;
-                 for (int i = 0; i < vir_get_nsrc(inst); i++) {
-@@ -80,7 +82,22 @@ vir_opt_small_immediates(struct v3d_compile *c)
-                          */
-                         struct v3d_qpu_sig new_sig = inst->qpu.sig;
-                         uint32_t sig_packed;
-                        new_sig.small_imm_b = true;
-+                        if (c->devinfo->ver <= 42) {
-+                                new_sig.small_imm_b = true;
-+                        } else {
-+                               if (vir_is_add(inst)) {
-+                                       if (i == 0)
-+                                               new_sig.small_imm_a = true;
-+                                       else
-+                                               new_sig.small_imm_b = true;
-+                               } else {
-+                                       if (i == 0)
-+                                               new_sig.small_imm_c = true;
-+                                       else
-+                                               new_sig.small_imm_d = true;
-+                               }
-+                        }
-+
-                         if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed))
-                                 continue;
- 
-@@ -89,7 +106,10 @@ vir_opt_small_immediates(struct v3d_compile *c)
-                                 vir_dump_inst(c, inst);
-                                 fprintf(stderr, "\n");
-                         }
-                        inst->qpu.sig.small_imm_b = true;
-+                        inst->qpu.sig.small_imm_a = new_sig.small_imm_a;
-+                        inst->qpu.sig.small_imm_b = new_sig.small_imm_b;
-+                        inst->qpu.sig.small_imm_c = new_sig.small_imm_c;
-+                        inst->qpu.sig.small_imm_d = new_sig.small_imm_d;
-                         inst->qpu.raddr_b = packed;
- 
-                         inst->src[i].file = QFILE_SMALL_IMM;
-diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
-index cbbb495592b..4ed184cbbcb 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
-+++ b/src/broadcom/compiler/vir_to_qpu.c
-@@ -89,8 +89,15 @@ new_qpu_nop_before(struct qinst *inst)
- static void
- v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
- {
-        if (src.smimm)
-                unreachable("v3d71_set_src: pending handling small immediates");
-+        /* If we have a small immediate move it from inst->raddr_b to the
-+         * corresponding raddr.
-+         */
-+        if (src.smimm) {
-+                assert(instr->sig.small_imm_a || instr->sig.small_imm_b ||
-+                       instr->sig.small_imm_c || instr->sig.small_imm_d);
-+                *raddr = instr->raddr_b;
-+                return;
-+        }
- 
-         assert(!src.magic);
-         *raddr = src.index;
-diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
-index b613de781dc..c1590a760de 100644
--- a/src/broadcom/qpu/qpu_disasm.c
-+++ b/src/broadcom/qpu/qpu_disasm.c
-@@ -113,7 +113,6 @@ v3d71_qpu_disasm_raddr(struct disasm_state *disasm,
-         }
- 
-         if (is_small_imm) {
-                unreachable("Pending handling small immediates");
-                 uint32_t val;
-                 ASSERTED bool ok =
-                         v3d_qpu_small_imm_unpack(disasm->devinfo,
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index f54ce7210fb..c30f4bbbccf 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -975,10 +975,10 @@ v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
-         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
-         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
- 
-        return (add_nsrc > 0 && inst->alu.add.a.raddr == raddr) ||
-               (add_nsrc > 1 && inst->alu.add.b.raddr == raddr) ||
-               (mul_nsrc > 0 && inst->alu.mul.a.raddr == raddr) ||
-               (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr);
-+        return (add_nsrc > 0 && !inst->sig.small_imm_a && inst->alu.add.a.raddr == raddr) ||
-+               (add_nsrc > 1 && !inst->sig.small_imm_b && inst->alu.add.b.raddr == raddr) ||
-+               (mul_nsrc > 0 && !inst->sig.small_imm_c && inst->alu.mul.a.raddr == raddr) ||
-+               (mul_nsrc > 1 && !inst->sig.small_imm_d && inst->alu.mul.b.raddr == raddr);
- }
- 
- bool
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index dece45c5c54..d408fb426fa 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -402,7 +402,7 @@ struct v3d_qpu_instr {
-         uint8_t sig_addr;
-         bool sig_magic; /* If the signal writes to a magic address */
-         uint8_t raddr_a; /* V3D 4.x */
-        uint8_t raddr_b; /* V3D 4.x*/
-+        uint8_t raddr_b; /* V3D 4.x (holds packed small immediate in 7.x too) */
-         struct v3d_qpu_flags flags;
- 
-         union {
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index ed5a8bc667d..7984712d527 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -1218,16 +1218,11 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
- 
-         instr->alu.add.op = desc->op;
- 
-        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
-+        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
-          * operands.
-          */
-        /* FIXME: for now hardcoded values, until we got the small_imm support
-         * in place
-         */
-        uint32_t small_imm_a = 0;
-        uint32_t small_imm_b = 0;
-        if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
-            small_imm_b *256 + (op & 3) * 64 + raddr_b) {
-+        if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
-+            instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
-                 if (instr->alu.add.op == V3D_QPU_A_FMIN)
-                         instr->alu.add.op = V3D_QPU_A_FMAX;
-                 if (instr->alu.add.op == V3D_QPU_A_FADD)
-@@ -1858,11 +1853,6 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                 uint32_t output_pack;
-                 uint32_t a_unpack;
-                 uint32_t b_unpack;
-                /* FIXME: for now hardcoded values, until we got the small_imm
-                 * support in place
-                 */
-                uint32_t small_imm_a = 0;
-                uint32_t small_imm_b = 0;
- 
-                 if (instr->alu.add.op != V3D_QPU_A_FCMP) {
-                         if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
-@@ -1886,8 +1876,8 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                  * distinguished by which order their operands come in.
-                  */
-                 bool ordering =
-                        small_imm_a * 256 + a_unpack * 64 + raddr_a >
-                        small_imm_b * 256 + b_unpack * 64 + raddr_b;
-+                        instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
-+                        instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
-                 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
-                       instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
-                     ((instr->alu.add.op == V3D_QPU_A_FMAX ||
-@@ -1901,6 +1891,22 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
-                         temp = raddr_a;
-                         raddr_a = raddr_b;
-                         raddr_b = temp;
-+
-+                        /* If we are swapping raddr_a/b we also need to swap
-+                         * small_imm_a/b.
-+                         */
-+                        if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
-+                                assert(instr->sig.small_imm_a !=
-+                                       instr->sig.small_imm_b);
-+                                struct v3d_qpu_sig new_sig = instr->sig;
-+                                new_sig.small_imm_a = !instr->sig.small_imm_a;
-+                                new_sig.small_imm_b = !instr->sig.small_imm_b;
-+                                uint32_t sig;
-+                                if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
-+                                    return false;
-+                            *packed_instr &= ~V3D_QPU_SIG_MASK;
-+                            *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
-+                        }
-                 }
- 
-                 opcode |= a_unpack << 2;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0058-broadcom-compiler-update-thread-end-restrictions-for.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0058-broadcom-compiler-update-thread-end-restrictions-for.patch
@ -1,61 +0,0 @@
-From 3af87d2672da7c928ecf8a0a1cd1bef8a6729364 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 22 Nov 2021 12:56:03 +0100
-Subject: [PATCH 058/142] broadcom/compiler: update thread end restrictions for
- v7.x
-
-In 4.x it is not allowed to write to the register file in the last
-3 instructions, but in 7.x we only have this restriction in the
-thread end instruction itself, and only if the write comes from
-the ALU ports.
---
- src/broadcom/compiler/qpu_schedule.c | 31 ++++++++++++++++++++--------
- 1 file changed, 22 insertions(+), 9 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index bd1c920848a..cba16c77d67 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1938,17 +1938,30 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
-                         return false;
-                 }
- 
-                /* No writing physical registers at the end. */
-                bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
-                bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
-                if ((!add_is_nop && !inst->alu.add.magic_write) ||
-                    (!mul_is_nop && !inst->alu.mul.magic_write)) {
-                        return false;
-+                if (c->devinfo->ver <= 42) {
-+                        /* No writing physical registers at the end. */
-+                        bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
-+                        bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
-+                        if ((!add_is_nop && !inst->alu.add.magic_write) ||
-+                            (!mul_is_nop && !inst->alu.mul.magic_write)) {
-+                                return false;
-+                        }
-+
-+                        if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
-+                            !inst->sig_magic) {
-+                                return false;
-+                        }
-                 }
- 
-                if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
-                    !inst->sig_magic) {
-                        return false;
-+                if (c->devinfo->ver >= 71) {
-+                        /* The thread end instruction must not write to the
-+                         * register file via the add/mul ALUs.
-+                         */
-+                        if (slot == 0 &&
-+                            (!inst->alu.add.magic_write ||
-+                             !inst->alu.mul.magic_write)) {
-+                                return false;
-+                        }
-                 }
- 
-                 if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0059-broadcom-compiler-update-ldvary-thread-switch-delay-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0059-broadcom-compiler-update-ldvary-thread-switch-delay-.patch
@ -1,112 +0,0 @@
-From 7cfd5b808bb2f1cb17f57435cb5d411c4ac3aa6c Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 23 Nov 2021 10:04:49 +0100
-Subject: [PATCH 059/142] broadcom/compiler: update ldvary thread switch delay
- slot restriction for v7.x
-
-In V3D 7.x we don't have accumulators which would not survive a thread
-switch, so the only restriction is that ldvary can't be placed in the second
-delay slot of a thread switch.
-
-shader-db results for UnrealEngine4 shaders:
-
-total instructions in shared programs: 446458 -> 446401 (-0.01%)
-instructions in affected programs: 13492 -> 13435 (-0.42%)
-helped: 58
-HURT: 3
-Instructions are helped.
-
-total nops in shared programs: 19571 -> 19541 (-0.15%)
-nops in affected programs: 161 -> 131 (-18.63%)
-helped: 30
-HURT: 0
-Nops are helped.
---
- src/broadcom/compiler/qpu_schedule.c | 33 +++++++++++++++++++++-------
- src/broadcom/compiler/qpu_validate.c | 10 +++++++--
- 2 files changed, 33 insertions(+), 10 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index cba16c77d67..32f651851cf 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1491,11 +1491,20 @@ retry:
-                          * ldvary now if the follow-up fixup would place
-                          * it in the delay slots of a thrsw, which is not
-                          * allowed and would prevent the fixup from being
-                         * successful.
-+                         * successful. In V3D 7.x we can allow this to happen
-+                         * as long as it is not the last delay slot.
-                          */
-                        if (inst->sig.ldvary &&
-                            scoreboard->last_thrsw_tick + 2 >= scoreboard->tick - 1) {
-                                continue;
-+                        if (inst->sig.ldvary) {
-+                                if (c->devinfo->ver <= 42 &&
-+                                    scoreboard->last_thrsw_tick + 2 >=
-+                                    scoreboard->tick - 1) {
-+                                        continue;
-+                                }
-+                                if (c->devinfo->ver >= 71 &&
-+                                    scoreboard->last_thrsw_tick + 2 ==
-+                                    scoreboard->tick - 1) {
-+                                        continue;
-+                                }
-                         }
- 
-                         /* We can emit a new tmu lookup with a previous ldtmu
-@@ -2020,8 +2029,12 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
-         if (slot > 0 && v3d_qpu_instr_is_legacy_sfu(&qinst->qpu))
-                 return false;
- 
-        if (slot > 0 && qinst->qpu.sig.ldvary)
-                return false;
-+        if (qinst->qpu.sig.ldvary) {
-+                if (c->devinfo->ver <= 42 && slot > 0)
-+                        return false;
-+                if (c->devinfo->ver >= 71 && slot == 2)
-+                        return false;
-+        }
- 
-         /* unifa and the following 3 instructions can't overlap a
-          * thread switch/end. The docs further clarify that this means
-@@ -2618,9 +2631,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
- 
-         /* We can't put an ldvary in the delay slots of a thrsw. We should've
-          * prevented this when pairing up the ldvary with another instruction
-         * and flagging it for a fixup.
-+         * and flagging it for a fixup. In V3D 7.x this is limited only to the
-+         * second delay slot.
-          */
-        assert(scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1);
-+        assert((devinfo->ver <= 42 &&
-+                scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) ||
-+               (devinfo->ver >= 71 &&
-+                scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1));
- 
-         /* Move the ldvary to the previous instruction and remove it from the
-          * current one.
-diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
-index 41070484286..4f09aa8aef4 100644
--- a/src/broadcom/compiler/qpu_validate.c
-+++ b/src/broadcom/compiler/qpu_validate.c
-@@ -215,8 +215,14 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
-                                    "SFU write started during THRSW delay slots ");
-                 }
- 
-                if (inst->sig.ldvary)
-                        fail_instr(state, "LDVARY during THRSW delay slots");
-+                if (inst->sig.ldvary) {
-+                        if (devinfo->ver <= 42)
-+                                fail_instr(state, "LDVARY during THRSW delay slots");
-+                        if (devinfo->ver >= 71 &&
-+                            state->ip - state->last_thrsw_ip == 2) {
-+                                fail_instr(state, "LDVARY in 2nd THRSW delay slot");
-+                        }
-+                }
-         }
- 
-         (void)qpu_magic_waddr_matches; /* XXX */
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0060-broadcom-compiler-lift-restriction-for-branch-msfign.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0060-broadcom-compiler-lift-restriction-for-branch-msfign.patch
@ -1,30 +0,0 @@
-From ca4063d627cd31c589a8e8688f2876dd8211d1bc Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 25 Nov 2021 08:31:02 +0100
-Subject: [PATCH 060/142] broadcom/compiler: lift restriction for branch +
- msfign after setmsf for v7.x
-
---
- src/broadcom/compiler/qpu_schedule.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 32f651851cf..476eae691ab 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -2373,10 +2373,11 @@ emit_branch(struct v3d_compile *c,
-         assert(scoreboard->last_branch_tick + 3 < branch_tick);
-         assert(scoreboard->last_unifa_write_tick + 3 < branch_tick);
- 
-        /* Can't place a branch with msfign != 0 and cond != 0,2,3 after
-+        /* V3D 4.x can't place a branch with msfign != 0 and cond != 0,2,3 after
-          * setmsf.
-          */
-         bool is_safe_msf_branch =
-+                c->devinfo->ver >= 71 ||
-                 inst->qpu.branch.msfign == V3D_QPU_MSFIGN_NONE ||
-                 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS ||
-                 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_A0 ||
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0061-broadcom-compiler-start-allocating-from-RF-4-in-V7.x.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0061-broadcom-compiler-start-allocating-from-RF-4-in-V7.x.patch
@ -1,38 +0,0 @@
-From 167510aa43bbcf06e57a64495cee40e8cdaf5f8b Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Fri, 26 Nov 2021 10:37:05 +0100
-Subject: [PATCH 061/142] broadcom/compiler: start allocating from RF 4 in V7.x
-
-In V3D 4.x we start at RF3 so that we allocate RF0-2 only if there
-aren't any other RFs available. This is useful with small shaders
-to ensure that our TLB writes don't use these registers because
-these are the last instructions we emit in fragment shaders and
-the last instructions in a program can't write to these registers,
-so if we do, we need to emit NOPs.
-
-In V3D 7.x the registers affected by this restriction are RF2-3,
-so we choose to start at RF4.
---
- src/broadcom/compiler/vir_register_allocate.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 6f7b1ca0589..440b093a636 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -1234,9 +1234,10 @@ v3d_register_allocate(struct v3d_compile *c)
-                 .phys_index = phys_index,
-                 .next_acc = 0,
-                 /* Start at RF3, to try to keep the TLB writes from using
-                 * RF0-2.
-+                 * RF0-2. Start at RF4 in 7.x to prevent TLB writes from
-+                 * using RF2-3.
-                  */
-                .next_phys = 3,
-+                .next_phys = c->devinfo->ver <= 42 ? 3 : 4,
-                 .nodes = &c->nodes,
-                 .devinfo = c->devinfo,
-         };
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0062-broadcom-compiler-validate-restrictions-after-TLB-Z-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0062-broadcom-compiler-validate-restrictions-after-TLB-Z-.patch
@ -1,71 +0,0 @@
-From d47ea903b96e43b07bdef21f8026da818e30fcd1 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 25 Nov 2021 13:00:34 +0100
-Subject: [PATCH 062/142] broadcom/compiler: validate restrictions after TLB Z
- write
-
---
- src/broadcom/compiler/qpu_validate.c | 28 ++++++++++++++++++++++++++++
- 1 file changed, 28 insertions(+)
-
-diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
-index 4f09aa8aef4..1082fb7d50a 100644
--- a/src/broadcom/compiler/qpu_validate.c
-+++ b/src/broadcom/compiler/qpu_validate.c
-@@ -41,6 +41,7 @@ struct v3d_qpu_validate_state {
-         int last_sfu_write;
-         int last_branch_ip;
-         int last_thrsw_ip;
-+        int first_tlb_z_write;
- 
-         /* Set when we've found the last-THRSW signal, or if we were started
-          * in single-segment mode.
-@@ -110,11 +111,37 @@ static void
- qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
- {
-         const struct v3d_device_info *devinfo = state->c->devinfo;
-+
-+        if (qinst->is_tlb_z_write && state->ip < state->first_tlb_z_write)
-+                state->first_tlb_z_write = state->ip;
-+
-         const struct v3d_qpu_instr *inst = &qinst->qpu;
- 
-+        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
-+            state->first_tlb_z_write >= 0 &&
-+            state->ip > state->first_tlb_z_write &&
-+            inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
-+            inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
-+            inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
-+            inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
-+                fail_instr(state, "Implicit branch MSF read after TLB Z write");
-+        }
-+
-         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
-                 return;
- 
-+        if (inst->alu.add.op == V3D_QPU_A_SETMSF &&
-+            state->first_tlb_z_write >= 0 &&
-+            state->ip > state->first_tlb_z_write) {
-+                fail_instr(state, "SETMSF after TLB Z write");
-+        }
-+
-+        if (state->first_tlb_z_write >= 0 &&
-+            state->ip > state->first_tlb_z_write &&
-+            inst->alu.add.op == V3D_QPU_A_MSF) {
-+                fail_instr(state, "MSF read after TLB Z write");
-+        }
-+
-         if (devinfo->ver < 71) {
-                 if (inst->sig.small_imm_a || inst->sig.small_imm_c ||
-                     inst->sig.small_imm_d) {
-@@ -348,6 +375,7 @@ qpu_validate(struct v3d_compile *c)
-                 .last_sfu_write = -10,
-                 .last_thrsw_ip = -10,
-                 .last_branch_ip = -10,
-+                .first_tlb_z_write = INT_MAX,
-                 .ip = 0,
- 
-                 .last_thrsw_found = !c->last_thrsw,
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0063-broadcom-compiler-lift-restriction-on-vpmwt-in-last-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0063-broadcom-compiler-lift-restriction-on-vpmwt-in-last-.patch
@ -1,26 +0,0 @@
-From 6cdf01fad49489b5fc66d231b527de5245d5de32 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 29 Nov 2021 13:23:11 +0100
-Subject: [PATCH 063/142] broadcom/compiler: lift restriction on vpmwt in last
- instruction for V3D 7.x
-
---
- src/broadcom/compiler/qpu_schedule.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 476eae691ab..77fb6a794e6 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -1934,7 +1934,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
-         if (slot > 0 && qinst->uniform != ~0)
-                 return false;
- 
-        if (v3d_qpu_waits_vpm(inst))
-+        if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst))
-                 return false;
- 
-         if (inst->sig.ldvary)
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0064-broadcom-compiler-fix-up-copy-propagation-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0064-broadcom-compiler-fix-up-copy-propagation-for-v71.patch
@ -1,134 +0,0 @@
-From acc54637f0787ba4dc887130c25c628ccdaf4e38 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 9 Nov 2021 11:34:59 +0100
-Subject: [PATCH 064/142] broadcom/compiler: fix up copy propagation for v71
-
-Update rules for unsafe copy propagations to match v7.x.
---
- .../compiler/vir_opt_copy_propagate.c         | 83 +++++++++++++------
- 1 file changed, 56 insertions(+), 27 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c
-index c4aa7255a17..1260838ca05 100644
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
-+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
-@@ -35,7 +35,7 @@
- #include "v3d_compiler.h"
- 
- static bool
-is_copy_mov(struct qinst *inst)
-+is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst)
- {
-         if (!inst)
-                 return false;
-@@ -62,36 +62,65 @@ is_copy_mov(struct qinst *inst)
-                 return false;
-         }
- 
-        switch (inst->src[0].file) {
-        case QFILE_MAGIC:
-                /* No copy propagating from R3/R4/R5 -- the MOVs from those
-                 * are there to register allocate values produced into R3/4/5
-                 * to other regs (though hopefully r3/4/5).
-                 */
-                switch (inst->src[0].index) {
-                case V3D_QPU_WADDR_R3:
-                case V3D_QPU_WADDR_R4:
-                case V3D_QPU_WADDR_R5:
-                        return false;
-+        if (devinfo->ver <= 42) {
-+                switch (inst->src[0].file) {
-+                case QFILE_MAGIC:
-+                        /* No copy propagating from R3/R4/R5 -- the MOVs from
-+                         * those are there to register allocate values produced
-+                         * into R3/4/5 to other regs (though hopefully r3/4/5).
-+                         */
-+                        switch (inst->src[0].index) {
-+                        case V3D_QPU_WADDR_R3:
-+                        case V3D_QPU_WADDR_R4:
-+                        case V3D_QPU_WADDR_R5:
-+                                return false;
-+                        default:
-+                                break;
-+                        }
-+                        break;
-+
-+                case QFILE_REG:
-+                        switch (inst->src[0].index) {
-+                        case 0:
-+                        case 1:
-+                        case 2:
-+                                /* MOVs from rf0/1/2 are only to track the live
-+                                 * intervals for W/centroid W/Z.
-+                                 */
-+                                return false;
-+                        }
-+                        break;
-+
-                 default:
-                         break;
-                 }
-                break;
-
-        case QFILE_REG:
-                switch (inst->src[0].index) {
-                case 0:
-                case 1:
-                case 2:
-                        /* MOVs from rf0/1/2 are only to track the live
-+        } else {
-+                assert(devinfo->ver >= 71);
-+                switch (inst->src[0].file) {
-+                case QFILE_REG:
-+                        switch (inst->src[0].index) {
-+                        /* MOVs from rf1/2/3 are only to track the live
-                          * intervals for W/centroid W/Z.
-+                         *
-+                         * Note: rf0 can be implicitly written by ldvary
-+                         * (no temp involved), so it is not an SSA value and
-+                         * could clash with writes to other temps that are
-+                         * also allocated to rf0. In theory, that would mean
-+                         * that we can't copy propagate from it, but we handle
-+                         * this at register allocation time, preventing temps
-+                         * from being allocated to rf0 while the rf0 value from
-+                         * ldvary is still live.
-                          */
-                        return false;
-                }
-                break;
-+                        case 1:
-+                        case 2:
-+                        case 3:
-+                                return false;
-+                        }
-+                        break;
- 
-        default:
-                break;
-+                default:
-+                        break;
-+                }
-         }
- 
-         return true;
-@@ -135,7 +164,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
-                  */
-                 struct qinst *mov = movs[inst->src[i].index];
-                 if (!mov) {
-                        if (!is_copy_mov(c->defs[inst->src[i].index]))
-+                        if (!is_copy_mov(c->devinfo, c->defs[inst->src[i].index]))
-                                 continue;
-                         mov = c->defs[inst->src[i].index];
- 
-@@ -245,7 +274,7 @@ vir_opt_copy_propagate(struct v3d_compile *c)
- 
-                         apply_kills(c, movs, inst);
- 
-                        if (is_copy_mov(inst))
-+                        if (is_copy_mov(c->devinfo, inst))
-                                 movs[inst->dst.index] = inst;
-                 }
-         }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0065-broadcom-qpu-new-packing-conversion-v71-instructions.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0065-broadcom-qpu-new-packing-conversion-v71-instructions.patch
@ -1,150 +0,0 @@
-From c340f7f1eb4a1e5c0fafe1ea2f801f2ebaf82d8d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 26 Nov 2021 01:24:12 +0100
-Subject: [PATCH 065/142] broadcom/qpu: new packing/conversion v71 instructions
-
-This commit adds the qpu definitions for several new v71
-instructions.
-
-Packing:
-  * vpack does a 2x32 to 2x16 bit integer pack
-  * v8pack: Pack 2 x 2x16 bit integers into 4x8 bits
-  * v10pack packs parts of 2 2x16 bit integer into r10g10b10a2.
-  * v11fpack packs parts of 2 2x16 bit float into r11g11b10 rounding
-    to nearest
-
-Conversion to unorm/snorm:
-  * vftounorm8/vftosnorm8: converts from 2x16-bit floating point
-    to 2x8 bit unorm/snorm.
-  * ftounorm16/ftosnorm16: converts floating point to 16-bit
-    unorm/snorm
-  * vftounorm10lo: Convert 2x16-bit floating point to 2x10-bit unorm
-  * vftounorm10hi: Convert 2x16-bit floating point to one 2-bit and one 10-bit unorm
---
- src/broadcom/qpu/qpu_instr.c | 20 ++++++++++++++++++++
- src/broadcom/qpu/qpu_instr.h | 12 ++++++++++++
- src/broadcom/qpu/qpu_pack.c  | 12 ++++++++++++
- 3 files changed, 44 insertions(+)
-
-diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
-index c30f4bbbccf..44f20618a5a 100644
--- a/src/broadcom/qpu/qpu_instr.c
-+++ b/src/broadcom/qpu/qpu_instr.c
-@@ -179,6 +179,10 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
-                 [V3D_QPU_A_UTOF] = "utof",
-                 [V3D_QPU_A_MOV] = "mov",
-                 [V3D_QPU_A_FMOV] = "fmov",
-+                [V3D_QPU_A_VPACK] = "vpack",
-+                [V3D_QPU_A_V8PACK] = "v8pack",
-+                [V3D_QPU_A_V10PACK] = "v10pack",
-+                [V3D_QPU_A_V11FPACK] = "v11fpack",
-         };
- 
-         if (op >= ARRAY_SIZE(op_names))
-@@ -201,6 +205,12 @@ v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
-                 [V3D_QPU_M_MOV] = "mov",
-                 [V3D_QPU_M_NOP] = "nop",
-                 [V3D_QPU_M_FMUL] = "fmul",
-+                [V3D_QPU_M_FTOUNORM16] = "ftounorm16",
-+                [V3D_QPU_M_FTOSNORM16] = "ftosnorm16",
-+                [V3D_QPU_M_VFTOUNORM8] = "vftounorm8",
-+                [V3D_QPU_M_VFTOSNORM8] = "vftosnorm8",
-+                [V3D_QPU_M_VFTOUNORM10LO] = "vftounorm10lo",
-+                [V3D_QPU_M_VFTOUNORM10HI] = "vftounorm10hi",
-         };
- 
-         if (op >= ARRAY_SIZE(op_names))
-@@ -463,6 +473,10 @@ static const uint8_t add_op_args[] = {
- 
-         [V3D_QPU_A_MOV] = D | A,
-         [V3D_QPU_A_FMOV] = D | A,
-+        [V3D_QPU_A_VPACK] = D | A | B,
-+        [V3D_QPU_A_V8PACK] = D | A | B,
-+        [V3D_QPU_A_V10PACK] = D | A | B,
-+        [V3D_QPU_A_V11FPACK] = D | A | B,
- };
- 
- static const uint8_t mul_op_args[] = {
-@@ -476,6 +490,12 @@ static const uint8_t mul_op_args[] = {
-         [V3D_QPU_M_NOP] = 0,
-         [V3D_QPU_M_MOV] = D | A,
-         [V3D_QPU_M_FMUL] = D | A | B,
-+        [V3D_QPU_M_FTOUNORM16] = D | A,
-+        [V3D_QPU_M_FTOSNORM16] = D | A,
-+        [V3D_QPU_M_VFTOUNORM8] = D | A,
-+        [V3D_QPU_M_VFTOSNORM8] = D | A,
-+        [V3D_QPU_M_VFTOUNORM10LO] = D | A,
-+        [V3D_QPU_M_VFTOUNORM10HI] = D | A,
- };
- 
- bool
-diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
-index d408fb426fa..56eee9f9cac 100644
--- a/src/broadcom/qpu/qpu_instr.h
-+++ b/src/broadcom/qpu/qpu_instr.h
-@@ -231,6 +231,10 @@ enum v3d_qpu_add_op {
-         /* V3D 7.x */
-         V3D_QPU_A_FMOV,
-         V3D_QPU_A_MOV,
-+        V3D_QPU_A_VPACK,
-+        V3D_QPU_A_V8PACK,
-+        V3D_QPU_A_V10PACK,
-+        V3D_QPU_A_V11FPACK,
- };
- 
- enum v3d_qpu_mul_op {
-@@ -244,6 +248,14 @@ enum v3d_qpu_mul_op {
-         V3D_QPU_M_MOV,
-         V3D_QPU_M_NOP,
-         V3D_QPU_M_FMUL,
-+
-+        /* V3D 7.x */
-+        V3D_QPU_M_FTOUNORM16,
-+        V3D_QPU_M_FTOSNORM16,
-+        V3D_QPU_M_VFTOUNORM8,
-+        V3D_QPU_M_VFTOSNORM8,
-+        V3D_QPU_M_VFTOUNORM10LO,
-+        V3D_QPU_M_VFTOUNORM10HI,
- };
- 
- enum v3d_qpu_output_pack {
-diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
-index 7984712d527..6cd75adac6d 100644
--- a/src/broadcom/qpu/qpu_pack.c
-+++ b/src/broadcom/qpu/qpu_pack.c
-@@ -783,6 +783,9 @@ static const struct opcode_desc add_ops_v71[] = {
-         { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
-         { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
- 
-+        { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
-+        { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },
-+
-         { 249, 249, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FMOV, 71 },
-         { 249, 249, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FMOV, 71 },
-         { 249, 249, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FMOV, 71 },
-@@ -797,6 +800,8 @@ static const struct opcode_desc add_ops_v71[] = {
-         { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
-         { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
- 
-+        { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
-+        { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },
- };
- 
- static const struct opcode_desc mul_ops_v71[] = {
-@@ -822,6 +827,13 @@ static const struct opcode_desc mul_ops_v71[] = {
-         { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
-         { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },
- 
-+        { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
-+        { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
-+        { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
-+        { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
-+        { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
-+        { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },
-+
-         { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },
- 
-         { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0068-broadcom-compiler-don-t-allocate-spill-base-to-rf0-i.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0068-broadcom-compiler-don-t-allocate-spill-base-to-rf0-i.patch
@ -1,68 +0,0 @@
-From f6082e941a3454c8735df2ff2713ae49b3daa74f Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 18 Apr 2023 08:50:13 +0200
-Subject: [PATCH 068/142] broadcom/compiler: don't allocate spill base to rf0
- in V3D 7.x
-
-Otherwise it can be stomped by instructions doing implicit rf0 writes.
---
- src/broadcom/compiler/vir_register_allocate.c | 21 +++++++++++++++----
- 1 file changed, 17 insertions(+), 4 deletions(-)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 440b093a636..121c9b2794f 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -582,7 +582,8 @@ interferes(int32_t t0_start, int32_t t0_end, int32_t t1_start, int32_t t1_end)
- }
- 
- static void
-v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-+v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int *implicit_rf_nodes,
-+              int spill_temp)
- {
-         c->spill_start_num_temps = c->num_temps;
-         c->spilling = true;
-@@ -594,8 +595,20 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
-                 spill_offset = c->spill_size;
-                 c->spill_size += V3D_CHANNELS * sizeof(uint32_t);
- 
-                if (spill_offset == 0)
-+                if (spill_offset == 0) {
-                         v3d_setup_spill_base(c);
-+
-+                        /* Don't allocate our spill base to rf0 to avoid
-+                         * conflicts with instructions doing implicit writes
-+                         * to that register.
-+                         */
-+                        if (!c->devinfo->has_accumulators) {
-+                                ra_add_node_interference(
-+                                        c->g,
-+                                        temp_to_node(c, c->spill_base.index),
-+                                        implicit_rf_nodes[0]);
-+                        }
-+                }
-         }
- 
-         struct qinst *last_thrsw = c->last_thrsw;
-@@ -1346,7 +1359,7 @@ v3d_register_allocate(struct v3d_compile *c)
-                         int node = v3d_choose_spill_node(c);
-                         uint32_t temp = node_to_temp(c, node);
-                         if (node != -1) {
-                                v3d_spill_reg(c, acc_nodes, temp);
-+                                v3d_spill_reg(c, acc_nodes, implicit_rf_nodes, temp);
-                                 continue;
-                         }
-                 }
-@@ -1363,7 +1376,7 @@ v3d_register_allocate(struct v3d_compile *c)
-                 enum temp_spill_type spill_type =
-                         get_spill_type_for_temp(c, temp);
-                 if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
-                        v3d_spill_reg(c, acc_nodes, temp);
-+                        v3d_spill_reg(c, acc_nodes, implicit_rf_nodes, temp);
-                         if (c->spills + c->fills > c->max_tmu_spills)
-                                 goto spill_fail;
-                 } else {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0069-broadcom-compiler-improve-allocation-for-final-progr.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0069-broadcom-compiler-improve-allocation-for-final-progr.patch
@ -1,186 +0,0 @@
-From 0e9577fbb18a026390f653ca22f5a98a69a5fe59 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 2 May 2023 10:12:37 +0200
-Subject: [PATCH 069/142] broadcom/compiler: improve allocation for final
- program instructions
-
-The last 3 instructions can't use specific registers so flag all the
-nodes for temps used in the last program instructions and try to
-avoid assigning any of these. This may help us avoid injecting nops
-for the last thread switch instruction.
-
-Because register allocation needs to happen before QPU scheduling
-and instruction merging we can't tell exactly what the last 3
-instructions will be, so we do this for a few more instructions than
-just 3.
-
-We only do this for fragment shaders because other shader stages
-always end with VPM store instructions that take a small immediate
-and therefore will never allow us to merge the final thread switch
-earlier, so limiting allocation for these shaders will never improve
-anything and might instead be detrimental.
-
-total instructions in shared programs: 11471389 -> 11464335 (-0.06%)
-instructions in affected programs: 582908 -> 575854 (-1.21%)
-helped: 4669
-HURT: 578
-Instructions are helped.
-
-total max-temps in shared programs: 2230497 -> 2230150 (-0.02%)
-max-temps in affected programs: 5662 -> 5315 (-6.13%)
-helped: 344
-HURT: 44
-Max-temps are helped.
-
-total sfu-stalls in shared programs: 18068 -> 18077 (0.05%)
-sfu-stalls in affected programs: 264 -> 273 (3.41%)
-helped: 37
-HURT: 48
-Inconclusive result (value mean confidence interval includes 0).
-
-total inst-and-stalls in shared programs: 11489457 -> 11482412 (-0.06%)
-inst-and-stalls in affected programs: 585180 -> 578135 (-1.20%)
-helped: 4659
-HURT: 588
-Inst-and-stalls are helped.
-
-total nops in shared programs: 301738 -> 298140 (-1.19%)
-nops in affected programs: 14680 -> 11082 (-24.51%)
-helped: 3252
-HURT: 108
-Nops are helped.
---
- src/broadcom/compiler/v3d_compiler.h          |  1 +
- src/broadcom/compiler/vir_register_allocate.c | 69 +++++++++++++++++--
- 2 files changed, 66 insertions(+), 4 deletions(-)
-
-diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
-index 425ab0cdf9d..2642d23b629 100644
--- a/src/broadcom/compiler/v3d_compiler.h
-+++ b/src/broadcom/compiler/v3d_compiler.h
-@@ -613,6 +613,7 @@ struct v3d_ra_node_info {
-         struct {
-                 uint32_t priority;
-                 uint8_t class_bits;
-+                bool is_program_end;
- 
-                 /* V3D 7.x */
-                 bool is_ldunif_dst;
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 121c9b2794f..495644bb557 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -385,6 +385,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
-         c->nodes.info[node].class_bits = class_bits;
-         c->nodes.info[node].priority = 0;
-         c->nodes.info[node].is_ldunif_dst = false;
-+        c->nodes.info[node].is_program_end = false;
- }
- 
- /* The spill offset for this thread takes a bit of setup, so do it once at
-@@ -929,6 +930,17 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
-                 return true;
-         }
- 
-+        /* The last 3 instructions in a shader can't use some specific registers
-+         * (usually early rf registers, depends on v3d version) so try to
-+         * avoid allocating these to registers used by the last instructions
-+         * in the shader.
-+         */
-+        const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4;
-+        if (v3d_ra->nodes->info[node].is_program_end &&
-+            v3d_ra->next_phys < safe_rf_start) {
-+                v3d_ra->next_phys = safe_rf_start;
-+        }
-+
-         for (int i = 0; i < PHYS_COUNT; i++) {
-                 int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
-                 int phys = v3d_ra->phys_index + phys_off;
-@@ -1218,6 +1230,44 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
-         }
- }
- 
-+/* Flag nodes of temps used by the exit block's last ALU instructions. */
-+static void
-+flag_program_end_nodes(struct v3d_compile *c)
-+{
-+        /* Only look for registers used in this many instructions */
-+        uint32_t last_set_count = 6;
-+        struct qblock *last_block = vir_exit_block(c);
-+        list_for_each_entry_rev(struct qinst, inst, &last_block->instructions, link) {
-+                /* Non-ALU instructions are neither flagged nor counted. */
-+                if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
-+                        continue;
-+
-+                int num_src = v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
-+                for (int i = 0; i < num_src; i++) {
-+                        if (inst->src[i].file == QFILE_TEMP) {
-+                                int node = temp_to_node(c, inst->src[i].index);
-+                                c->nodes.info[node].is_program_end = true;
-+                        }
-+                }
-+
-+                num_src = v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
-+                for (int i = 0; i < num_src; i++) {
-+                        if (inst->src[i].file == QFILE_TEMP) {
-+                                int node = temp_to_node(c, inst->src[i].index);
-+                                c->nodes.info[node].is_program_end = true;
-+                        }
-+                }
-+
-+                if (inst->dst.file == QFILE_TEMP) {
-+                        int node = temp_to_node(c, inst->dst.index);
-+                        c->nodes.info[node].is_program_end = true;
-+                }
-+
-+                if (--last_set_count == 0)
-+                        break;
-+        }
-+}
-+
- /**
-  * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
-  *
-@@ -1280,17 +1330,16 @@ v3d_register_allocate(struct v3d_compile *c)
-          */
-         for (uint32_t i = 0; i < num_ra_nodes; i++) {
-                 c->nodes.info[i].is_ldunif_dst = false;
-+                c->nodes.info[i].is_program_end = false;
-+                c->nodes.info[i].priority = 0;
-+                c->nodes.info[i].class_bits = 0;
-                 if (c->devinfo->has_accumulators && i < ACC_COUNT) {
-                         acc_nodes[i] = i;
-                         ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
-                        c->nodes.info[i].priority = 0;
-                        c->nodes.info[i].class_bits = 0;
-                 } else if (!c->devinfo->has_accumulators &&
-                            i < ARRAY_SIZE(implicit_rf_nodes)) {
-                         implicit_rf_nodes[i] = i;
-                         ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i);
-                        c->nodes.info[i].priority = 0;
-                        c->nodes.info[i].class_bits = 0;
-                 } else {
-                         uint32_t t = node_to_temp(c, i);
-                         c->nodes.info[i].priority =
-@@ -1327,6 +1376,18 @@ v3d_register_allocate(struct v3d_compile *c)
-                                                       last_ldvary_ip, inst);
-         }
- 
-+        /* Flag the nodes that are used in the last instructions of the program
-+         * (there are some registers that cannot be used in the last 3
-+         * instructions). We only do this for fragment shaders, because the idea
-+         * is that by avoiding this conflict we may be able to emit the last
-+         * thread switch earlier in some cases, however, in non-fragment shaders
-+         * this won't happen because the last instructions are always VPM stores
-+         * with a small immediate, which conflicts with other signals,
-+         * preventing us from ever moving the thrsw earlier.
-+         */
-+        if (c->s->info.stage == MESA_SHADER_FRAGMENT)
-+                flag_program_end_nodes(c);
-+
-         /* Set the register classes for all our temporaries in the graph */
-         for (uint32_t i = 0; i < c->num_temps; i++) {
-                 ra_set_node_class(c->g, temp_to_node(c, i),
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0070-broadcom-compiler-don-t-assign-registers-to-unused-n.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0070-broadcom-compiler-don-t-assign-registers-to-unused-n.patch
@ -1,105 +0,0 @@
-From 645fe451bcecbe3345a144222306d06fb39f6b9f Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 2 May 2023 10:17:47 +0200
-Subject: [PATCH 070/142] broadcom/compiler: don't assign registers to unused
- nodes/temps
-
-In programs with a lot of unused temps, if we don't do this, we may
-end up recycling previously used rfs more often, which can be
-detrimental to instruction pairing.
-
-total instructions in shared programs: 11464335 -> 11444136 (-0.18%)
-instructions in affected programs: 8976743 -> 8956544 (-0.23%)
-helped: 33196
-HURT: 33778
-Inconclusive result
-
-total max-temps in shared programs: 2230150 -> 2229445 (-0.03%)
-max-temps in affected programs: 86413 -> 85708 (-0.82%)
-helped: 2217
-HURT: 1523
-Max-temps are helped.
-
-total sfu-stalls in shared programs: 18077 -> 17104 (-5.38%)
-sfu-stalls in affected programs: 8669 -> 7696 (-11.22%)
-helped: 2657
-HURT: 2182
-Sfu-stalls are helped.
-
-total inst-and-stalls in shared programs: 11482412 -> 11461240 (-0.18%)
-inst-and-stalls in affected programs: 8995697 -> 8974525 (-0.24%)
-helped: 33319
-HURT: 33708
-Inconclusive result
-
-total nops in shared programs: 298140 -> 296185 (-0.66%)
-nops in affected programs: 52805 -> 50850 (-3.70%)
-helped: 3797
-HURT: 2662
-Inconclusive result
---
- src/broadcom/compiler/v3d_compiler.h          |  1 +
- src/broadcom/compiler/vir_register_allocate.c | 14 ++++++++++++++
- 2 files changed, 15 insertions(+)
-
-diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
-index 2642d23b629..f1a807e38fd 100644
--- a/src/broadcom/compiler/v3d_compiler.h
-+++ b/src/broadcom/compiler/v3d_compiler.h
-@@ -614,6 +614,7 @@ struct v3d_ra_node_info {
-                 uint32_t priority;
-                 uint8_t class_bits;
-                 bool is_program_end;
-+                bool unused;
- 
-                 /* V3D 7.x */
-                 bool is_ldunif_dst;
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 495644bb557..0ab0474424f 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -386,6 +386,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
-         c->nodes.info[node].priority = 0;
-         c->nodes.info[node].is_ldunif_dst = false;
-         c->nodes.info[node].is_program_end = false;
-+        c->nodes.info[node].unused = false;
- }
- 
- /* The spill offset for this thread takes a bit of setup, so do it once at
-@@ -918,6 +919,12 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
-                  BITSET_WORD *regs,
-                  unsigned int *out)
- {
-+        /* If this node is for an unused temp, ignore. */
-+        if (v3d_ra->nodes->info[node].unused) {
-+                *out = 0;
-+                return true;
-+        }
-+
-         /* In V3D 7.x, try to assign rf0 to temps used as ldunif's dst
-          * so we can avoid turning them into ldunifrf (which uses the
-          * cond field to encode the dst and would prevent merge with
-@@ -1331,6 +1338,7 @@ v3d_register_allocate(struct v3d_compile *c)
-         for (uint32_t i = 0; i < num_ra_nodes; i++) {
-                 c->nodes.info[i].is_ldunif_dst = false;
-                 c->nodes.info[i].is_program_end = false;
-+                c->nodes.info[i].unused = false;
-                 c->nodes.info[i].priority = 0;
-                 c->nodes.info[i].class_bits = 0;
-                 if (c->devinfo->has_accumulators && i < ACC_COUNT) {
-@@ -1396,6 +1404,12 @@ v3d_register_allocate(struct v3d_compile *c)
- 
-         /* Add register interferences based on liveness data */
-         for (uint32_t i = 0; i < c->num_temps; i++) {
-+                /* And while we are here, let's also flag nodes for
-+                 * unused temps.
-+                 */
-+                if (c->temp_start[i] > c->temp_end[i])
-+                        c->nodes.info[temp_to_node(c, i)].unused = true;
-+
-                 for (uint32_t j = i + 1; j < c->num_temps; j++) {
-                         if (interferes(c->temp_start[i], c->temp_end[i],
-                                        c->temp_start[j], c->temp_end[j])) {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0071-broadcom-compiler-only-assign-rf0-as-last-resort-in-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0071-broadcom-compiler-only-assign-rf0-as-last-resort-in-.patch
@ -1,83 +0,0 @@
-From 851704169d59e28c5429b06d05e5ef952be893a2 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 15 May 2023 10:02:10 +0200
-Subject: [PATCH 071/142] broadcom/compiler: only assign rf0 as last resort in
- V3D 7.x
-
-So we can use it for ldunif(a) and avoid generating ldunif(a)rf which
-can't be paired with conditional instructions.
-
-shader-db (pi5):
-
-total instructions in shared programs: 11357802 -> 11338883 (-0.17%)
-instructions in affected programs: 7117889 -> 7098970 (-0.27%)
-helped: 24264
-HURT: 17574
-Instructions are helped.
-
-total uniforms in shared programs: 3857808 -> 3857815 (<.01%)
-uniforms in affected programs: 92 -> 99 (7.61%)
-helped: 0
-HURT: 1
-
-total max-temps in shared programs: 2230904 -> 2230199 (-0.03%)
-max-temps in affected programs: 52309 -> 51604 (-1.35%)
-helped: 1219
-HURT: 725
-Max-temps are helped.
-
-total sfu-stalls in shared programs: 15021 -> 15236 (1.43%)
-sfu-stalls in affected programs: 6848 -> 7063 (3.14%)
-helped: 1866
-HURT: 1704
-Inconclusive result
-
-total inst-and-stalls in shared programs: 11372823 -> 11354119 (-0.16%)
-inst-and-stalls in affected programs: 7149177 -> 7130473 (-0.26%)
-helped: 24315
-HURT: 17561
-Inst-and-stalls are helped.
-
-total nops in shared programs: 273624 -> 273711 (0.03%)
-nops in affected programs: 31562 -> 31649 (0.28%)
-helped: 1619
-HURT: 1854
-Inconclusive result (value mean confidence interval includes 0).
---
- src/broadcom/compiler/vir_register_allocate.c | 13 +++++++++++++
- 1 file changed, 13 insertions(+)
-
-diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
-index 0ab0474424f..8eac2b75bd7 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
-+++ b/src/broadcom/compiler/vir_register_allocate.c
-@@ -950,6 +950,11 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
- 
-         for (int i = 0; i < PHYS_COUNT; i++) {
-                 int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
-+
-+                /* Try to keep rf0 available for ldunif in 7.x (see above). */
-+                if (v3d_ra->devinfo->ver >= 71 && phys_off == 0)
-+                        continue;
-+
-                 int phys = v3d_ra->phys_index + phys_off;
- 
-                 if (BITSET_TEST(regs, phys)) {
-@@ -959,6 +964,14 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
-                 }
-         }
- 
-+        /* If we couldn't allocate, do try to assign rf0 if it is available. */
-+        if (v3d_ra->devinfo->ver >= 71 &&
-+            BITSET_TEST(regs, v3d_ra->phys_index)) {
-+                v3d_ra->next_phys = 1;
-+                *out = v3d_ra->phys_index;
-+                return true;
-+        }
-+
-         return false;
- }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0072-v3dv-recover-non-conformant-warning-for-not-fully-su.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0072-v3dv-recover-non-conformant-warning-for-not-fully-su.patch
@ -1,30 +0,0 @@
-From 0d3fd30d67ffc0195b0783e30ab6afbbe403310a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 28 Apr 2021 14:31:38 +0200
-Subject: [PATCH 072/142] v3dv: recover non-conformant warning for not fully
- supported hw
-
---
- src/broadcom/vulkan/v3dv_device.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
-index d5de3517670..d29ffad3531 100644
--- a/src/broadcom/vulkan/v3dv_device.c
-+++ b/src/broadcom/vulkan/v3dv_device.c
-@@ -1212,6 +1212,12 @@ create_physical_device(struct v3dv_instance *instance,
- 
-    list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
- 
-+   if (device->devinfo.ver != 42) {
-+      fprintf(stderr, "WARNING: v3dv support for hw version %i is neither "
-+              "a complete nor a conformant Vulkan implementation. Testing "
-+              "use only.\n", device->devinfo.ver);
-+   }
-+
-    return VK_SUCCESS;
- 
- fail:
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0073-v3dv-meson-add-v71-hw-generation.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0073-v3dv-meson-add-v71-hw-generation.patch
@ -1,504 +0,0 @@
-From 52b5ac62b367ae89574c8031fdcf7c1dae05c942 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 29 Jun 2021 11:59:53 +0200
-Subject: [PATCH 073/142] v3dv/meson: add v71 hw generation
-
-Starting point for v71 version inclusion.
-
-This just adds it as one of the versions to be compiled (on meson),
-updates the v3dX/v3dv_X macros, and update the code enough to get it
-compiling when building using the two versions. For any packet not
-available on v71 we just provide a generic asserted placeholder of
-generation not supported.
-
-Any real v71 support will be implemented on following commits.
---
- src/broadcom/vulkan/meson.build         |  6 +-
- src/broadcom/vulkan/v3dv_private.h      |  7 +++
- src/broadcom/vulkan/v3dvx_cmd_buffer.c  | 75 +++++++++++++++++++++++--
- src/broadcom/vulkan/v3dvx_image.c       | 16 +++++-
- src/broadcom/vulkan/v3dvx_meta_common.c | 32 +++++++++++
- src/broadcom/vulkan/v3dvx_pipeline.c    |  5 ++
- src/broadcom/vulkan/v3dvx_queue.c       | 11 ++++
- 7 files changed, 142 insertions(+), 10 deletions(-)
-
-diff --git a/src/broadcom/vulkan/meson.build b/src/broadcom/vulkan/meson.build
-index ad032d832ad..3da7364686f 100644
--- a/src/broadcom/vulkan/meson.build
-+++ b/src/broadcom/vulkan/meson.build
-@@ -27,6 +27,7 @@ v3dv_entrypoints = custom_target(
-     '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'v3dv',
-     '--beta', with_vulkan_beta.to_string(),
-     '--device-prefix', 'ver42',
-+    '--device-prefix', 'ver71',
-   ],
-   depend_files : vk_entrypoints_gen_depend_files,
- )
-@@ -67,10 +68,7 @@ files_per_version = files(
-   'v3dvx_queue.c',
- )
- 
-# The vulkan driver only supports version >= 42, which is the version present in
-# Rpi4. We need to explicitly set it as we are reusing pieces from the GL v3d
-# driver.
-v3d_versions = ['42']
-+v3d_versions = ['42', '71']
- 
- v3dv_flags = []
- 
-diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
-index c6707211529..6bdf338c67b 100644
--- a/src/broadcom/vulkan/v3dv_private.h
-+++ b/src/broadcom/vulkan/v3dv_private.h
-@@ -2608,6 +2608,9 @@ u64_compare(const void *key1, const void *key2)
-    case 42:                                           \
-       v3d_X_thing = &v3d42_##thing;                   \
-       break;                                          \
-+   case 71:                                           \
-+      v3d_X_thing = &v3d71_##thing;                   \
-+      break;                                          \
-    default:                                           \
-       unreachable("Unsupported hardware generation"); \
-    }                                                  \
-@@ -2626,6 +2629,10 @@ u64_compare(const void *key1, const void *key2)
- #  define v3dX(x) v3d42_##x
- #  include "v3dvx_private.h"
- #  undef v3dX
-+
-+#  define v3dX(x) v3d71_##x
-+#  include "v3dvx_private.h"
-+#  undef v3dX
- #endif
- 
- #ifdef ANDROID
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index f182b790d36..b958e634c82 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -56,10 +56,15 @@ v3dX(job_emit_enable_double_buffer)(struct v3dv_job *job)
-    };
-    config.width_in_pixels = tiling->width;
-    config.height_in_pixels = tiling->height;
-+#if V3D_VERSION == 42
-    config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
-    config.multisample_mode_4x = tiling->msaa;
-    config.double_buffer_in_non_ms_mode = tiling->double_buffer;
-    config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("HW generation 71 not supported yet.");
-+#endif
- 
-    uint8_t *rewrite_addr = (uint8_t *)job->bcl_tile_binning_mode_ptr;
-    cl_packet_pack(TILE_BINNING_MODE_CFG)(NULL, rewrite_addr, &config);
-@@ -82,10 +87,15 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
-    cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
-       config.width_in_pixels = tiling->width;
-       config.height_in_pixels = tiling->height;
-+#if V3D_VERSION == 42
-       config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
-       config.multisample_mode_4x = tiling->msaa;
-       config.double_buffer_in_non_ms_mode = tiling->double_buffer;
-       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("HW generation 71 not supported yet.");
-+#endif
-    }
- 
-    /* There's definitely nothing in the VCD cache we want. */
-@@ -649,10 +659,15 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
-     * bit and instead we have to emit a single clear of all tile buffers.
-     */
-    if (use_global_zs_clear || use_global_rt_clear) {
-+#if V3D_VERSION == 42
-       cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
-          clear.clear_z_stencil_buffer = use_global_zs_clear;
-          clear.clear_all_render_targets = use_global_rt_clear;
-       }
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("Hardware generation 71 not supported yet.");
-+#endif
-    }
- }
- 
-@@ -824,7 +839,12 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-       config.number_of_render_targets = MAX2(subpass->color_count, 1);
-       config.multisample_mode_4x = tiling->msaa;
-       config.double_buffer_in_non_ms_mode = tiling->double_buffer;
-+#if V3D_VERSION == 42
-       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("HW generation 71 not supported yet.");
-+#endif
- 
-       if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
-          const struct v3dv_image_view *iview =
-@@ -920,7 +940,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-       const struct v3d_resource_slice *slice =
-          &image->planes[plane].slices[iview->vk.base_mip_level];
- 
-      const uint32_t *clear_color =
-+      UNUSED const uint32_t *clear_color =
-          &state->attachments[attachment_idx].clear_value.color[0];
- 
-       uint32_t clear_pad = 0;
-@@ -937,13 +957,19 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          }
-       }
- 
-+#if V3D_VERSION == 42
-       cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
-          clear.clear_color_low_32_bits = clear_color[0];
-          clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
-          clear.render_target_number = i;
-       };
-+#endif
-+#if V3D_VERSION >= 71
-+         unreachable("HW generation 71 not supported yet.");
-+#endif
- 
-       if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
-+#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
-             clear.clear_color_mid_low_32_bits =
-                ((clear_color[1] >> 24) | (clear_color[2] << 8));
-@@ -951,17 +977,28 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-                ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
-             clear.render_target_number = i;
-          };
-+#endif
-+#if V3D_VERSION >= 71
-+         unreachable("HW generation 71 not supported yet.");
-+#endif
-+
-       }
- 
-       if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
-+#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
-             clear.uif_padded_height_in_uif_blocks = clear_pad;
-             clear.clear_color_high_16_bits = clear_color[3] >> 16;
-             clear.render_target_number = i;
-          };
-+#endif
-+#if V3D_VERSION >= 71
-+         unreachable("HW generation 71 not supported yet.");
-+#endif
-       }
-    }
- 
-+#if V3D_VERSION == 42
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-       v3dX(cmd_buffer_render_pass_setup_render_target)
-          (cmd_buffer, 0, &rt.render_target_0_internal_bpp,
-@@ -976,6 +1013,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          (cmd_buffer, 3, &rt.render_target_3_internal_bpp,
-           &rt.render_target_3_internal_type, &rt.render_target_3_clamp);
-    }
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("Hardware generation 71 not supported yet.");
-+#endif
- 
-    /* Ends rendering mode config. */
-    if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
-@@ -1036,10 +1077,15 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-       }
-       if (cmd_buffer->state.tile_aligned_render_area &&
-           (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
-+#if V3D_VERSION == 42
-          cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
-             clear.clear_z_stencil_buffer = !job->early_zs_clear;
-             clear.clear_all_render_targets = true;
-          }
-+#endif
-+#if V3D_VERSION >= 71
-+         unreachable("HW generation 71 not supported yet.");
-+#endif
-       }
-       cl_emit(rcl, END_OF_TILE_MARKER, end);
-    }
-@@ -1065,7 +1111,9 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
-     * now, would need to change if we allow multiple viewports
-     */
-    float *vptranslate = dynamic->viewport.translate[0];
-+#if V3D_VERSION == 42
-    float *vpscale = dynamic->viewport.scale[0];
-+#endif
- 
-    struct v3dv_job *job = cmd_buffer->state.job;
-    assert(job);
-@@ -1078,10 +1126,15 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
-    v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
-    v3dv_return_if_oom(cmd_buffer, NULL);
- 
-+#if V3D_VERSION == 42
-    cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
-       clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
-       clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
-    }
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("HW generation 71 not supported yet.");
-+#endif
- 
-    float translate_z, scale_z;
-    v3dv_cmd_buffer_state_get_viewport_z_xform(&cmd_buffer->state, 0,
-@@ -1591,16 +1644,20 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
-    struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-    assert(pipeline);
- 
-   bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
-
-    v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
-    v3dv_return_if_oom(cmd_buffer, NULL);
- 
-+#if V3D_VERSION == 42
-+   bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
-    cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
-       config.early_z_enable = enable_ez;
-       config.early_z_updates_enable = config.early_z_enable &&
-          pipeline->z_updates_enable;
-    }
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("HW generation 71 not supported yet.");
-+#endif
- }
- 
- void
-@@ -2031,10 +2088,12 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
-                                 pipeline->vpm_cfg.Gv);
-    }
- 
-+#if V3D_VERSION == 42
-    struct v3dv_bo *default_attribute_values =
-       pipeline->default_attribute_values != NULL ?
-       pipeline->default_attribute_values :
-       pipeline->device->default_attribute_float;
-+#endif
- 
-    cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
-                           pipeline->shader_state_record, shader) {
-@@ -2060,8 +2119,10 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
-       shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
-       shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;
- 
-+#if V3D_VERSION == 42
-       shader.address_of_default_attribute_values =
-          v3dv_cl_address(default_attribute_values, 0);
-+#endif
- 
-       shader.any_shader_reads_hardware_written_primitive_id =
-          (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid;
-@@ -2399,11 +2460,17 @@ v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buf
- 
-    assert(iview->plane_count == 1);
-    *rt_bpp = iview->planes[0].internal_bpp;
-   *rt_type = iview->planes[0].internal_type;
-    if (vk_format_is_int(iview->vk.view_format))
-+#if V3D_VERSION == 42
-+   *rt_type = iview->planes[0].internal_type;
-+   if (vk_format_is_int(iview->vk.format))
-       *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
-    else if (vk_format_is_srgb(iview->vk.view_format))
-       *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
-    else
-       *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("HW generation 71 not supported yet.");
-+#endif
- }
-diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c
-index 80a3e5bfde8..dac6ff2741f 100644
--- a/src/broadcom/vulkan/v3dvx_image.c
-+++ b/src/broadcom/vulkan/v3dvx_image.c
-@@ -76,8 +76,6 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
-          tex.swizzle_b = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[2]);
-          tex.swizzle_a = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[3]);
- 
-         tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
-
-          tex.texture_type = image_view->format->planes[plane].tex_type;
- 
-          if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
-@@ -110,7 +108,16 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
- 
-          tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64;
- 
-+#if V3D_VERSION == 42
-+         tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
-+#endif
-+
-+#if V3D_VERSION == 42
-          tex.srgb = vk_format_is_srgb(image_view->vk.view_format);
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("Hardware generation 71 not supported yet.");
-+#endif
- 
-          /* At this point we don't have the job. That's the reason the first
-           * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
-@@ -166,7 +173,12 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
- 
-       assert(buffer_view->format->plane_count == 1);
-       tex.texture_type = buffer_view->format->planes[0].tex_type;
-+#if V3D_VERSION == 42
-       tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("Hardware generation 71 not supported yet.");
-+#endif
- 
-       /* At this point we don't have the job. That's the reason the first
-        * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
-diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
-index 04147b82cbd..2db07ea7427 100644
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
-+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
-@@ -58,7 +58,12 @@ emit_rcl_prologue(struct v3dv_job *job,
-       config.number_of_render_targets = 1;
-       config.multisample_mode_4x = tiling->msaa;
-       config.double_buffer_in_non_ms_mode = tiling->double_buffer;
-+#if V3D_VERSION == 42
-       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("Hardware generation 71 not supported yet.");
-+#endif
-       config.internal_depth_type = fb->internal_depth_type;
-    }
- 
-@@ -88,14 +93,20 @@ emit_rcl_prologue(struct v3dv_job *job,
-          }
-       }
- 
-+#if V3D_VERSION == 42
-       const uint32_t *color = &clear_info->clear_value->color[0];
-       cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
-          clear.clear_color_low_32_bits = color[0];
-          clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
-          clear.render_target_number = 0;
-       };
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("Hardware generation 71 not supported yet.");
-+#endif
- 
-       if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
-+#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
-             clear.clear_color_mid_low_32_bits =
-               ((color[1] >> 24) | (color[2] << 8));
-@@ -103,22 +114,37 @@ emit_rcl_prologue(struct v3dv_job *job,
-               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
-             clear.render_target_number = 0;
-          };
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("Hardware generation 71 not supported yet.");
-+#endif
-+
-       }
- 
-       if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
-+#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
-             clear.uif_padded_height_in_uif_blocks = clear_pad;
-             clear.clear_color_high_16_bits = color[3] >> 16;
-             clear.render_target_number = 0;
-          };
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("Hardware generation 71 not supported yet.");
-+#endif
-       }
-    }
- 
-+#if V3D_VERSION == 42
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-       rt.render_target_0_internal_bpp = tiling->internal_bpp;
-       rt.render_target_0_internal_type = fb->internal_type;
-       rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
-    }
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("Hardware generation 71 not supported yet.");
-+#endif
- 
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
-       clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
-@@ -179,10 +205,16 @@ emit_frame_setup(struct v3dv_job *job,
-        */
-       if (clear_value &&
-           (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
-+#if V3D_VERSION == 42
-          cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
-             clear.clear_z_stencil_buffer = true;
-             clear.clear_all_render_targets = true;
-          }
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("Hardware generation 71 not supported yet.");
-+#endif
-+
-       }
-       cl_emit(rcl, END_OF_TILE_MARKER, end);
-    }
-diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
-index 5d32d414ed8..922698b08a2 100644
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
-+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
-@@ -447,10 +447,15 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
-       /* FIXME: Use combined input/output size flag in the common case (also
-        * on v3d, see v3dx_draw).
-        */
-+#if V3D_VERSION == 42
-       shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
-          prog_data_vs_bin->separate_segments;
-       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-          prog_data_vs->separate_segments;
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("HW generation 71 not supported yet.");
-+#endif
- 
-       shader.coordinate_shader_input_vpm_segment_size =
-          prog_data_vs_bin->separate_segments ?
-diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c
-index efe63de425c..1a26d04aef7 100644
--- a/src/broadcom/vulkan/v3dvx_queue.c
-+++ b/src/broadcom/vulkan/v3dvx_queue.c
-@@ -42,14 +42,25 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
-       config.image_height_pixels = 1;
-       config.number_of_render_targets = 1;
-       config.multisample_mode_4x = false;
-+#if V3D_VERSION == 42
-       config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
-+#endif
-+#if V3D_VERSION >= 71
-+      unreachable("HW generation 71 not supported yet.");
-+#endif
-    }
- 
-+#if V3D_VERSION == 42
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-       rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
-       rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
-       rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
-    }
-+#endif
-+#if V3D_VERSION >= 71
-+   unreachable("Hardware generation 71 not supported yet.");
-+#endif
-+
- 
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
-       clear.z_clear_value = 1.0f;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0074-v3dv-expose-V3D-revision-number-in-device-name.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0074-v3dv-expose-V3D-revision-number-in-device-name.patch
@ -1,29 +0,0 @@
-From 7aa016bca8bb1bf449ea79505692353c0bd174b8 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 10 Nov 2021 10:06:50 +0100
-Subject: [PATCH 074/142] v3dv: expose V3D revision number in device name
-
---
- src/broadcom/vulkan/v3dv_device.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
-index d29ffad3531..3034b561480 100644
--- a/src/broadcom/vulkan/v3dv_device.c
-+++ b/src/broadcom/vulkan/v3dv_device.c
-@@ -1123,8 +1123,10 @@ create_physical_device(struct v3dv_instance *instance,
-    device->next_program_id = 0;
- 
-    ASSERTED int len =
-      asprintf(&device->name, "V3D %d.%d",
-               device->devinfo.ver / 10, device->devinfo.ver % 10);
-+      asprintf(&device->name, "V3D %d.%d.%d",
-+               device->devinfo.ver / 10,
-+               device->devinfo.ver % 10,
-+               device->devinfo.rev);
-    assert(len != -1);
- 
-    v3dv_physical_device_init_disk_cache(device);
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0075-v3dv-device-handle-new-rpi5-device-bcm2712.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0075-v3dv-device-handle-new-rpi5-device-bcm2712.patch
@ -1,54 +0,0 @@
-From fb9e95b7e1d5987fd25e914635c4e09d81ea9561 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 10 Nov 2021 07:54:35 +0100
-Subject: [PATCH 075/142] v3dv/device: handle new rpi5 device (bcm2712)
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This includes both master and primary devices.
-
-Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
-Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
---
- src/broadcom/vulkan/v3dv_device.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
-index 3034b561480..c8719d33f15 100644
--- a/src/broadcom/vulkan/v3dv_device.c
-+++ b/src/broadcom/vulkan/v3dv_device.c
-@@ -1287,7 +1287,8 @@ enumerate_devices(struct vk_instance *vk_instance)
-       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) {
-          char **compat = devices[i]->deviceinfo.platform->compatible;
-          while (*compat) {
-            if (strncmp(*compat, "brcm,2711-v3d", 13) == 0) {
-+            if (strncmp(*compat, "brcm,2711-v3d", 13) == 0 ||
-+                strncmp(*compat, "brcm,2712-v3d", 13) == 0) {
-                v3d_idx = i;
-                break;
-             }
-@@ -1296,8 +1297,9 @@ enumerate_devices(struct vk_instance *vk_instance)
-       } else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
-          char **compat = devices[i]->deviceinfo.platform->compatible;
-          while (*compat) {
-            if (strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
-                strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0 ) {
-+            if (strncmp(*compat, "brcm,bcm2712-vc6", 16) == 0 ||
-+                strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
-+                strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0) {
-                vc4_idx = i;
-                break;
-             }
-@@ -1334,6 +1336,8 @@ v3dv_physical_device_device_id(struct v3dv_physical_device *dev)
-    switch (dev->devinfo.ver) {
-    case 42:
-       return 0xBE485FD3; /* Broadcom deviceID for 2711 */
-+   case 71:
-+      return 0x55701C33; /* Broadcom deviceID for 2712 */
-    default:
-       unreachable("Unsupported V3D version");
-    }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0076-v3dv-cmd_buffer-emit-TILE_BINNING_MODE_CFG-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0076-v3dv-cmd_buffer-emit-TILE_BINNING_MODE_CFG-for-v71.patch
@ -1,32 +0,0 @@
-From c4f957af4fb0e10abf0a7ffad4f7a468633b7d99 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 20 Jul 2021 14:00:44 +0200
-Subject: [PATCH 076/142] v3dv/cmd_buffer: emit TILE_BINNING_MODE_CFG for v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index b958e634c82..17b2f46850d 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -94,7 +94,14 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
-       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- #endif
- #if V3D_VERSION >= 71
-      unreachable("HW generation 71 not supported yet.");
-+      config.log2_tile_width = log2_tile_size(tiling->tile_width);
-+      config.log2_tile_height = log2_tile_size(tiling->tile_height);
-+      /* FIXME: ideally we would like next assert on the packet header (as is
-+       * general, so also applies to GL). We would need to expand
-+       * gen_pack_header for that.
-+       */
-+      assert(config.log2_tile_width == config.log2_tile_height ||
-+             config.log2_tile_width == config.log2_tile_height + 1);
- #endif
-    }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0077-v3dv-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0077-v3dv-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch
@ -1,53 +0,0 @@
-From 1934ac07df73cb685f6550b8b0f5b4f2ead11396 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 20 Jul 2021 14:33:00 +0200
-Subject: [PATCH 077/142] v3dv: emit TILE_RENDERING_MODE_CFG_COMMON for v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c  | 9 ++++++++-
- src/broadcom/vulkan/v3dvx_meta_common.c | 9 ++++++++-
- 2 files changed, 16 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index 17b2f46850d..7837b460051 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -850,7 +850,14 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- #endif
- #if V3D_VERSION >= 71
-      unreachable("HW generation 71 not supported yet.");
-+      config.log2_tile_width = log2_tile_size(tiling->tile_width);
-+      config.log2_tile_height = log2_tile_size(tiling->tile_height);
-+      /* FIXME: ideally we would like next assert on the packet header (as is
-+       * general, so also applies to GL). We would need to expand
-+       * gen_pack_header for that.
-+       */
-+      assert(config.log2_tile_width == config.log2_tile_height ||
-+             config.log2_tile_width == config.log2_tile_height + 1);
- #endif
- 
-       if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
-diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
-index 2db07ea7427..e4084d851fc 100644
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
-+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
-@@ -62,7 +62,14 @@ emit_rcl_prologue(struct v3dv_job *job,
-       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- #endif
- #if V3D_VERSION >= 71
-      unreachable("Hardware generation 71 not supported yet.");
-+      config.log2_tile_width = log2_tile_size(tiling->tile_width);
-+      config.log2_tile_height = log2_tile_size(tiling->tile_height);
-+      /* FIXME: ideally we would like next assert on the packet header (as is
-+       * general, so also applies to GL). We would need to expand
-+       * gen_pack_header for that.
-+       */
-+      assert(config.log2_tile_width == config.log2_tile_height ||
-+             config.log2_tile_width == config.log2_tile_height + 1);
- #endif
-       config.internal_depth_type = fb->internal_depth_type;
-    }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0078-v3dv-cmd_buffer-emit-TILE_RENDERING_MODE_CFG_RENDER_.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0078-v3dv-cmd_buffer-emit-TILE_RENDERING_MODE_CFG_RENDER_.patch
@ -1,315 +0,0 @@
-From f0f9eea3cad83ed8824c6a7686150327407a5286 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Thu, 22 Jul 2021 14:26:13 +0200
-Subject: [PATCH 078/142] v3dv/cmd_buffer: emit
- TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1 for v71
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
-Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c  | 186 +++++++++++++++++-------
- src/broadcom/vulkan/v3dvx_meta_common.c |  12 +-
- src/broadcom/vulkan/v3dvx_private.h     |  11 +-
- 3 files changed, 147 insertions(+), 62 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index 7837b460051..c6307890da5 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -800,6 +800,103 @@ set_rcl_early_z_config(struct v3dv_job *job,
-    }
- }
- 
-+/* Note that for v71, render target cfg packets have just one field that
-+ * combines the internal type and clamp mode. For simplicity we keep just one
-+ * helper.
-+ *
-+ * Note: rt_type is in fact an "enum V3DX(Internal_Type)".
-+ *
-+ * FIXME: for v71 we are not returning all the possible combinations for
-+ * render target internal type and clamp. For example, for int types we are
-+ * always using clamp int, and for 16f we are using clamp none or pos (which
-+ * seems to be the equivalent of no-clamp on 4.2), but not pq or hlg. In
-+ * summary, right now we are just porting what we were doing on 4.2.
-+ */
-+uint32_t
-+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
-+                                VkFormat vk_format)
-+{
-+#if V3D_VERSION == 42
-+   if (vk_format_is_int(vk_format))
-+      return V3D_RENDER_TARGET_CLAMP_INT;
-+   else if (vk_format_is_srgb(vk_format))
-+      return V3D_RENDER_TARGET_CLAMP_NORM;
-+   else
-+      return V3D_RENDER_TARGET_CLAMP_NONE;
-+#endif
-+#if V3D_VERSION >= 71
-+   switch (rt_type) {
-+   case V3D_INTERNAL_TYPE_8I:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
-+   case V3D_INTERNAL_TYPE_8UI:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
-+   case V3D_INTERNAL_TYPE_8:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_8;
-+   case V3D_INTERNAL_TYPE_16I:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
-+   case V3D_INTERNAL_TYPE_16UI:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
-+   case V3D_INTERNAL_TYPE_16F:
-+      return vk_format_is_srgb(vk_format) ?
-+         V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
-+         V3D_RENDER_TARGET_TYPE_CLAMP_16F;
-+   case V3D_INTERNAL_TYPE_32I:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
-+   case V3D_INTERNAL_TYPE_32UI:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
-+   case V3D_INTERNAL_TYPE_32F:
-+      return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
-+   default:
-+      unreachable("Unknown internal render target type");
-+   }
-+
-+   return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
-+#endif
-+}
-+
-+static void
-+cmd_buffer_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
-+                                           int rt,
-+                                           uint32_t *rt_bpp,
-+#if V3D_VERSION == 42
-+                                           uint32_t *rt_type,
-+                                           uint32_t *rt_clamp)
-+#else
-+                                           uint32_t *rt_type_clamp)
-+#endif
-+{
-+   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-+
-+   assert(state->subpass_idx < state->pass->subpass_count);
-+   const struct v3dv_subpass *subpass =
-+      &state->pass->subpasses[state->subpass_idx];
-+
-+   if (rt >= subpass->color_count)
-+      return;
-+
-+   struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
-+   const uint32_t attachment_idx = attachment->attachment;
-+   if (attachment_idx == VK_ATTACHMENT_UNUSED)
-+      return;
-+
-+   assert(attachment_idx < state->framebuffer->attachment_count &&
-+          attachment_idx < state->attachment_alloc_count);
-+   struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view;
-+   assert(vk_format_is_color(iview->vk.format));
-+
-+   assert(iview->plane_count == 1);
-+   *rt_bpp = iview->planes[0].internal_bpp;
-+#if V3D_VERSION == 42
-+   *rt_type = iview->planes[0].internal_type;
-+   *rt_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type,
-+                                               iview->vk.format);
-+#endif
-+#if V3D_VERSION >= 71
-+   *rt_type_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type,
-+                                                    iview->vk.format);
-+#endif
-+}
-+
- void
- v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
- {
-@@ -939,10 +1036,20 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-     */
-    job->early_zs_clear = do_early_zs_clear;
- 
-+#if V3D_VERSION >= 71
-+   uint32_t base_addr = 0;
-+#endif
-    for (uint32_t i = 0; i < subpass->color_count; i++) {
-       uint32_t attachment_idx = subpass->color_attachments[i].attachment;
-      if (attachment_idx == VK_ATTACHMENT_UNUSED)
-+      if (attachment_idx == VK_ATTACHMENT_UNUSED) {
-+#if V3D_VERSION >= 71
-+         cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-+            rt.render_target_number = i;
-+            rt.stride = 1; /* Unused */
-+         }
-+#endif
-          continue;
-+      }
- 
-       struct v3dv_image_view *iview =
-          state->attachments[attachment_idx].image_view;
-@@ -978,9 +1085,6 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          clear.render_target_number = i;
-       };
- #endif
-#if V3D_VERSION >= 71
-         unreachable("HW generation 71 not supported yet.");
-#endif
- 
-       if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
- #if V3D_VERSION == 42
-@@ -1010,27 +1114,44 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          unreachable("HW generation 71 not supported yet.");
- #endif
-       }
-+
-+#if V3D_VERSION >= 71
-+      cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-+         rt.clear_color_low_bits = clear_color[0];
-+         cmd_buffer_render_pass_setup_render_target(cmd_buffer, i, &rt.internal_bpp,
-+                                                    &rt.internal_type_and_clamping);
-+         rt.stride =
-+            v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
-+                                                   v3d_internal_bpp_words(rt.internal_bpp));
-+         rt.base_address = base_addr;
-+         rt.render_target_number = i;
-+
-+         /* base_addr in multiples of 512 bits. We divide by 8 because stride
-+          * is in 128-bit units, but it is packing 2 rows worth of data, so we
-+          * need to divide it by 2 so it is only 1 row, and then again by 4 so
-+          * it is in 512-bit units.
-+          */
-+         base_addr += (tiling->tile_height * rt.stride) / 8;
-+      }
-+#endif
-    }
- 
- #if V3D_VERSION == 42
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-      v3dX(cmd_buffer_render_pass_setup_render_target)
-+      cmd_buffer_render_pass_setup_render_target
-          (cmd_buffer, 0, &rt.render_target_0_internal_bpp,
-           &rt.render_target_0_internal_type, &rt.render_target_0_clamp);
-      v3dX(cmd_buffer_render_pass_setup_render_target)
-+      cmd_buffer_render_pass_setup_render_target
-          (cmd_buffer, 1, &rt.render_target_1_internal_bpp,
-           &rt.render_target_1_internal_type, &rt.render_target_1_clamp);
-      v3dX(cmd_buffer_render_pass_setup_render_target)
-+      cmd_buffer_render_pass_setup_render_target
-          (cmd_buffer, 2, &rt.render_target_2_internal_bpp,
-           &rt.render_target_2_internal_type, &rt.render_target_2_clamp);
-      v3dX(cmd_buffer_render_pass_setup_render_target)
-+      cmd_buffer_render_pass_setup_render_target
-          (cmd_buffer, 3, &rt.render_target_3_internal_bpp,
-           &rt.render_target_3_internal_type, &rt.render_target_3_clamp);
-    }
- #endif
-#if V3D_VERSION >= 71
-   unreachable("Hardware generation 71 not supported yet.");
-#endif
- 
-    /* Ends rendering mode config. */
-    if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
-@@ -2445,46 +2566,3 @@ v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
-                                      buffer->mem_offset + offset);
-    }
- }
-
-void
-v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
-                                                 int rt,
-                                                 uint32_t *rt_bpp,
-                                                 uint32_t *rt_type,
-                                                 uint32_t *rt_clamp)
-{
-   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-
-   assert(state->subpass_idx < state->pass->subpass_count);
-   const struct v3dv_subpass *subpass =
-      &state->pass->subpasses[state->subpass_idx];
-
-   if (rt >= subpass->color_count)
-      return;
-
-   struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
-   const uint32_t attachment_idx = attachment->attachment;
-   if (attachment_idx == VK_ATTACHMENT_UNUSED)
-      return;
-
-   assert(attachment_idx < state->framebuffer->attachment_count &&
-          attachment_idx < state->attachment_alloc_count);
-   struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view;
-   assert(vk_format_is_color(iview->vk.format));
-
-   assert(iview->plane_count == 1);
-   *rt_bpp = iview->planes[0].internal_bpp;
-   if (vk_format_is_int(iview->vk.view_format))
-#if V3D_VERSION == 42
-   *rt_type = iview->planes[0].internal_type;
-   if (vk_format_is_int(iview->vk.format))
-      *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
-   else if (vk_format_is_srgb(iview->vk.view_format))
-      *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
-   else
-      *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
-#endif
-#if V3D_VERSION >= 71
-   unreachable("HW generation 71 not supported yet.");
-#endif
-}
-diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
-index e4084d851fc..c6391bc6d83 100644
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
-+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
-@@ -26,6 +26,7 @@
- 
- #include "broadcom/common/v3d_macros.h"
- #include "broadcom/common/v3d_tfu.h"
-+#include "broadcom/common/v3d_util.h"
- #include "broadcom/cle/v3dx_pack.h"
- #include "broadcom/compiler/v3d_compiler.h"
- 
-@@ -150,7 +151,16 @@ emit_rcl_prologue(struct v3dv_job *job,
-    }
- #endif
- #if V3D_VERSION >= 71
-   unreachable("Hardware generation 71 not supported yet.");
-+   cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-+      rt.internal_bpp = tiling->internal_bpp;
-+      rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type,
-+                                                                      fb->vk_format);
-+      rt.stride =
-+         v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
-+                                                v3d_internal_bpp_words(rt.internal_bpp));
-+      rt.base_address = 0;
-+      rt.render_target_number = 0;
-+   }
- #endif
- 
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
-diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h
-index ad8ddfa5731..a4157d11c7c 100644
--- a/src/broadcom/vulkan/v3dvx_private.h
-+++ b/src/broadcom/vulkan/v3dvx_private.h
-@@ -125,13 +125,6 @@ v3dX(get_hw_clear_color)(const VkClearColorValue *color,
-                          uint32_t internal_size,
-                          uint32_t *hw_color);
- 
-void
-v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
-                                                 int rt,
-                                                 uint32_t *rt_bpp,
-                                                 uint32_t *rt_type,
-                                                 uint32_t *rt_clamp);
-
- /* Used at v3dv_device */
- 
- void
-@@ -325,3 +318,7 @@ uint32_t v3dX(max_descriptor_bo_size)(void);
- uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane);
- 
- uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane);
-+
-+uint32_t
-+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
-+                                VkFormat vk_format);
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0079-v3dvx-cmd_buffer-emit-CLEAR_RENDER_TARGETS-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0079-v3dvx-cmd_buffer-emit-CLEAR_RENDER_TARGETS-for-v71.patch
@ -1,25 +0,0 @@
-From 7c89d8026fd550282d54933f37ffc2773869326f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Mon, 26 Jul 2021 15:08:11 +0200
-Subject: [PATCH 079/142] v3dvx/cmd_buffer: emit CLEAR_RENDER_TARGETS for v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index c6307890da5..ae1c21ae00b 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -1219,7 +1219,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          }
- #endif
- #if V3D_VERSION >= 71
-         unreachable("HW generation 71 not supported yet.");
-+         cl_emit(rcl, CLEAR_RENDER_TARGETS, clear_rt);
- #endif
-       }
-       cl_emit(rcl, END_OF_TILE_MARKER, end);
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0080-v3dv-cmd_buffer-emit-CLIPPER_XY_SCALING-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0080-v3dv-cmd_buffer-emit-CLIPPER_XY_SCALING-for-v71.patch
@ -1,38 +0,0 @@
-From 2eb29b57fde2acda76e12953b3a1050f3056b39d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Sun, 19 Sep 2021 23:37:32 +0200
-Subject: [PATCH 080/142] v3dv/cmd_buffer: emit CLIPPER_XY_SCALING for v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 7 ++++---
- 1 file changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index ae1c21ae00b..2e525a11619 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -1246,9 +1246,7 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
-     * now, would need to change if we allow multiple viewports
-     */
-    float *vptranslate = dynamic->viewport.translate[0];
-#if V3D_VERSION == 42
-    float *vpscale = dynamic->viewport.scale[0];
-#endif
- 
-    struct v3dv_job *job = cmd_buffer->state.job;
-    assert(job);
-@@ -1268,7 +1266,10 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
-    }
- #endif
- #if V3D_VERSION >= 71
-   unreachable("HW generation 71 not supported yet.");
-+   cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
-+      clip.viewport_half_width_in_1_64th_of_pixel = vpscale[0] * 64.0f;
-+      clip.viewport_half_height_in_1_64th_of_pixel = vpscale[1] * 64.0f;
-+   }
- #endif
- 
-    float translate_z, scale_z;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0081-v3dv-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0081-v3dv-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for.patch
@ -1,97 +0,0 @@
-From 611bf6a7445837c7e20416ff9f11a6dad9c543d7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 14 Sep 2021 10:08:19 +0200
-Subject: [PATCH 081/142] v3dv/uniforms: update VIEWPORT_X/Y_SCALE uniforms for
- v71
-
-As with the CLIPPER_XY_SCALING packet, this needs to be computed in
-1/64ths of a pixel instead of 1/256ths of a pixel.
-
-As these are the kind of values that we usually get from macros, we
-manually add a v42 and a v71 macro, and define a new helper (V3DV_X) to
-get the value for the current hw version.
---
- src/broadcom/vulkan/v3dv_private.h  | 17 +++++++++++++++++
- src/broadcom/vulkan/v3dv_uniforms.c |  7 ++++---
- src/broadcom/vulkan/v3dvx_private.h |  9 +++++++++
- 3 files changed, 30 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
-index 6bdf338c67b..cd6811b19c2 100644
--- a/src/broadcom/vulkan/v3dv_private.h
-+++ b/src/broadcom/vulkan/v3dv_private.h
-@@ -2617,6 +2617,23 @@ u64_compare(const void *key1, const void *key2)
-    v3d_X_thing;                                       \
- })
- 
-+/* Helper to get hw-specific macro values */
-+#define V3DV_X(device, thing) ({                                \
-+   __typeof(V3D42_##thing) V3D_X_THING;                         \
-+   switch (device->devinfo.ver) {                               \
-+   case 42:                                                     \
-+      V3D_X_THING = V3D42_##thing;                              \
-+      break;                                                    \
-+   case 71:                                                     \
-+      V3D_X_THING = V3D71_##thing;                              \
-+      break;                                                    \
-+   default:                                                     \
-+      unreachable("Unsupported hardware generation");           \
-+   }                                                            \
-+   V3D_X_THING;                                                 \
-+})
-+
-+
- 
- /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
-  * define v3dX for each version supported, because when we compile code that
-diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c
-index 72fa9a1b39c..0e681cc4ee2 100644
--- a/src/broadcom/vulkan/v3dv_uniforms.c
-+++ b/src/broadcom/vulkan/v3dv_uniforms.c
-@@ -497,7 +497,8 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
-    struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);
- 
-    struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
-
-+   float clipper_xy_granularity =
-+      V3DV_X(cmd_buffer->device, CLIPPER_XY_GRANULARITY);
-    for (int i = 0; i < uinfo->count; i++) {
-       uint32_t data = uinfo->data[i];
- 
-@@ -520,11 +521,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
-          break;
- 
-       case QUNIFORM_VIEWPORT_X_SCALE:
-         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
-+         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * clipper_xy_granularity);
-          break;
- 
-       case QUNIFORM_VIEWPORT_Y_SCALE:
-         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
-+         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * clipper_xy_granularity);
-          break;
- 
-       case QUNIFORM_VIEWPORT_Z_OFFSET: {
-diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h
-index a4157d11c7c..ff9ba75cf93 100644
--- a/src/broadcom/vulkan/v3dvx_private.h
-+++ b/src/broadcom/vulkan/v3dvx_private.h
-@@ -319,6 +319,15 @@ uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane);
- 
- uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane);
- 
-+/* General utils */
-+
-+uint32_t
-+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
-+                                VkFormat vk_format);
-+
-+#define V3D42_CLIPPER_XY_GRANULARITY 256.0f
-+#define V3D71_CLIPPER_XY_GRANULARITY 64.0f
-+
- uint32_t
- v3dX(clamp_for_format_and_type)(uint32_t rt_type,
-                                 VkFormat vk_format);
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0082-v3dv-cmd_buffer-just-don-t-fill-up-early-z-fields-fo.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0082-v3dv-cmd_buffer-just-don-t-fill-up-early-z-fields-fo.patch
@ -1,40 +0,0 @@
-From 3819efaf2bb6fd8bd9cd45d54fb7254377b2296a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Tue, 27 Jul 2021 14:02:30 +0200
-Subject: [PATCH 082/142] v3dv/cmd_buffer: just don't fill up early-z fields
- for CFG_BITS for v71
-
-For v71 early_z_enable/early_z_updates_enable is configured with
-packet 121.
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 +++------
- 1 file changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index 2e525a11619..fe9f7e43596 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -1783,17 +1783,14 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
-    v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
-    v3dv_return_if_oom(cmd_buffer, NULL);
- 
-#if V3D_VERSION == 42
-   bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
-    cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
-+#if V3D_VERSION == 42
-+      bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
-       config.early_z_enable = enable_ez;
-       config.early_z_updates_enable = config.early_z_enable &&
-          pipeline->z_updates_enable;
-   }
-#endif
-#if V3D_VERSION >= 71
-   unreachable("HW generation 71 not supported yet.");
- #endif
-+   }
- }
- 
- void
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0083-v3dv-default-vertex-attribute-values-are-gen-dependa.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0083-v3dv-default-vertex-attribute-values-are-gen-dependa.patch
@ -1,219 +0,0 @@
-From e3b1a578f45ea830d790970115b6de978d56edb8 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 28 Jul 2021 12:01:38 +0200
-Subject: [PATCH 083/142] v3dv: default vertex attribute values are gen
- dependant
-
-Content, structure and size would depend on the generation. Even if it
-is needed at all.
-
-So let's move it to the v3dvx files.
---
- src/broadcom/vulkan/v3dv_device.c    |  2 +-
- src/broadcom/vulkan/v3dv_pipeline.c  | 61 ++-------------------------
- src/broadcom/vulkan/v3dv_private.h   |  4 --
- src/broadcom/vulkan/v3dvx_pipeline.c | 63 ++++++++++++++++++++++++++++
- src/broadcom/vulkan/v3dvx_private.h  |  8 ++++
- 5 files changed, 75 insertions(+), 63 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
-index c8719d33f15..01e2dd7ac2d 100644
--- a/src/broadcom/vulkan/v3dv_device.c
-+++ b/src/broadcom/vulkan/v3dv_device.c
-@@ -2043,7 +2043,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
-    v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
-                             device->instance->default_pipeline_cache_enabled);
-    device->default_attribute_float =
-      v3dv_pipeline_create_default_attribute_values(device, NULL);
-+      v3dv_X(device, create_default_attribute_values)(device, NULL);
- 
-    device->device_address_mem_ctx = ralloc_context(NULL);
-    util_dynarray_init(&device->device_address_bo_list,
-diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
-index 22f01bdf64b..d012ff8f948 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
-+++ b/src/broadcom/vulkan/v3dv_pipeline.c
-@@ -2802,62 +2802,6 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
-    }
- }
- 
-static bool
-pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
-{
-   for (uint8_t i = 0; i < pipeline->va_count; i++) {
-      if (vk_format_is_int(pipeline->va[i].vk_format))
-         return true;
-   }
-   return false;
-}
-
-/* @pipeline can be NULL. We assume in that case that all the attributes have
- * a float format (we only create an all-float BO once and we reuse it with
- * all float pipelines), otherwise we look at the actual type of each
- * attribute used with the specific pipeline passed in.
- */
-struct v3dv_bo *
-v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
-                                              struct v3dv_pipeline *pipeline)
-{
-   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
-   struct v3dv_bo *bo;
-
-   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
-
-   if (!bo) {
-      fprintf(stderr, "failed to allocate memory for the default "
-              "attribute values\n");
-      return NULL;
-   }
-
-   bool ok = v3dv_bo_map(device, bo, size);
-   if (!ok) {
-      fprintf(stderr, "failed to map default attribute values buffer\n");
-      return false;
-   }
-
-   uint32_t *attrs = bo->map;
-   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
-   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
-      attrs[i * 4 + 0] = 0;
-      attrs[i * 4 + 1] = 0;
-      attrs[i * 4 + 2] = 0;
-      VkFormat attr_format =
-         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
-      if (i < va_count && vk_format_is_int(attr_format)) {
-         attrs[i * 4 + 3] = 1;
-      } else {
-         attrs[i * 4 + 3] = fui(1.0);
-      }
-   }
-
-   v3dv_bo_unmap(device, bo);
-
-   return bo;
-}
-
- static void
- pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
-                          const VkPipelineMultisampleStateCreateInfo *ms_info)
-@@ -2992,9 +2936,10 @@ pipeline_init(struct v3dv_pipeline *pipeline,
- 
-    v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
- 
-   if (pipeline_has_integer_vertex_attrib(pipeline)) {
-+   if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
-       pipeline->default_attribute_values =
-         v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
-+         v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
-+
-       if (!pipeline->default_attribute_values)
-          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
-    } else {
-diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
-index cd6811b19c2..a9fab24d19e 100644
--- a/src/broadcom/vulkan/v3dv_private.h
-+++ b/src/broadcom/vulkan/v3dv_private.h
-@@ -2500,10 +2500,6 @@ void
- v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
-                                     struct v3dv_pipeline_cache *cache);
- 
-struct v3dv_bo *
-v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
-                                              struct v3dv_pipeline *pipeline);
-
- VkResult
- v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
-                                       nir_shader *nir,
-diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
-index 922698b08a2..e235220cb14 100644
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
-+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
-@@ -664,3 +664,66 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
-       }
-    }
- }
-+
-+static bool
-+pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
-+{
-+   for (uint8_t i = 0; i < pipeline->va_count; i++) {
-+      if (vk_format_is_int(pipeline->va[i].vk_format))
-+         return true;
-+   }
-+   return false;
-+}
-+
-+bool
-+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
-+{
-+   return pipeline_has_integer_vertex_attrib(pipeline);
-+}
-+
-+/* @pipeline can be NULL. In that case we assume the most common case. For
-+ * example, for v42 we assume in that case that all the attributes have a
-+ * float format (we only create an all-float BO once and we reuse it with all
-+ * float pipelines), otherwise we look at the actual type of each attribute
-+ * used with the specific pipeline passed in.
-+ */
-+struct v3dv_bo *
-+v3dX(create_default_attribute_values)(struct v3dv_device *device,
-+                                      struct v3dv_pipeline *pipeline)
-+{
-+   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
-+   struct v3dv_bo *bo;
-+
-+   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
-+
-+   if (!bo) {
-+      fprintf(stderr, "failed to allocate memory for the default "
-+              "attribute values\n");
-+      return NULL;
-+   }
-+
-+   bool ok = v3dv_bo_map(device, bo, size);
-+   if (!ok) {
-+      fprintf(stderr, "failed to map default attribute values buffer\n");
-+      return NULL;
-+   }
-+
-+   uint32_t *attrs = bo->map;
-+   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
-+   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
-+      attrs[i * 4 + 0] = 0;
-+      attrs[i * 4 + 1] = 0;
-+      attrs[i * 4 + 2] = 0;
-+      VkFormat attr_format =
-+         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
-+      if (i < va_count && vk_format_is_int(attr_format)) {
-+         attrs[i * 4 + 3] = 1;
-+      } else {
-+         attrs[i * 4 + 3] = fui(1.0);
-+      }
-+   }
-+
-+   v3dv_bo_unmap(device, bo);
-+
-+   return bo;
-+}
-diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h
-index ff9ba75cf93..036ce11b455 100644
--- a/src/broadcom/vulkan/v3dvx_private.h
-+++ b/src/broadcom/vulkan/v3dvx_private.h
-@@ -306,6 +306,14 @@ void
- v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
-                                   const VkPipelineVertexInputStateCreateInfo *vi_info,
-                                   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info);
-+
-+bool
-+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline);
-+
-+struct v3dv_bo *
-+v3dX(create_default_attribute_values)(struct v3dv_device *device,
-+                                      struct v3dv_pipeline *pipeline);
-+
- /* Used at v3dv_queue */
- void
- v3dX(job_emit_noop)(struct v3dv_job *job);
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0084-v3dv-pipeline-default-vertex-attributes-values-are-n.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0084-v3dv-pipeline-default-vertex-attributes-values-are-n.patch
@ -1,87 +0,0 @@
-From 8464dc8869f3d2eccfecac7b4358cc0ffe05f081 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 28 Jul 2021 12:05:26 +0200
-Subject: [PATCH 084/142] v3dv/pipeline: default vertex attributes values are
- not needed for v71
-
-There are not part of the shader state record.
---
- src/broadcom/vulkan/v3dv_private.h   | 10 +++++++++-
- src/broadcom/vulkan/v3dvx_pipeline.c | 10 ++++++++++
- 2 files changed, 19 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
-index a9fab24d19e..300a1ec8ae1 100644
--- a/src/broadcom/vulkan/v3dv_private.h
-+++ b/src/broadcom/vulkan/v3dv_private.h
-@@ -581,6 +581,10 @@ struct v3dv_device {
-     * being float being float, allowing us to reuse the same BO for all
-     * pipelines matching this requirement. Pipelines that need integer
-     * attributes will create their own BO.
-+    *
-+    * Note that since v71 the default attribute values are not needed, so this
-+    * can be NULL.
-+    *
-     */
-    struct v3dv_bo *default_attribute_float;
- 
-@@ -2289,11 +2293,15 @@ struct v3dv_pipeline {
-    unsigned char sha1[20];
- 
-    /* In general we can reuse v3dv_device->default_attribute_float, so note
-    * that the following can be NULL.
-+    * that the following can be NULL. In 7.x this is not used, so it will be
-+    * NULL.
-     *
-     * FIXME: the content of this BO will be small, so it could be improved to
-     * be uploaded to a common BO. But as in most cases it will be NULL, it is
-     * not a priority.
-+    *
-+    * Note that since v71 the default attribute values are not needed, so this
-+    * can be NULL.
-     */
-    struct v3dv_bo *default_attribute_values;
- 
-diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
-index e235220cb14..4dc6d70efe1 100644
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
-+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
-@@ -665,6 +665,7 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
-    }
- }
- 
-+#if V3D_VERSION == 42
- static bool
- pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
- {
-@@ -674,11 +675,16 @@ pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
-    }
-    return false;
- }
-+#endif
- 
- bool
- v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
- {
-+#if V3D_VERSION == 42
-    return pipeline_has_integer_vertex_attrib(pipeline);
-+#endif
-+
-+   return false;
- }
- 
- /* @pipeline can be NULL. In that case we assume the most common case. For
-@@ -691,6 +697,10 @@ struct v3dv_bo *
- v3dX(create_default_attribute_values)(struct v3dv_device *device,
-                                       struct v3dv_pipeline *pipeline)
- {
-+#if V3D_VERSION >= 71
-+   return NULL;
-+#endif
-+
-    uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
-    struct v3dv_bo *bo;
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0085-v3dv-pipeline-handle-GL_SHADER_STATE_RECORD-changed-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0085-v3dv-pipeline-handle-GL_SHADER_STATE_RECORD-changed-.patch
@ -1,39 +0,0 @@
-From 339096598660ec34be8087007dd4d66581de1c4e Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Wed, 28 Jul 2021 13:45:52 +0200
-Subject: [PATCH 085/142] v3dv/pipeline: handle GL_SHADER_STATE_RECORD changed
- size on v71
-
-It is likely that we would need more changes, as this packet changed,
-but this is enough to get basic tests running. Any additional support
-will be handled with new commits.
---
- src/broadcom/vulkan/v3dvx_pipeline.c | 5 +----
- 1 file changed, 1 insertion(+), 4 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
-index 4dc6d70efe1..a640c1d084a 100644
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
-+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
-@@ -360,7 +360,7 @@ v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
- static void
- pack_shader_state_record(struct v3dv_pipeline *pipeline)
- {
-   assert(sizeof(pipeline->shader_state_record) ==
-+   assert(sizeof(pipeline->shader_state_record) >=
-           cl_packet_length(GL_SHADER_STATE_RECORD));
- 
-    struct v3d_fs_prog_data *prog_data_fs =
-@@ -453,9 +453,6 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
-       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-          prog_data_vs->separate_segments;
- #endif
-#if V3D_VERSION >= 71
-      unreachable("HW generation 71 not supported yet.");
-#endif
- 
-       shader.coordinate_shader_input_vpm_segment_size =
-          prog_data_vs_bin->separate_segments ?
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0086-v3dv-setup-render-pass-color-clears-for-any-format-b.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0086-v3dv-setup-render-pass-color-clears-for-any-format-b.patch
@ -1,89 +0,0 @@
-From 5b1342eb1e255d17619b1a7b33eaf7b31f5e50a5 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 22 Sep 2021 12:03:58 +0200
-Subject: [PATCH 086/142] v3dv: setup render pass color clears for any format
- bpp in v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 33 ++++++++++++++++----------
- 1 file changed, 20 insertions(+), 13 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index fe9f7e43596..1b39e230580 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -1064,7 +1064,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-       UNUSED const uint32_t *clear_color =
-          &state->attachments[attachment_idx].clear_value.color[0];
- 
-      uint32_t clear_pad = 0;
-+      UNUSED uint32_t clear_pad = 0;
-       if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
-           slice->tiling == V3D_TILING_UIF_XOR) {
-          int uif_block_height = v3d_utile_height(image->planes[plane].cpp) * 2;
-@@ -1084,10 +1084,8 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
-          clear.render_target_number = i;
-       };
-#endif
- 
-       if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
-#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
-             clear.clear_color_mid_low_32_bits =
-                ((clear_color[1] >> 24) | (clear_color[2] << 8));
-@@ -1095,25 +1093,16 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-                ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
-             clear.render_target_number = i;
-          };
-#endif
-#if V3D_VERSION >= 71
-         unreachable("HW generation 71 not supported yet.");
-#endif
-
-       }
- 
-       if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
-#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
-             clear.uif_padded_height_in_uif_blocks = clear_pad;
-             clear.clear_color_high_16_bits = clear_color[3] >> 16;
-             clear.render_target_number = i;
-          };
-#endif
-#if V3D_VERSION >= 71
-         unreachable("HW generation 71 not supported yet.");
-#endif
-       }
-+#endif
- 
- #if V3D_VERSION >= 71
-       cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-@@ -1133,6 +1122,24 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-           */
-          base_addr += (tiling->tile_height * rt.stride) / 8;
-       }
-+
-+      if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
-+         cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
-+            rt.clear_color_mid_bits = /* 40 bits (32 + 8)  */
-+               ((uint64_t) clear_color[1]) |
-+               (((uint64_t) (clear_color[2] & 0xff)) << 32);
-+            rt.render_target_number = i;
-+         }
-+      }
-+
-+      if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128) {
-+         cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
-+            rt.clear_color_top_bits = /* 56 bits (24 + 32) */
-+               (((uint64_t) (clear_color[2] & 0xffffff00)) >> 8) |
-+               (((uint64_t) (clear_color[3])) << 24);
-+            rt.render_target_number = i;
-+         }
-+      }
- #endif
-    }
- 
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0087-v3dv-setup-TLB-clear-color-for-meta-operations-in-v7.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0087-v3dv-setup-TLB-clear-color-for-meta-operations-in-v7.patch
@ -1,126 +0,0 @@
-From ff5b5d4405b1d5600d7f1c4355202fd303f56700 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 22 Sep 2021 12:04:21 +0200
-Subject: [PATCH 087/142] v3dv: setup TLB clear color for meta operations in
- v71
-
---
- src/broadcom/vulkan/v3dvx_meta_common.c | 46 +++++++++++++++----------
- 1 file changed, 27 insertions(+), 19 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
-index c6391bc6d83..09ebcfa97c1 100644
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
-+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
-@@ -75,8 +75,9 @@ emit_rcl_prologue(struct v3dv_job *job,
-       config.internal_depth_type = fb->internal_depth_type;
-    }
- 
-+   const uint32_t *color = NULL;
-    if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
-      uint32_t clear_pad = 0;
-+      UNUSED uint32_t clear_pad = 0;
-       if (clear_info->image) {
-          const struct v3dv_image *image = clear_info->image;
- 
-@@ -101,20 +102,16 @@ emit_rcl_prologue(struct v3dv_job *job,
-          }
-       }
- 
-+      color = &clear_info->clear_value->color[0];
-+
- #if V3D_VERSION == 42
-      const uint32_t *color = &clear_info->clear_value->color[0];
-       cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
-          clear.clear_color_low_32_bits = color[0];
-          clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
-          clear.render_target_number = 0;
-       };
-#endif
-#if V3D_VERSION >= 71
-   unreachable("Hardware generation 71 not supported yet.");
-#endif
- 
-       if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
-#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
-             clear.clear_color_mid_low_32_bits =
-               ((color[1] >> 24) | (color[2] << 8));
-@@ -122,25 +119,16 @@ emit_rcl_prologue(struct v3dv_job *job,
-               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
-             clear.render_target_number = 0;
-          };
-#endif
-#if V3D_VERSION >= 71
-   unreachable("Hardware generation 71 not supported yet.");
-#endif
-
-       }
- 
-       if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
-#if V3D_VERSION == 42
-          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
-             clear.uif_padded_height_in_uif_blocks = clear_pad;
-             clear.clear_color_high_16_bits = color[3] >> 16;
-             clear.render_target_number = 0;
-          };
-#endif
-#if V3D_VERSION >= 71
-   unreachable("Hardware generation 71 not supported yet.");
-#endif
-       }
-+#endif
-    }
- 
- #if V3D_VERSION == 42
-@@ -150,8 +138,11 @@ emit_rcl_prologue(struct v3dv_job *job,
-       rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
-    }
- #endif
-+
- #if V3D_VERSION >= 71
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-+      if (color)
-+         rt.clear_color_low_bits = color[0];
-       rt.internal_bpp = tiling->internal_bpp;
-       rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type,
-                                                                       fb->vk_format);
-@@ -161,6 +152,24 @@ emit_rcl_prologue(struct v3dv_job *job,
-       rt.base_address = 0;
-       rt.render_target_number = 0;
-    }
-+
-+   if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
-+      cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
-+         rt.clear_color_mid_bits = /* 40 bits (32 + 8)  */
-+            ((uint64_t) color[1]) |
-+            (((uint64_t) (color[2] & 0xff)) << 32);
-+         rt.render_target_number = 0;
-+      }
-+   }
-+
-+   if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_128) {
-+      cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
-+         rt.clear_color_top_bits = /* 56 bits (24 + 32) */
-+            (((uint64_t) (color[2] & 0xffffff00)) >> 8) |
-+            (((uint64_t) (color[3])) << 24);
-+         rt.render_target_number = 0;
-+      }
-+   }
- #endif
- 
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
-@@ -229,9 +238,8 @@ emit_frame_setup(struct v3dv_job *job,
-          }
- #endif
- #if V3D_VERSION >= 71
-      unreachable("Hardware generation 71 not supported yet.");
-+         cl_emit(rcl, CLEAR_RENDER_TARGETS, clear);
- #endif
-
-       }
-       cl_emit(rcl, END_OF_TILE_MARKER, end);
-    }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0088-v3dv-fix-up-texture-shader-state-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0088-v3dv-fix-up-texture-shader-state-for-v71.patch
@ -1,49 +0,0 @@
-From 1e9d7d69849fa646b331f7661c74ee138badc4bb Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 25 Oct 2021 01:37:12 +0200
-Subject: [PATCH 088/142] v3dv: fix up texture shader state for v71
-
-There are some new fields for YCbCr with pointers for the various
-planes in multi-planar formats. These need to match the base address
-pointer in the texture state, or the hardware will assume this is a
-multi-planar texture.
---
- src/broadcom/vulkan/v3dvx_image.c | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
-diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c
-index dac6ff2741f..848290c2a47 100644
--- a/src/broadcom/vulkan/v3dvx_image.c
-+++ b/src/broadcom/vulkan/v3dvx_image.c
-@@ -129,6 +129,14 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
-             v3dv_layer_offset(image, 0, image_view->vk.base_array_layer,
-                               iplane);
-          tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
-+
-+#if V3D_VERSION >= 71
-+         tex.chroma_offset_x = 1;
-+         tex.chroma_offset_y = 1;
-+         /* See comment in XML field definition for rationale of the shifts */
-+         tex.texture_base_pointer_cb = base_offset >> 6;
-+         tex.texture_base_pointer_cr = base_offset >> 6;
-+#endif
-       }
-    }
- }
-@@ -191,5 +199,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
-          buffer_view->offset;
- 
-       tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
-+
-+#if V3D_VERSION >= 71
-+      tex.chroma_offset_x = 1;
-+      tex.chroma_offset_y = 1;
-+      /* See comment in XML field definition for rationale of the shifts */
-+      tex.texture_base_pointer_cb = base_offset >> 6;
-+      tex.texture_base_pointer_cr = base_offset >> 6;
-+#endif
-    }
- }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0089-v3dv-handle-new-texture-state-transfer-functions-in-.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0089-v3dv-handle-new-texture-state-transfer-functions-in-.patch
@ -1,52 +0,0 @@
-From 1f150a3a92741f7654a13626bd5b27b5575f2b76 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Mon, 25 Oct 2021 01:38:31 +0200
-Subject: [PATCH 089/142] v3dv: handle new texture state transfer functions in
- v71
-
---
- src/broadcom/vulkan/v3dvx_image.c | 11 +++++++----
- 1 file changed, 7 insertions(+), 4 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c
-index 848290c2a47..437d4588c7e 100644
--- a/src/broadcom/vulkan/v3dvx_image.c
-+++ b/src/broadcom/vulkan/v3dvx_image.c
-@@ -108,15 +108,16 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
- 
-          tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64;
- 
-+         bool is_srgb = vk_format_is_srgb(image_view->vk.format);
- #if V3D_VERSION == 42
-          tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
- #endif
- 
- #if V3D_VERSION == 42
-         tex.srgb = vk_format_is_srgb(image_view->vk.view_format);
-+         tex.srgb = is_srgb;
- #endif
- #if V3D_VERSION >= 71
-      unreachable("Hardware generation 71 not supported yet.");
-+         tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
- #endif
- 
-          /* At this point we don't have the job. That's the reason the first
-@@ -181,11 +182,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
- 
-       assert(buffer_view->format->plane_count == 1);
-       tex.texture_type = buffer_view->format->planes[0].tex_type;
-+
-+      bool is_srgb = vk_format_is_srgb(buffer_view->vk_format);
- #if V3D_VERSION == 42
-      tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
-+      tex.srgb = is_srgb;
- #endif
- #if V3D_VERSION >= 71
-      unreachable("Hardware generation 71 not supported yet.");
-+      tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
- #endif
- 
-       /* At this point we don't have the job. That's the reason the first
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0090-v3dv-implement-noop-job-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0090-v3dv-implement-noop-job-for-v71.patch
@ -1,42 +0,0 @@
-From 45de9f019ee92635de9a505db58439f0f4561281 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 28 Sep 2021 08:14:11 +0200
-Subject: [PATCH 090/142] v3dv: implement noop job for v71
-
---
- src/broadcom/vulkan/v3dvx_queue.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c
-index 1a26d04aef7..f8cee36e3bf 100644
--- a/src/broadcom/vulkan/v3dvx_queue.c
-+++ b/src/broadcom/vulkan/v3dvx_queue.c
-@@ -46,7 +46,8 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
-       config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
- #endif
- #if V3D_VERSION >= 71
-      unreachable("HW generation 71 not supported yet.");
-+      config.log2_tile_width = 3; /* Tile size 64 */
-+      config.log2_tile_height = 3; /* Tile size 64 */
- #endif
-    }
- 
-@@ -58,10 +59,13 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
-    }
- #endif
- #if V3D_VERSION >= 71
-   unreachable("Hardware generation 71 not supported yet.");
-+   cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-+      rt.internal_bpp = V3D_INTERNAL_BPP_32;
-+      rt.internal_type_and_clamping = V3D_RENDER_TARGET_TYPE_CLAMP_8;
-+      rt.stride = 1; /* Unused RT */
-+   }
- #endif
- 
-
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
-       clear.z_clear_value = 1.0f;
-       clear.stencil_clear_value = 0;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0091-v3dv-handle-render-pass-global-clear-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0091-v3dv-handle-render-pass-global-clear-for-v71.patch
@ -1,117 +0,0 @@
-From 3e607bb28056bb52242be6878281efae84026813 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 28 Sep 2021 08:23:48 +0200
-Subject: [PATCH 091/142] v3dv: handle render pass global clear for v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 66 ++++++++++++++++----------
- 1 file changed, 41 insertions(+), 25 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index 1b39e230580..48b2e319e51 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -362,6 +362,11 @@ cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
-                                              iview->vk.base_array_layer + layer,
-                                              image_plane);
- 
-+   /* The Clear Buffer bit is not supported for Z/Stencil stores in 7.x and it
-+    * is broken in earlier V3D versions.
-+    */
-+   assert((buffer != Z && buffer != STENCIL && buffer != ZSTENCIL) || !clear);
-+
-    cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
-       store.buffer_to_store = buffer;
-       store.address = v3dv_cl_address(image->planes[image_plane].mem->bo, layer_offset);
-@@ -484,6 +489,30 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
-       const VkImageAspectFlags aspects =
-          vk_format_aspects(ds_attachment->desc.format);
- 
-+#if V3D_VERSION <= 42
-+      /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
-+       * for depth/stencil.
-+       *
-+       * There used to be some confusion regarding the Clear Tile Buffers
-+       * Z/S bit also being broken, but we confirmed with Broadcom that this
-+       * is not the case, it was just that some other hardware bugs (that we
-+       * need to work around, such as GFXH-1461) could cause this bit to behave
-+       * incorrectly.
-+       *
-+       * There used to be another issue where the RTs bit in the Clear Tile
-+       * Buffers packet also cleared Z/S, but Broadcom confirmed this is
-+       * fixed since V3D 4.1.
-+       *
-+       * So if we have to emit a clear of depth or stencil we don't use
-+       * the per-buffer store clear bit, even if we need to store the buffers,
-+       * instead we always have to use the Clear Tile Buffers Z/S bit.
-+       * If we have configured the job to do early Z/S clearing, then we
-+       * don't want to emit any Clear Tile Buffers command at all here.
-+       *
-+       * Note that GFXH-1689 is not reproduced in the simulator, where
-+       * using the clear buffer bit in depth/stencil stores works fine.
-+       */
-+
-       /* Only clear once on the first subpass that uses the attachment */
-       uint32_t ds_first_subpass = !state->pass->multiview_enabled ?
-          ds_attachment->first_subpass :
-@@ -503,6 +532,17 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
-                            ds_attachment->desc.stencilLoadOp,
-                            subpass->do_stencil_clear_with_draw);
- 
-+      use_global_zs_clear = !state->job->early_zs_clear &&
-+         (needs_depth_clear || needs_stencil_clear);
-+#endif
-+#if V3D_VERSION >= 71
-+      /* The store command's clear buffer bit cannot be used for Z/S stencil:
-+       * since V3D 4.5.6 Z/S buffers are automatically cleared between tiles,
-+       * so we don't want to emit redundant clears here.
-+       */
-+      use_global_zs_clear = false;
-+#endif
-+
-       /* Skip the last store if it is not required */
-       uint32_t ds_last_subpass = !pass->multiview_enabled ?
-          ds_attachment->last_subpass :
-@@ -545,30 +585,6 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
-          needs_stencil_store = subpass->resolve_stencil;
-       }
- 
-      /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
-       * for depth/stencil.
-       *
-       * There used to be some confusion regarding the Clear Tile Buffers
-       * Z/S bit also being broken, but we confirmed with Broadcom that this
-       * is not the case, it was just that some other hardware bugs (that we
-       * need to work around, such as GFXH-1461) could cause this bit to behave
-       * incorrectly.
-       *
-       * There used to be another issue where the RTs bit in the Clear Tile
-       * Buffers packet also cleared Z/S, but Broadcom confirmed this is
-       * fixed since V3D 4.1.
-       *
-       * So if we have to emit a clear of depth or stencil we don't use
-       * the per-buffer store clear bit, even if we need to store the buffers,
-       * instead we always have to use the Clear Tile Buffers Z/S bit.
-       * If we have configured the job to do early Z/S clearing, then we
-       * don't want to emit any Clear Tile Buffers command at all here.
-       *
-       * Note that GFXH-1689 is not reproduced in the simulator, where
-       * using the clear buffer bit in depth/stencil stores works fine.
-       */
-      use_global_zs_clear = !state->job->early_zs_clear &&
-         (needs_depth_clear || needs_stencil_clear);
-       if (needs_depth_store || needs_stencil_store) {
-          const uint32_t zs_buffer =
-             v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
-@@ -673,7 +689,7 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
-       }
- #endif
- #if V3D_VERSION >= 71
-      unreachable("Hardware generation 71 not supported yet.");
-+      cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
- #endif
-    }
- }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0092-v3dv-GFX-1461-does-not-affect-V3D-7.x.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0092-v3dv-GFX-1461-does-not-affect-V3D-7.x.patch
@ -1,32 +0,0 @@
-From 3794f6f08c559c4e442b57e992d501fb7d515b9b Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 28 Sep 2021 08:31:04 +0200
-Subject: [PATCH 092/142] v3dv: GFX-1461 does not affect V3D 7.x
-
---
- src/broadcom/vulkan/v3dv_pass.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c
-index 20f5014268d..3e82c15df88 100644
--- a/src/broadcom/vulkan/v3dv_pass.c
-+++ b/src/broadcom/vulkan/v3dv_pass.c
-@@ -236,11 +236,13 @@ v3dv_CreateRenderPass2(VkDevice _device,
- 
-          /* GFXH-1461: if depth is cleared but stencil is loaded (or vice versa),
-           * the clear might get lost. If a subpass has this then we can't emit
-          * the clear using the TLB and we have to do it as a draw call.
-+          * the clear using the TLB and we have to do it as a draw call. This
-+          * issue is fixed since V3D 4.3.18.
-           *
-           * FIXME: separate stencil.
-           */
-         if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
-+         if (device->devinfo.ver == 42 &&
-+             subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
-             struct v3dv_render_pass_attachment *att =
-                &pass->attachments[subpass->ds_attachment.attachment];
-             if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0093-v3dv-update-thread-end-restrictions-validation-for-v.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0093-v3dv-update-thread-end-restrictions-validation-for-v.patch
@ -1,69 +0,0 @@
-From 5be7f484210103e40b77fa3135042da4a8406659 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Tue, 28 Sep 2021 08:59:08 +0200
-Subject: [PATCH 093/142] v3dv: update thread end restrictions validation for
- v71
-
---
- src/broadcom/compiler/qpu_validate.c | 37 +++++++++++++++++++++++++---
- 1 file changed, 34 insertions(+), 3 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
-index 1082fb7d50a..0466ee5d0b6 100644
--- a/src/broadcom/compiler/qpu_validate.c
-+++ b/src/broadcom/compiler/qpu_validate.c
-@@ -316,17 +316,48 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
-             inst->type == V3D_QPU_INSTR_TYPE_ALU) {
-                 if ((inst->alu.add.op != V3D_QPU_A_NOP &&
-                      !inst->alu.add.magic_write)) {
-                        fail_instr(state, "RF write after THREND");
-+                        if (devinfo->ver <= 42) {
-+                                fail_instr(state, "RF write after THREND");
-+                        } else if (devinfo->ver >= 71) {
-+                                if (state->last_thrsw_ip - state->ip == 0) {
-+                                        fail_instr(state,
-+                                                   "ADD RF write at THREND");
-+                                }
-+                                if (inst->alu.add.waddr == 2 ||
-+                                    inst->alu.add.waddr == 3) {
-+                                        fail_instr(state,
-+                                                   "RF2-3 write after THREND");
-+                                }
-+                        }
-                 }
- 
-                 if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
-                      !inst->alu.mul.magic_write)) {
-                        fail_instr(state, "RF write after THREND");
-+                        if (devinfo->ver <= 42) {
-+                                fail_instr(state, "RF write after THREND");
-+                        } else if (devinfo->ver >= 71) {
-+                                if (state->last_thrsw_ip - state->ip == 0) {
-+                                        fail_instr(state,
-+                                                   "MUL RF write at THREND");
-+                                }
-+
-+                                if (inst->alu.mul.waddr == 2 ||
-+                                    inst->alu.mul.waddr == 3) {
-+                                        fail_instr(state,
-+                                                   "RF2-3 write after THREND");
-+                                }
-+                        }
-                 }
- 
-                 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
-                     !inst->sig_magic) {
-                        fail_instr(state, "RF write after THREND");
-+                        if (devinfo->ver <= 42) {
-+                                fail_instr(state, "RF write after THREND");
-+                        } else if (devinfo->ver >= 71 &&
-+                                   (inst->sig_addr == 2 ||
-+                                    inst->sig_addr == 3)) {
-+                                fail_instr(state, "RF2-3 write after THREND");
-+                        }
-                 }
- 
-                 /* GFXH-1625: No TMUWT in the last instruction */
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0094-v3dv-handle-early-Z-S-clears-for-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0094-v3dv-handle-early-Z-S-clears-for-v71.patch
@ -1,68 +0,0 @@
-From a751dff57b6d769f5b031054cc65415cc3b44c08 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 29 Sep 2021 08:22:59 +0200
-Subject: [PATCH 094/142] v3dv: handle early Z/S clears for v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 30 ++++++++++++++++++++------
- 1 file changed, 23 insertions(+), 7 deletions(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index 48b2e319e51..4580e2a4650 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -998,6 +998,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-           * Early-Z/S clearing is independent of Early Z/S testing, so it is
-           * possible to enable one but not the other so long as their
-           * respective requirements are met.
-+          *
-+          * From V3D 4.5.6, Z/S buffers are always cleared automatically
-+          * between tiles, but we still want to enable early ZS clears
-+          * when Z/S are not loaded or stored.
-           */
-          struct v3dv_render_pass_attachment *ds_attachment =
-             &pass->attachments[ds_attachment_idx];
-@@ -1005,21 +1009,33 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
-          const VkImageAspectFlags ds_aspects =
-             vk_format_aspects(ds_attachment->desc.format);
- 
-         bool needs_depth_clear =
-            check_needs_clear(state,
-                              ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
-                              ds_attachment->first_subpass,
-                              ds_attachment->desc.loadOp,
-                              subpass->do_depth_clear_with_draw);
-
-          bool needs_depth_store =
-             v3dv_cmd_buffer_check_needs_store(state,
-                                               ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
-                                               ds_attachment->last_subpass,
-                                               ds_attachment->desc.storeOp) ||
-                                               subpass->resolve_depth;
-+#if V3D_VERSION <= 42
-+         bool needs_depth_clear =
-+            check_needs_clear(state,
-+                              ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
-+                              ds_attachment->first_subpass,
-+                              ds_attachment->desc.loadOp,
-+                              subpass->do_depth_clear_with_draw);
- 
-          do_early_zs_clear = needs_depth_clear && !needs_depth_store;
-+#endif
-+#if V3D_VERSION >= 71
-+         bool needs_depth_load =
-+            v3dv_cmd_buffer_check_needs_load(state,
-+                                             ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
-+                                             ds_attachment->first_subpass,
-+                                             ds_attachment->desc.loadOp,
-+                                             ds_attachment->last_subpass,
-+                                             ds_attachment->desc.storeOp);
-+         do_early_zs_clear = !needs_depth_load && !needs_depth_store;
-+#endif
-+
-          if (do_early_zs_clear &&
-              vk_format_has_stencil(ds_attachment->desc.format)) {
-             bool needs_stencil_load =
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0095-v3dv-handle-RTs-with-no-color-targets-in-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0095-v3dv-handle-RTs-with-no-color-targets-in-v71.patch
@ -1,34 +0,0 @@
-From 2add46ebce4760bf8349606201324ee0e6b1f9da Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Wed, 29 Sep 2021 09:07:28 +0200
-Subject: [PATCH 095/142] v3dv: handle RTs with no color targets in v71
-
---
- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
-diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-index 4580e2a4650..750486a6ccf 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
-@@ -1175,6 +1175,17 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
- #endif
-    }
- 
-+#if V3D_VERSION >= 71
-+   /* If we don't have any color RTs, we still need to emit one and flag
-+    * it as not used using stride = 1.
-+    */
-+   if (subpass->color_count == 0) {
-+      cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
-+         rt.stride = 1;
-+      }
-+   }
-+#endif
-+
- #if V3D_VERSION == 42
-    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-       cmd_buffer_render_pass_setup_render_target
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0096-v3dv-no-specific-separate_segments-flag-for-V3D-7.1.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0096-v3dv-no-specific-separate_segments-flag-for-V3D-7.1.patch
@ -1,85 +0,0 @@
-From 019abbd34d2d904d6bb33f9fa4433cb53ca7899c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
-Date: Fri, 1 Oct 2021 15:18:38 +0200
-Subject: [PATCH 096/142] v3dv: no specific separate_segments flag for V3D 7.1
-
-On V3D 7.1 there is not a flag on the Shader State Record to specify
-if we are using shared or separate segments. This is done by setting
-the vpm input size to 0 (so we need to ensure that the output would be
-the max needed for input/output).
-
-We were already doing the latter on the prog_data_vs, so we just need
-to use those values, instead of assigning default values.
-
-As we are here, we also add some comments on the compiler part.
---
- src/broadcom/compiler/qpu_schedule.c |  4 ++++
- src/broadcom/compiler/vir.c          |  4 ++++
- src/broadcom/vulkan/v3dvx_pipeline.c | 15 +++++++++++++--
- 3 files changed, 21 insertions(+), 2 deletions(-)
-
-diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
-index 77fb6a794e6..4f767296860 100644
--- a/src/broadcom/compiler/qpu_schedule.c
-+++ b/src/broadcom/compiler/qpu_schedule.c
-@@ -297,6 +297,10 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
-         /* If the input and output segments are shared, then all VPM reads to
-          * a location need to happen before all writes.  We handle this by
-          * serializing all VPM operations for now.
-+         *
-+         * FIXME: we are assuming that the segments are shared. That is
-+         * correct right now as we are only using shared, but technically you
-+         * can choose.
-          */
-         bool separate_vpm_segment = false;
- 
-diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
-index 7612eed7130..dd0aa761c43 100644
--- a/src/broadcom/compiler/vir.c
-+++ b/src/broadcom/compiler/vir.c
-@@ -745,6 +745,10 @@ v3d_vs_set_prog_data(struct v3d_compile *c,
- 
-         /* Set us up for shared input/output segments.  This is apparently
-          * necessary for our VCM setup to avoid varying corruption.
-+         *
-+         * FIXME: initially testing on V3D 7.1 seems to work fine when using
-+         * separate segments. So we could try to reevaluate in the future, if
-+         * there is any advantage of using separate segments.
-          */
-         prog_data->separate_segments = false;
-         prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
-diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
-index a640c1d084a..a72ca3c241b 100644
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
-+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
-@@ -452,14 +452,25 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
-          prog_data_vs_bin->separate_segments;
-       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-          prog_data_vs->separate_segments;
-#endif
-
-       shader.coordinate_shader_input_vpm_segment_size =
-          prog_data_vs_bin->separate_segments ?
-          prog_data_vs_bin->vpm_input_size : 1;
-       shader.vertex_shader_input_vpm_segment_size =
-          prog_data_vs->separate_segments ?
-          prog_data_vs->vpm_input_size : 1;
-+#endif
-+
-+      /* On V3D 7.1 there isn't a specific flag to set if we are using
-+       * shared/separate segments or not. We just set the value of
-+       * vpm_input_size to 0, and set output to the max needed. That should be
-+       * already properly set on prog_data_vs_bin
-+       */
-+#if V3D_VERSION == 71
-+      shader.coordinate_shader_input_vpm_segment_size =
-+         prog_data_vs_bin->vpm_input_size;
-+      shader.vertex_shader_input_vpm_segment_size =
-+         prog_data_vs->vpm_input_size;
-+#endif
- 
-       shader.coordinate_shader_output_vpm_segment_size =
-          prog_data_vs_bin->vpm_output_size;
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0097-v3dv-don-t-convert-floating-point-border-colors-in-v.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0097-v3dv-don-t-convert-floating-point-border-colors-in-v.patch
@ -1,39 +0,0 @@
-From 4f6b4f91577ec04aab907d59d836d0c17731a9d0 Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Thu, 7 Oct 2021 12:43:49 +0200
-Subject: [PATCH 097/142] v3dv: don't convert floating point border colors in
- v71
-
-The TMU does this for us now.
---
- src/broadcom/vulkan/v3dvx_device.c | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c
-index e235983864c..72daefadb08 100644
--- a/src/broadcom/vulkan/v3dvx_device.c
-+++ b/src/broadcom/vulkan/v3dvx_device.c
-@@ -118,7 +118,11 @@ static union pipe_color_union encode_border_color(
-                              (1 << (desc->channel[i].size - 1)) - 1);
-    }
- 
-   /* convert from float to expected format */
-+#if V3D_VERSION <= 42
-+   /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
-+    * for us. In V3D 4.x we need to manually convert floating point color
-+    * values to the expected format.
-+    */
-    if (vk_format_is_srgb(bc_info->format) ||
-        vk_format_is_compressed(bc_info->format)) {
-       for (int i = 0; i < 4; i++)
-@@ -170,6 +174,7 @@ static union pipe_color_union encode_border_color(
-          }
-       }
-    }
-+#endif
- 
-    return border;
- }
-- 
-2.39.2
-
--- a/projects/RPi/devices/RPi5/patches/mesa/0098-v3dv-handle-Z-clipping-in-v71.patch
+++ b/projects/RPi/devices/RPi5/patches/mesa/0098-v3dv-handle-Z-clipping-in-v71.patch
@ -1,60 +0,0 @@
-From d8083cb8f104e0f035f5b812e000a500fa52d66f Mon Sep 17 00:00:00 2001
-From: Iago Toral Quiroga <itoral@igalia.com>
-Date: Fri, 15 Oct 2021 13:06:31 +0200
-Subject: [PATCH 098/142] v3dv: handle Z clipping in v71
-
-Fixes the following tests:
-
-dEQP-VK.clipping.clip_volume.*
-dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_* (except deltazero)
---
- src/broadcom/vulkan/v3dvx_pipeline.c | 33 ++++++++++++++++++++++++++++
- 1 file changed, 33 insertions(+)
-
-diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
-index a72ca3c241b..7b1133f8173 100644
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
-+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
-@@ -227,6 +227,39 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline,
-          ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
- 
-       pipeline->z_updates_enable = config.z_updates_enable;
-+
-+#if V3D_VERSION >= 71
-+      /* From the Vulkan spec:
-+       *
-+       *    "depthClampEnable controls whether to clamp the fragment’s depth
-+       *     values as described in Depth Test. If the pipeline is not created
-+       *     with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
-+       *     then enabling depth clamp will also disable clipping primitives to
-+       *     the z planes of the frustrum as described in Primitive Clipping.
-+       *     Otherwise depth clipping is controlled by the state set in
-+       *     VkPipelineRasterizationDepthClipStateCreateInfoEXT."
-+       *
-+       * Note: neither depth clamping nor VK_EXT_depth_clip_enable are actually
-+       * supported in the driver yet, so in practice we are always enabling Z
-+       * clipping for now.
-+       */
-+      bool z_clip_enable = false;
-+      const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
-+         ds_info ? vk_find_struct_const(ds_info->pNext,
-+                                        PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
-+                   NULL;
-+      if (clip_info)
-+         z_clip_enable = clip_info->depthClipEnable;
-+      else if (!(rs_info && rs_info->depthClampEnable))
-+         z_clip_enable = true;
-+
-+      if (z_clip_enable) {
-+         config.z_clipping_mode = pipeline->negative_one_to_one ?
-+	    V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
-+      } else {
-+         config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
-+      }
-+#endif
-    };
- }
- 
-- 
-2.39.2
-
--- a/Show More
+++ b/Show More