diff --git a/packages/graphics/mesa/package.mk b/packages/graphics/mesa/package.mk index 6dda0840a4..93653a9239 100644 --- a/packages/graphics/mesa/package.mk +++ b/packages/graphics/mesa/package.mk @@ -3,8 +3,8 @@ # Copyright (C) 2018-present Team LibreELEC (https://libreelec.tv) PKG_NAME="mesa" -PKG_VERSION="23.2.1" -PKG_SHA256="64de0616fc2d801f929ab1ac2a4f16b3e2783c4309a724c8a259b20df8bbc1cc" +PKG_VERSION="23.3.0" +PKG_SHA256="50f729dd60ed6335b989095baad81ef5edf7cfdd4b4b48b9b955917cb07d69c5" PKG_LICENSE="OSS" PKG_SITE="http://www.mesa3d.org/" PKG_URL="https://mesa.freedesktop.org/archive/mesa-${PKG_VERSION}.tar.xz" @@ -13,6 +13,10 @@ PKG_LONGDESC="Mesa is a 3-D graphics library with an API." get_graphicdrivers +if [ "${DEVICE}" = "Dragonboard" ]; then + PKG_DEPENDS_TARGET+=" libarchive libxml2 lua54" +fi + PKG_MESON_OPTS_TARGET="-Dgallium-drivers=${GALLIUM_DRIVERS// /,} \ -Dgallium-extra-hud=false \ -Dgallium-omx=disabled \ diff --git a/projects/RPi/devices/RPi5/patches/mesa/0001-broadcom-cle-clif-common-simulator-add-7.1-version-o.patch b/projects/RPi/devices/RPi5/patches/mesa/0001-broadcom-cle-clif-common-simulator-add-7.1-version-o.patch deleted file mode 100644 index ee9e032293..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0001-broadcom-cle-clif-common-simulator-add-7.1-version-o.patch +++ /dev/null @@ -1,332 +0,0 @@ -From f62aa2640f92796ff5216da0a5d3c8f46a2855b4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Mon, 26 Apr 2021 00:02:21 +0200 -Subject: [PATCH 001/142] broadcom(cle,clif,common,simulator): add 7.1 version - on the list of versions to build - -This adds 7.1 to the list of available V3D_VERSION, and first changes -on the simulator needed to get it working. - -Note that we needed to touch all those 4 codebases because it is -needed if we want to use V3D_DEBUG=clif with the simulator, that it is -the easier way to see which packets a vulkan program is using. - -About the simulator, this commit only handle the rename of some -registers. Any additional changes needed to get a proper support for -v71 will be handled them on following commits. ---- - src/broadcom/cle/meson.build | 3 +- - src/broadcom/cle/v3dx_pack.h | 2 + - src/broadcom/clif/clif_private.h | 2 + - src/broadcom/common/v3d_device_info.c | 1 + - src/broadcom/common/v3d_macros.h | 3 + - src/broadcom/meson.build | 2 +- - src/broadcom/simulator/v3d_simulator.c | 81 +++++++++++++++++++------ - src/broadcom/simulator/v3d_simulator.h | 5 ++ - src/broadcom/simulator/v3dx_simulator.c | 31 ++++++++-- - 9 files changed, 106 insertions(+), 24 deletions(-) - -diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build -index 31a0d5bfa94..8ac32b313e4 100644 ---- a/src/broadcom/cle/meson.build -+++ b/src/broadcom/cle/meson.build -@@ -23,7 +23,8 @@ v3d_versions = [ - [21, 21], - [33, 33], - [41, 33], -- [42, 33] -+ [42, 33], -+ [71, 33] - ] - - v3d_xml_files = [] -diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h -index 5762e5aaa70..e5a1eb26698 100644 ---- a/src/broadcom/cle/v3dx_pack.h -+++ b/src/broadcom/cle/v3dx_pack.h -@@ -37,6 +37,8 @@ - # include "cle/v3d_packet_v41_pack.h" - #elif (V3D_VERSION == 42) - # include "cle/v3d_packet_v42_pack.h" -+#elif (V3D_VERSION == 71) -+# include "cle/v3d_packet_v71_pack.h" - #else - # error "Need to add a pack header include for this v3d version" - #endif -diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h -index 6ace62b0310..cda407a00bf 100644 ---- a/src/broadcom/clif/clif_private.h -+++ b/src/broadcom/clif/clif_private.h -@@ -101,6 +101,8 @@ bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset, - const uint8_t *cl, uint32_t *size, bool reloc_mode); - bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset, - const uint8_t *cl, uint32_t *size, bool reloc_mode); -+bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset, -+ const uint8_t *cl, uint32_t *size, bool reloc_mode); - - static inline void - out(struct clif_dump *clif, const char *fmt, ...) -diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c -index 272190eb2e5..7e0862f1f02 100644 ---- a/src/broadcom/common/v3d_device_info.c -+++ b/src/broadcom/common/v3d_device_info.c -@@ -66,6 +66,7 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i - case 33: - case 41: - case 42: -+ case 71: - break; - default: - fprintf(stderr, -diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h -index fe89398208a..b4291fb5350 100644 ---- a/src/broadcom/common/v3d_macros.h -+++ b/src/broadcom/common/v3d_macros.h -@@ -41,6 +41,9 @@ - #elif (V3D_VERSION == 42) - # define V3DX(x) V3D42_##x - # define v3dX(x) v3d42_##x -+#elif (V3D_VERSION == 71) -+# define V3DX(x) V3D71_##x -+# define v3dX(x) v3d71_##x - #else - # error "Need to add prefixing macros for this v3d version" - #endif -diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build -index 2c10e46b188..73cb7aa0575 100644 ---- a/src/broadcom/meson.build -+++ b/src/broadcom/meson.build -@@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle') - - subdir('cle') - --v3d_versions = ['33', '41', '42'] -+v3d_versions = ['33', '41', '42', '71'] - v3d_libs = [] - - if with_gallium_v3d or with_broadcom_vk -diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c -index eea5d3f050e..5cceb1a82cc 100644 ---- a/src/broadcom/simulator/v3d_simulator.c -+++ b/src/broadcom/simulator/v3d_simulator.c -@@ -490,10 +490,20 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit) - - v3d_simulator_perfmon_switch(fd, submit->perfmon_id); - -- if (sim_state.ver >= 41) -- v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -- else -- v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -+ switch(sim_state.ver) { -+ case 33: -+ v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -+ break; -+ case 41: -+ case 42: -+ v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -+ break; -+ case 71: -+ v3d71_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -+ break; -+ default: -+ unreachable("Unsupported V3D version\n"); -+ } - - util_dynarray_foreach(&sim_state.bin_oom, struct v3d_simulator_bo *, - sim_bo) { -@@ -635,10 +645,17 @@ v3d_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) - static int - v3d_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args) - { -- if (sim_state.ver >= 41) -- return v3d41_simulator_get_param_ioctl(sim_state.v3d, args); -- else -+ switch(sim_state.ver) { -+ case 33: - return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); -+ case 41: -+ case 42: -+ return v3d41_simulator_get_param_ioctl(sim_state.v3d, args); -+ case 71: -+ return v3d71_simulator_get_param_ioctl(sim_state.v3d, args); -+ default: -+ unreachable("Unsupported V3D version\n"); -+ } - } - - static int -@@ -652,10 +669,20 @@ v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args) - v3d_simulator_copy_in_handle(file, args->bo_handles[2]); - v3d_simulator_copy_in_handle(file, args->bo_handles[3]); - -- if (sim_state.ver >= 41) -- ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args); -- else -+ switch(sim_state.ver) { -+ case 33: - ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args); -+ break; -+ case 41: -+ case 42: -+ ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args); -+ break; -+ case 71: -+ ret = v3d71_simulator_submit_tfu_ioctl(sim_state.v3d, args); -+ break; -+ default: -+ unreachable("Unsupported V3D version\n"); -+ } - - v3d_simulator_copy_out_handle(file, args->bo_handles[0]); - -@@ -682,11 +709,19 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args) - - v3d_simulator_perfmon_switch(fd, args->perfmon_id); - -- if (sim_state.ver >= 41) -- ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args, -- file->gmp->ofs); -- else -- ret = -1; -+ switch(sim_state.ver) { -+ case 41: -+ case 42: -+ ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args, -+ file->gmp->ofs); -+ break; -+ case 71: -+ ret = v3d71_simulator_submit_csd_ioctl(sim_state.v3d, args, -+ file->gmp->ofs); -+ break; -+ default: -+ ret = -1; -+ } - - for (int i = 0; i < args->bo_handle_count; i++) - v3d_simulator_copy_out_handle(file, bo_handles[i]); -@@ -880,10 +915,20 @@ v3d_simulator_init_global() - - util_dynarray_init(&sim_state.bin_oom, NULL); - -- if (sim_state.ver >= 41) -- v3d41_simulator_init_regs(sim_state.v3d); -- else -+ switch(sim_state.ver) { -+ case 33: - v3d33_simulator_init_regs(sim_state.v3d); -+ break; -+ case 41: -+ case 42: -+ v3d41_simulator_init_regs(sim_state.v3d); -+ break; -+ case 71: -+ v3d71_simulator_init_regs(sim_state.v3d); -+ break; -+ default: -+ unreachable("Not supported V3D version\n"); -+ } - } - - struct v3d_simulator_file * -diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h -index ddb079c1455..1472c313a03 100644 ---- a/src/broadcom/simulator/v3d_simulator.h -+++ b/src/broadcom/simulator/v3d_simulator.h -@@ -52,6 +52,11 @@ uint32_t v3d_simulator_get_mem_free(void); - # define v3dX(x) v3d41_##x - # include "v3dx_simulator.h" - # undef v3dX -+ -+# define v3dX(x) v3d71_##x -+# include "v3dx_simulator.h" -+# undef v3dX -+ - #endif - - #endif -diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c -index c9322f0397b..723796b16c9 100644 ---- a/src/broadcom/simulator/v3dx_simulator.c -+++ b/src/broadcom/simulator/v3dx_simulator.c -@@ -46,11 +46,15 @@ - - #define HW_REGISTER_RO(x) (x) - #define HW_REGISTER_RW(x) (x) --#if V3D_VERSION >= 41 -+#if V3D_VERSION == 71 -+#include "libs/core/v3d/registers/7.1.5.1/v3d.h" -+#else -+#if V3D_VERSION == 41 || V3D_VERSION == 42 - #include "libs/core/v3d/registers/4.1.35.0/v3d.h" - #else - #include "libs/core/v3d/registers/3.3.0.0/v3d.h" - #endif -+#endif - - #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val) - #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) -@@ -310,16 +314,17 @@ v3d_isr_core(struct v3d_hw *v3d, - return; - } - -+#if V3D_VERSION <= 42 - if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) { - fprintf(stderr, "GMP violation at 0x%08x\n", - V3D_READ(V3D_GMP_VIO_ADDR)); -- abort(); - } else { - fprintf(stderr, - "Unexpected ISR with core status 0x%08x\n", - core_status); - } - abort(); -+#endif - } - - static void -@@ -396,6 +401,18 @@ v3d_isr_hub(struct v3d_hw *v3d) - } - - handle_mmu_interruptions(v3d, hub_status); -+ -+#if V3D_VERSION == 71 -+ if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) { -+ fprintf(stderr, "GMP violation at 0x%08x\n", -+ V3D_READ(V3D_GMP_VIO_ADDR)); -+ } else { -+ fprintf(stderr, -+ "Unexpected ISR with status 0x%08x\n", -+ hub_status); -+ } -+ abort(); -+#endif - } - - static void -@@ -436,8 +453,11 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d) - * for tracing. Perhaps we should evaluate to do the same here and add - * some debug options. - */ -- uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET | -- V3D_CTL_0_INT_STS_INT_OUTOMEM_SET); -+ uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET; -+#if V3D_VERSION <= 42 -+ core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET; -+#endif -+ - V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts); - V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts); - -@@ -447,6 +467,9 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d) - V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */ - V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */ - -+#if V3D_VERSION == 71 -+ hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET; -+#endif - V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts); - V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts); - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0142-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch b/projects/RPi/devices/RPi5/patches/mesa/0001-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch similarity index 81% rename from projects/RPi/devices/RPi5/patches/mesa/0142-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch rename to projects/RPi/devices/RPi5/patches/mesa/0001-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch index 4055fc4658..e7df5c3908 100644 --- a/projects/RPi/devices/RPi5/patches/mesa/0142-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch +++ b/projects/RPi/devices/RPi5/patches/mesa/0001-gallium-Add-kmsro-drivers-for-RP1-DSI-DPI-and-VEC-de.patch @@ -1,7 +1,7 @@ -From 3322c102282cf726ae575b122358060abd5b24db Mon Sep 17 00:00:00 2001 +From 54cc206be2d48916862d7e264e886f58b27dd653 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Thu, 5 Oct 2023 19:32:10 +0100 -Subject: [PATCH 142/142] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC +Subject: [PATCH 1/3] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC devices Signed-off-by: Dave Stevenson @@ -11,7 +11,7 @@ Signed-off-by: Dave Stevenson 2 files changed, 6 insertions(+) diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build -index fbec1da957b..59daf3b6fb6 100644 +index 66619bba0db..443923772e8 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -68,6 +68,9 @@ libgallium_dri = shared_library( @@ -22,10 +22,10 @@ index fbec1da957b..59daf3b6fb6 100644 + 'drm-rp1-dsi_dri.so', + 'drm-rp1-vec_dri.so', 'exynos_dri.so', + 'hdlcd_dri.so', 'hx8357d_dri.so', - 'ili9225_dri.so', diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c -index d506869cbb4..ecb25edd03b 100644 +index 9d3069eb004..79f60a7224a 100644 --- a/src/gallium/targets/dri/target.c +++ b/src/gallium/targets/dri/target.c @@ -98,6 +98,9 @@ DEFINE_LOADER_DRM_ENTRYPOINT(tegra); @@ -36,8 +36,8 @@ index d506869cbb4..ecb25edd03b 100644 +DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_dsi) +DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_vec) DEFINE_LOADER_DRM_ENTRYPOINT(exynos) + DEFINE_LOADER_DRM_ENTRYPOINT(hdlcd) DEFINE_LOADER_DRM_ENTRYPOINT(hx8357d) - DEFINE_LOADER_DRM_ENTRYPOINT(ili9225) -- 2.39.2 diff --git a/projects/RPi/devices/RPi5/patches/mesa/0002-broadcom-simulator-reset-CFG7-for-compute-dispatch-i.patch b/projects/RPi/devices/RPi5/patches/mesa/0002-broadcom-simulator-reset-CFG7-for-compute-dispatch-i.patch deleted file mode 100644 index 5224359446..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0002-broadcom-simulator-reset-CFG7-for-compute-dispatch-i.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 9e85edd1b347b0e779b393f463f42044a720bcff Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 28 Sep 2021 13:16:49 +0200 -Subject: [PATCH 002/142] broadcom/simulator: reset CFG7 for compute dispatch - in v71 - -This register is new in 7.x, it doesn't seem that we need to -do anything specific for now, but let's make sure it is reset -every time. ---- - src/broadcom/simulator/v3dx_simulator.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c -index 723796b16c9..f23b0538de3 100644 ---- a/src/broadcom/simulator/v3dx_simulator.c -+++ b/src/broadcom/simulator/v3dx_simulator.c -@@ -227,6 +227,9 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, - V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]); - V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]); - V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]); -+#if V3D_VERSION >= 71 -+ V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0); -+#endif - /* CFG0 kicks off the job */ - V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]); - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0066-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch b/projects/RPi/devices/RPi5/patches/mesa/0002-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch similarity index 68% rename from projects/RPi/devices/RPi5/patches/mesa/0066-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch rename to projects/RPi/devices/RPi5/patches/mesa/0002-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch index ee65de9a53..bd6e399b9f 100644 --- a/projects/RPi/devices/RPi5/patches/mesa/0066-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch +++ b/projects/RPi/devices/RPi5/patches/mesa/0002-nir-add-new-opcodes-to-map-new-v71-packing-conversio.patch @@ -1,8 +1,8 @@ -From 4f33de7771621e15aae3e3c60c09fd5a2f29bdac Mon Sep 17 00:00:00 2001 +From 80050d6960a688d061eac9798c6f5f1b0eb3e960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= Date: Tue, 30 Nov 2021 02:39:20 +0100 -Subject: [PATCH 066/142] nir: add new opcodes to map new v71 - packing/conversion instructions +Subject: [PATCH 2/3] nir: add new opcodes to map new v71 packing/conversion + instructions Since v71, broadcom hw include specific packing/conversion instructions, so this commit adds opcodes to be able to make use of @@ -28,17 +28,14 @@ integer. Interestingly broadcom also defines a similar one that packs the higher halfword. Not used yet. -FIXME: vftounorm10lo/hi constant expression implementation is somewhat -convoluted. It is likely that it could be implemented in a more easy -way. But it works (passing the tests added with CTS issue #3372, -created with this change in mind). +Reviewed-by: Iago Toral Quiroga --- - src/compiler/nir/nir_constant_expressions.py | 106 +++++++++++++++++++ - src/compiler/nir/nir_opcodes.py | 44 ++++++++ - 2 files changed, 150 insertions(+) + src/compiler/nir/nir_constant_expressions.py | 94 ++++++++++++++++++++ + src/compiler/nir/nir_opcodes.py | 52 +++++++++++ + 2 files changed, 146 insertions(+) diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py -index e6383b67737..46395d79a89 100644 +index e6383b67737..0d0797526a9 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -62,6 +62,8 @@ template = """\ @@ -50,7 +47,7 @@ index e6383b67737..46395d79a89 100644 #include "nir_constant_expressions.h" /** -@@ -277,6 +279,110 @@ unpack_half_1x16(uint16_t u) +@@ -277,6 +279,98 @@ unpack_half_1x16(uint16_t u) return _mesa_half_to_float(u); } @@ -61,24 +58,22 @@ index e6383b67737..46395d79a89 100644 +static uint32_t v11fpack_v3d(const uint32_t src0, + const uint32_t src1) +{ -+ float rgb[3]; -+ -+ rgb[0] = unpack_half_1x16((src0 & 0xffff)); -+ rgb[1] = unpack_half_1x16((src0 >> 16)); -+ rgb[2] = unpack_half_1x16((src1 & 0xffff)); ++ float rgb[3] = { ++ unpack_half_1x16((src0 & 0xffff)), ++ unpack_half_1x16((src0 >> 16)), ++ unpack_half_1x16((src1 & 0xffff)), ++ }; + + return float3_to_r11g11b10f(rgb); +} + +/** + * The three methods below are basically wrappers over pack_s/unorm_1x8/1x16, -+ * as it receives a uint16_t val instead of a float ++ * as they receives a uint16_t val instead of a float + */ -+static uint8_t _mesa_half_to_snorm8(uint16_t val) ++static inline uint8_t _mesa_half_to_snorm8(uint16_t val) +{ -+ float x = _mesa_half_to_float(val); -+ -+ return pack_snorm_1x8(x); ++ return pack_snorm_1x8(_mesa_half_to_float(val)); +} + +static uint16_t _mesa_float_to_snorm16(uint32_t val) @@ -95,51 +90,42 @@ index e6383b67737..46395d79a89 100644 + return pack_unorm_1x16(aux.f); +} + -+/* FIXME: the implementation below of vftounorm10hi/lo is somewhat too -+ * verbose. It is likely that there would be a simpler way to implement -+ * it. -+ */ -+static uint32_t float_pack16_v3d(uint32_t f32) ++static inline uint32_t float_pack16_v3d(uint32_t f32) +{ -+ float f = uif(f32); -+ return _mesa_float_to_half(f); ++ return _mesa_float_to_half(uif(f32)); +} + -+static uint32_t float_unpack16_v3d(uint32_t f16) ++static inline uint32_t float_unpack16_v3d(uint32_t f16) +{ -+ float f = _mesa_half_to_float(f16); -+ return fui(f); ++ return fui(_mesa_half_to_float(f16)); +} + -+static uint32_t vfpack_v3d(uint32_t a, uint32_t b) ++static inline uint32_t vfpack_v3d(uint32_t a, uint32_t b) +{ + return float_pack16_v3d(b) << 16 | float_pack16_v3d(a); +} + -+static uint32_t vfsat_v3d(uint32_t a) ++static inline uint32_t vfsat_v3d(uint32_t a) +{ -+ return vfpack_v3d( -+ fui(SATURATE(_mesa_half_to_float(a & 0xffff))), -+ fui(SATURATE(_mesa_half_to_float(a >> 16)))); ++ const uint32_t low = fui(SATURATE(_mesa_half_to_float(a & 0xffff))); ++ const uint32_t high = fui(SATURATE(_mesa_half_to_float(a >> 16))); ++ ++ return vfpack_v3d(low, high); +} + -+static uint32_t fmul_v3d(uint32_t a, uint32_t b) ++static inline uint32_t fmul_v3d(uint32_t a, uint32_t b) +{ -+ float f = uif(a); -+ float g = uif(b); -+ -+ float x = f * g; -+ -+ return fui(x); ++ return fui(uif(a) * uif(b)); +} + -+#define L(x) float_unpack16_v3d((x) & 0xffff) -+#define H(x) float_unpack16_v3d((x) >> 16) -+#define V(f,a,b) vfpack_v3d(f(L(a), L(b)), f(H(a), H(b))) -+ +static uint32_t vfmul_v3d(uint32_t a, uint32_t b) +{ -+ return V(fmul_v3d, a, b); ++ const uint32_t low = fmul_v3d(float_unpack16_v3d(a & 0xffff), ++ float_unpack16_v3d(b & 0xffff)); ++ const uint32_t high = fmul_v3d(float_unpack16_v3d(a >> 16), ++ float_unpack16_v3d(b >> 16)); ++ ++ return vfpack_v3d(low, high); +} + +/* Convert 2x16-bit floating point to 2x10-bit unorm */ @@ -156,34 +142,41 @@ index e6383b67737..46395d79a89 100644 +{ + return vfmul_v3d(vfsat_v3d(src0), 0x000303ff); +} -+ + /* Some typed vector structures to make things like src0.y work */ typedef int8_t int1_t; typedef uint8_t uint1_t; diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py -index e4d87aa6126..63aa7cfa315 100644 +index 0f81328f441..b70d9567cd6 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py -@@ -1393,6 +1393,50 @@ for (int i = 0; i < 32; i += 8) { +@@ -1413,6 +1413,58 @@ for (int i = 0; i < 32; i += 8) { } """) +# v3d-specific opcodes + -+# v3d-specific (v71) instruction that packs parts of 2 2x16 floating point into -+# r11g11b10 bits, rounding to nearest even ++# v3d-specific (v71) instruction that packs bits of 2 2x16 floating point into ++# r11g11b10 bits, rounding to nearest even, so ++# dst[10:0] = float16_to_float11 (src0[15:0]) ++# dst[21:11] = float16_to_float11 (src0[31:16]) ++# dst[31:22] = float16_to_float10 (src1[15:0]) +binop_convert("v11fpack_v3d", tuint32, tuint32, "", + "v11fpack_v3d(src0, src1)") + +# v3d-specific (v71) instruction that packs 2x32 bit to 2x16 bit integer. The +# difference with pack_32_2x16_split is that the sources are 32bit too. So it -+# receives 2 32-bit integer, and pack the lower halfword as 2x16 on a 32-bit -+# pack. ++# receives 2 32-bit integer, and packs the lower halfword as 2x16 on a 32-bit ++# integer. +binop_horiz("vpack_v3d", 1, tuint32, 1, tuint32, 1, tuint32, + "(src0.x & 0xffff) | (src1.x << 16)") + -+# v3d-specific (v71) instruction that packs parts of 2 2x16 integers into r10g10b10a2 ++# v3d-specific (v71) instruction that packs bits of 2 2x16 integers into ++# r10g10b10a2: ++# dst[9:0] = src0[9:0] ++# dst[19:10] = src0[25:16] ++# dst[29:20] = src1[9:0] ++# dst[31:30] = src1[17:16] +binop_convert("v10pack_v3d", tuint32, tuint32, "", + "(src0 & 0x3ff) | ((src0 >> 16) & 0x3ff) << 10 | (src1 & 0x3ff) << 20 | ((src1 >> 16) & 0x3ff) << 30") + diff --git a/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-cle-update-the-packet-definitions-for-new-g.patch b/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-cle-update-the-packet-definitions-for-new-g.patch deleted file mode 100644 index 80190c0aef..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-cle-update-the-packet-definitions-for-new-g.patch +++ /dev/null @@ -1,712 +0,0 @@ -From 6f744bc4bec98f9769486d427e8e2d4e314ae056 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 29 Jun 2021 12:03:24 +0200 -Subject: [PATCH 003/142] broadcom/cle: update the packet definitions for new - generation v71 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Using as reference the spec for 7.1.5. This include totally new -packets, and redefine some that already existed on v42. - -Full list: - * Add Depth Bounds Test Limits - * Redefine Tile Binning Mode Cfg - * Redefine Cfg Bits. There are some changes on the fields: - * Line Rasterization is now 1 bit size - * Depth Bounds Enable (that takes one of the bits of Line Rasterization) - * Early-Z/Early-Z updates enable bits (16-17) figure now as reserved. - * New Z-Clipping mode field - * Redefine Tile Rendering Mode Cfg (Common). Changes with respect to v42: - * New log2 tile height/width fields starting at bit 52/55 - * Due those two news, end pad is smaller - * sub-id has now a size of 3. Bit 4 is reserved. - * Number of render targets: this field max value is now 7 (not - reflected on the xml). - * Maximum BPP is removed on v71 (now bits 40-41 are reserved) - * Depth Buffer disable: on bit 44 - * Update Store Tile Buffer General - * Adding Cfg Render Target Part1/2/3 packets: they replace v4X "Tile - Rendering Mode Cfg (Color)" (real name "Rendering Configuration - (Render Targets Config)"), "Tile Rendering Mode Cfg (Clear Colors - Part1)", "Tile Rendering Mode Cfg (Clear Colors Part2)", and "Tile - Rendering Mode Cfg (Clear Colors Part3)". On those old versions, - the first packet is used to configure 4 render targets. Now that 8 - are supported, invididual per-render-target are used. - * Update ZS clear values packet. - * Add new v71 output formats - * Define Clear Render Targets (Replaces Clear Tile Buffers from v42) - * Redefine GL Shader State Record. Changes copared with v42: - * Fields removed: - * "Coordinate shader has separate input and output VPM blocks" - (reserved bit now) - * "Vertex shader has separate input and output VPM blocks" - (reserved bit now) - * "Address of table of default attribute Values." (we needed to - change the start position for all the following fields) - * New field: - * "Never defer FEP depth writes to fragment shader auto Z writes - on scoreboard conflict" - * Redefine clipper xy scaling: Now it uses 1/64ths of pixels, instead - of 1/256ths - * Update texture shader state. - * Notice we don't use an address type for these fields in the XML - description. This is because the addresses are 64-bit aligned - (even though the PRM doesn't say it) which means the 6 LSB bits - are implicitly 0, but the fields are encoded before the 6th bit - of their starting byte, so we can't use the usual trick we do - with address types where the first 6 bits in the byte are - implicitly overwritten by other fields and we have to encode this - manually as a uint field. This would mean that if we had an - actual BO we would also need to add it manually to the job's - list, but since we don't have one, we don't have to do anything - about it. - * Add new RB_Swap field for texture shader state - * Document Cb/Cr addresses as uint fields in texture shader state - * Fixup Blend Config description: we now support 8 RTs. - * TMU config parameter 2 has new fields - * Add new clipper Z without guardband packet in v71 - * Add enums for the Z clip modes accepted in v71 - * Fix texture state array stride packing for V3D 7.1.5 - -Signed-off-by: Iago Toral Quiroga -Signed-off-by: Alejandro Piñeiro - -broadcom/cle: rb_swap ---- - src/broadcom/cle/v3d_packet_v33.xml | 386 ++++++++++++++++++++++++++-- - 1 file changed, 368 insertions(+), 18 deletions(-) - -diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml -index a0242b5f1c2..624353ca2bf 100644 ---- a/src/broadcom/cle/v3d_packet_v33.xml -+++ b/src/broadcom/cle/v3d_packet_v33.xml -@@ -1,4 +1,4 @@ -- -+ - - - -@@ -167,13 +167,36 @@ - - - -- -+ - - - - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - -+ - - - -@@ -1099,7 +1263,7 @@ - - - -- -+ - - - -@@ -1108,6 +1272,15 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -@@ -1117,7 +1290,7 @@ - - - -- -+ - - - -@@ -1126,6 +1299,19 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -@@ -1135,7 +1321,7 @@ - - - -- -+ - - - -@@ -1144,6 +1330,13 @@ - - - -+ -+ -+ -+ -+ -+ -+ - - - -@@ -1155,7 +1348,7 @@ - - - -- -+ - - - -@@ -1166,6 +1359,13 @@ - - - -+ -+ -+ -+ -+ -+ -+ - - - -@@ -1240,7 +1440,7 @@ - - - -- -+ - - - -@@ -1299,6 +1499,63 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -@@ -1543,7 +1800,7 @@ - - - -- -+ - - - -@@ -1558,6 +1815,23 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -@@ -1611,7 +1885,7 @@ - - - -- -+ - - - -@@ -1652,6 +1926,82 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0067-broadcom-compiler-update-image-store-lowering-to-use.patch b/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-compiler-update-image-store-lowering-to-use.patch similarity index 79% rename from projects/RPi/devices/RPi5/patches/mesa/0067-broadcom-compiler-update-image-store-lowering-to-use.patch rename to projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-compiler-update-image-store-lowering-to-use.patch index 911dd462a8..1133fcc53e 100644 --- a/projects/RPi/devices/RPi5/patches/mesa/0067-broadcom-compiler-update-image-store-lowering-to-use.patch +++ b/projects/RPi/devices/RPi5/patches/mesa/0003-broadcom-compiler-update-image-store-lowering-to-use.patch @@ -1,8 +1,8 @@ -From 381c29e3ff5237c89380cc53eb2271d1985f4e34 Mon Sep 17 00:00:00 2001 +From 7e151fd3a213848c8022c9f48e10f2aec76c3e4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= Date: Thu, 2 Dec 2021 13:26:43 +0100 -Subject: [PATCH 067/142] broadcom/compiler: update image store lowering to use - v71 new packing/conversion instructions +Subject: [PATCH 3/3] broadcom/compiler: update image store lowering to use v71 + new packing/conversion instructions Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*: total instructions in shared programs: 35993 -> 33245 (-7.63%) @@ -31,18 +31,20 @@ Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*: FWIW, that one HURT on the instructions count is for just one instruction. + +Reviewed-by: Iago Toral Quiroga --- - src/broadcom/compiler/nir_to_vir.c | 39 +++ + src/broadcom/compiler/nir_to_vir.c | 40 +++ src/broadcom/compiler/v3d_compiler.h | 16 +- - .../compiler/v3d_nir_lower_image_load_store.c | 246 +++++++++++++++++- + .../compiler/v3d_nir_lower_image_load_store.c | 239 +++++++++++++++++- src/broadcom/compiler/vir.c | 2 +- - 4 files changed, 294 insertions(+), 9 deletions(-) + 4 files changed, 288 insertions(+), 9 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c -index 90fe1d1e7f0..a8cf02dd386 100644 +index 220c864a056..4329d4c85f6 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c -@@ -1689,6 +1689,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) +@@ -1688,6 +1688,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) result = vir_VFPACK(c, src[0], src[1]); break; @@ -65,10 +67,10 @@ index 90fe1d1e7f0..a8cf02dd386 100644 case nir_op_unpack_half_2x16_split_x: result = vir_FMOV(c, src[0]); vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_L); -@@ -1719,6 +1735,29 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) - result = vir_FMOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, tmp, zero)); +@@ -1698,6 +1714,30 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) + vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_H); break; - } + + case nir_op_vftounorm8_v3d: + result = vir_VFTOUNORM8(c, src[0]); + break; @@ -92,14 +94,15 @@ index 90fe1d1e7f0..a8cf02dd386 100644 + case nir_op_ftosnorm16_v3d: + result = vir_FTOSNORM16(c, src[0]); + break; - ++ default: fprintf(stderr, "unknown NIR ALU inst: "); + nir_print_instr(&instr->instr, stderr); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h -index 36adf8830b5..425ab0cdf9d 100644 +index 095b33c03b8..5714e85d2b8 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h -@@ -1186,7 +1186,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader); +@@ -1180,7 +1180,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader); bool v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c); bool v3d_nir_lower_scratch(nir_shader *s); bool v3d_nir_lower_txf_ms(nir_shader *s); @@ -108,7 +111,7 @@ index 36adf8830b5..425ab0cdf9d 100644 bool v3d_nir_lower_load_store_bitsize(nir_shader *s); void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); -@@ -1427,6 +1427,20 @@ VIR_SFU(LOG) +@@ -1421,6 +1421,20 @@ VIR_SFU(LOG) VIR_SFU(SIN) VIR_SFU(RSQRT2) @@ -130,7 +133,7 @@ index 36adf8830b5..425ab0cdf9d 100644 vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, struct qreg dest, struct qreg src) diff --git a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c -index 2900a29817f..bbb55be4a14 100644 +index 5f8363377cb..ec43f834897 100644 --- a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c +++ b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c @@ -40,6 +40,10 @@ @@ -151,9 +154,9 @@ index 2900a29817f..bbb55be4a14 100644 + * + * This is the generic helper, using all common nir operations. */ - static nir_ssa_def * - pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits, -@@ -91,8 +97,185 @@ pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits, + static nir_def * + pack_bits(nir_builder *b, nir_def *color, const unsigned *bits, +@@ -91,8 +97,180 @@ pack_bits(nir_builder *b, nir_def *color, const unsigned *bits, return nir_vec(b, results, DIV_ROUND_UP(offset, 32)); } @@ -161,46 +164,42 @@ index 2900a29817f..bbb55be4a14 100644 + * just easier to read vfpack on the code, specially while using the PRM as + * reference + */ -+static nir_ssa_def * -+nir_vfpack(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2) ++static inline nir_def * ++nir_vfpack(nir_builder *b, nir_def *p1, nir_def *p2) +{ + return nir_pack_half_2x16_split(b, p1, p2); +} + -+static inline nir_ssa_def * -+pack_11f11f10f(nir_builder *b, nir_ssa_def *color) ++static inline nir_def * ++pack_11f11f10f(nir_builder *b, nir_def *color) +{ -+ nir_ssa_def *p1 = nir_vfpack(b, nir_channel(b, color, 0), ++ nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0), + nir_channel(b, color, 1)); -+ /* FIXME: we noted that we could just use p2 again as the second -+ * element to pack, and CTS tests still works. Just using undef as is -+ * slightly more correct -+ */ -+ nir_ssa_def *undef = nir_ssa_undef(b, 1, color->bit_size); -+ nir_ssa_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef); ++ nir_def *undef = nir_undef(b, 1, color->bit_size); ++ nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef); + + return nir_v11fpack_v3d(b, p1, p2); +} + -+static inline nir_ssa_def * -+pack_r10g10b10a2_uint(nir_builder *b, nir_ssa_def *color) ++static inline nir_def * ++pack_r10g10b10a2_uint(nir_builder *b, nir_def *color) +{ -+ nir_ssa_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0), ++ nir_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0), + nir_channel(b, color, 1)); -+ nir_ssa_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2), ++ nir_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2), + nir_channel(b, color, 3)); + + return nir_v10pack_v3d(b, p1, p2); +} + -+static inline nir_ssa_def * -+pack_r10g10b10a2_unorm(nir_builder *b, nir_ssa_def *color) ++static inline nir_def * ++pack_r10g10b10a2_unorm(nir_builder *b, nir_def *color) +{ -+ nir_ssa_def *p1 = nir_vfpack(b, nir_channel(b, color, 0), ++ nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0), + nir_channel(b, color, 1)); + p1 = nir_vftounorm10lo_v3d(b, p1); + -+ nir_ssa_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), ++ nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), + nir_channel(b, color, 3)); + p2 = nir_vftounorm10hi_v3d(b, p2); + @@ -213,8 +212,8 @@ index 2900a29817f..bbb55be4a14 100644 + TO_UNORM +}; + -+static inline nir_ssa_def * -+pack_8bit(nir_builder *b, nir_ssa_def *color, ++static inline nir_def * ++pack_8bit(nir_builder *b, nir_def *color, + unsigned num_components, + enum hw_conversion conversion) +{ @@ -223,8 +222,8 @@ index 2900a29817f..bbb55be4a14 100644 + * conversion. But we support also that case, and let the caller + * decide which method to use. + */ -+ nir_ssa_def *p1; -+ nir_ssa_def *p2; ++ nir_def *p1; ++ nir_def *p2; + + if (conversion == NONE) { + p1 = nir_vpack_v3d(b, nir_channel(b, color, 0), @@ -246,10 +245,9 @@ index 2900a29817f..bbb55be4a14 100644 + nir_vftounorm8_v3d(b, p2) : nir_vftosnorm8_v3d(b, p2); + } + } else { -+ /* As mentioned on the comment before, using an undef here -+ * would be more correct. But for this case we are getting -+ * worse values, and in fact even some worse instruction count -+ * with some CTS tests, so we just reuse the first packing ++ /* Using an undef here would be more correct. But for this ++ * case we are getting worse shader-db values with some CTS ++ * tests, so we just reuse the first packing. + */ + p2 = p1; + } @@ -257,13 +255,13 @@ index 2900a29817f..bbb55be4a14 100644 + return nir_v8pack_v3d(b, p1, p2); +} + -+static inline nir_ssa_def * -+pack_16bit(nir_builder *b, nir_ssa_def *color, ++static inline nir_def * ++pack_16bit(nir_builder *b, nir_def *color, + unsigned num_components, + enum hw_conversion conversion) +{ -+ nir_ssa_def *results[2]; -+ nir_ssa_def *channels[4]; ++ nir_def *results[2]; ++ nir_def *channels[4]; + + /* Note that usually you should not use this method (that relies on + * custom packing) if we are not doing any conversion. But we support @@ -299,8 +297,8 @@ index 2900a29817f..bbb55be4a14 100644 + return nir_vec(b, results, DIV_ROUND_UP(num_components, 2)); +} + -+static inline nir_ssa_def * -+pack_xbit(nir_builder *b, nir_ssa_def *color, ++static inline nir_def * ++pack_xbit(nir_builder *b, nir_def *color, + unsigned num_components, + const struct util_format_channel_description *r_chan) +{ @@ -340,7 +338,7 @@ index 2900a29817f..bbb55be4a14 100644 { enum pipe_format format = nir_intrinsic_format(instr); assert(format != PIPE_FORMAT_NONE); -@@ -118,9 +301,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr) +@@ -118,9 +296,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr) */ formatted = color; } else { @@ -350,7 +348,7 @@ index 2900a29817f..bbb55be4a14 100644 const unsigned *bits; switch (r_chan->size) { -@@ -171,6 +351,52 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr) +@@ -170,6 +345,50 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr) return true; } @@ -366,10 +364,9 @@ index 2900a29817f..bbb55be4a14 100644 + unsigned num_components = util_format_get_nr_components(format); + b->cursor = nir_before_instr(&instr->instr); + -+ nir_ssa_def *color = nir_channels(b, -+ nir_ssa_for_src(b, instr->src[3], 4), -+ (1 << num_components) - 1); -+ nir_ssa_def *formatted = NULL; ++ nir_def *color = ++ nir_trim_vector(b, instr->src[3].ssa, num_components); ++ nir_def *formatted = NULL; + if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { + formatted = nir_format_pack_r9g9b9e5(b, color); + } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { @@ -393,8 +390,7 @@ index 2900a29817f..bbb55be4a14 100644 + formatted = pack_xbit(b, color, num_components, r_chan); + } + -+ nir_instr_rewrite_src(&instr->instr, &instr->src[3], -+ nir_src_for_ssa(formatted)); ++ nir_src_rewrite(&instr->src[3], formatted); + instr->num_components = formatted->num_components; + + return true; @@ -403,10 +399,10 @@ index 2900a29817f..bbb55be4a14 100644 static bool v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr) { -@@ -215,11 +441,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b, - nir_intrinsic_instr *intr = - nir_instr_as_intrinsic(instr); - +@@ -207,11 +426,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b, + nir_intrinsic_instr *intr, + void *_state) + { + struct v3d_compile *c = (struct v3d_compile *) _state; + switch (intr->intrinsic) { @@ -422,23 +418,24 @@ index 2900a29817f..bbb55be4a14 100644 default: return false; } -@@ -228,9 +460,9 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b, +@@ -220,10 +445,10 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b, } bool -v3d_nir_lower_image_load_store(nir_shader *s) +v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c) { - return nir_shader_instructions_pass(s, v3d_nir_lower_image_load_store_cb, + return nir_shader_intrinsics_pass(s, + v3d_nir_lower_image_load_store_cb, nir_metadata_block_index | - nir_metadata_dominance, NULL); + nir_metadata_dominance, c); } diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c -index aea113f050e..7612eed7130 100644 +index 8c536b8fbcc..acb13a6cbf9 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c -@@ -1576,7 +1576,7 @@ v3d_attempt_compile(struct v3d_compile *c) +@@ -1599,7 +1599,7 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS(_, c->s, v3d_nir_lower_io, c); NIR_PASS(_, c->s, v3d_nir_lower_txf_ms); diff --git a/projects/RPi/devices/RPi5/patches/mesa/0004-broadcom-common-retrieve-V3D-revision-number.patch b/projects/RPi/devices/RPi5/patches/mesa/0004-broadcom-common-retrieve-V3D-revision-number.patch deleted file mode 100644 index 6f2fe867f4..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0004-broadcom-common-retrieve-V3D-revision-number.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 569cbe4229df737ce5915c4be2cad534707fb4f7 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 9 Nov 2021 08:50:51 +0100 -Subject: [PATCH 004/142] broadcom/common: retrieve V3D revision number - -The subrev field from the hub ident3 register is bumped with every -hardware revision doing backwards incompatible changes so we want to -keep track of this. - -Instead of modifying the 'ver' field info to acommodate subrev info, -which would require a lot of changes, simply add a new 'rev' field in -devinfo that we can use when we need to make changes based on the -revision number of a hardware release. ---- - src/broadcom/common/v3d_device_info.c | 14 +++++++++++++- - src/broadcom/common/v3d_device_info.h | 3 +++ - 2 files changed, 16 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c -index 7e0862f1f02..7512fe3a06b 100644 ---- a/src/broadcom/common/v3d_device_info.c -+++ b/src/broadcom/common/v3d_device_info.c -@@ -36,6 +36,9 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i - struct drm_v3d_get_param ident1 = { - .param = DRM_V3D_PARAM_V3D_CORE0_IDENT1, - }; -+ struct drm_v3d_get_param hub_ident3 = { -+ .param = DRM_V3D_PARAM_V3D_HUB_IDENT3, -+ }; - int ret; - - ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &ident0); -@@ -76,5 +79,14 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i - return false; - } - -- return true; -+ ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &hub_ident3); -+ if (ret != 0) { -+ fprintf(stderr, "Couldn't get V3D core HUB IDENT3: %s\n", -+ strerror(errno)); -+ return false; -+ } -+ -+ devinfo->rev = (hub_ident3.value >> 8) & 0xff; -+ -+ return true; - } -diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h -index 97abd9b8d9f..32cb65cf81f 100644 ---- a/src/broadcom/common/v3d_device_info.h -+++ b/src/broadcom/common/v3d_device_info.h -@@ -34,6 +34,9 @@ struct v3d_device_info { - /** Simple V3D version: major * 10 + minor */ - uint8_t ver; - -+ /** V3D revision number */ -+ uint8_t rev; -+ - /** Size of the VPM, in bytes. */ - int vpm_size; - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0005-broadcom-common-add-some-common-v71-helpers.patch b/projects/RPi/devices/RPi5/patches/mesa/0005-broadcom-common-add-some-common-v71-helpers.patch deleted file mode 100644 index 2f07c250d8..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0005-broadcom-common-add-some-common-v71-helpers.patch +++ /dev/null @@ -1,91 +0,0 @@ -From c260843c882d25bd31e308566b45d4517fda0fa2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 17 Nov 2021 14:40:47 +0100 -Subject: [PATCH 005/142] broadcom/common: add some common v71 helpers - ---- - src/broadcom/common/v3d_util.c | 27 +++++++++++++++++++++++++++ - src/broadcom/common/v3d_util.h | 27 +++++++++++++++++++++++++++ - 2 files changed, 54 insertions(+) - -diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c -index 57872a923d3..26f5c6b336f 100644 ---- a/src/broadcom/common/v3d_util.c -+++ b/src/broadcom/common/v3d_util.c -@@ -170,3 +170,30 @@ v3d_hw_prim_type(enum mesa_prim prim_type) - unreachable("Unsupported primitive type"); - } - } -+ -+uint32_t -+v3d_internal_bpp_words(uint32_t internal_bpp) -+{ -+ switch (internal_bpp) { -+ case 0 /* V3D_INTERNAL_BPP_32 */: -+ return 1; -+ case 1 /* V3D_INTERNAL_BPP_64 */: -+ return 2; -+ case 2 /* V3D_INTERNAL_BPP_128 */: -+ return 4; -+ default: -+ unreachable("Unsupported internal BPP"); -+ } -+} -+ -+uint32_t -+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width, -+ uint32_t bpp) -+{ -+ /* stride in multiples of 128 bits, and covers 2 rows. This is the -+ * reason we divide by 2 instead of 4, as we divide number of 32-bit -+ * words per row by 2. -+ */ -+ -+ return (tile_width * bpp) / 2; -+} -diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h -index eb802b77f67..864fc949ffa 100644 ---- a/src/broadcom/common/v3d_util.h -+++ b/src/broadcom/common/v3d_util.h -@@ -24,6 +24,7 @@ - #ifndef V3D_UTIL_H - #define V3D_UTIL_H - -+#include "util/macros.h" - #include "common/v3d_device_info.h" - #include "pipe/p_defines.h" - -@@ -46,4 +47,30 @@ v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle); - uint32_t - v3d_hw_prim_type(enum mesa_prim prim_type); - -+uint32_t -+v3d_internal_bpp_words(uint32_t internal_bpp); -+ -+/* Some configuration packets want the size on log2, but starting at 0 for -+ * size 8. -+ */ -+static inline uint8_t -+log2_tile_size(uint32_t size) -+{ -+ switch(size) { -+ case 8: -+ return 0; -+ case 16: -+ return 1; -+ case 32: -+ return 2; -+ case 64: -+ return 3; -+ default: -+ unreachable("Unsupported tile width/height"); -+ } -+} -+ -+uint32_t -+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width, -+ uint32_t bpp); - #endif --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0006-broadcom-qpu-add-comments-on-waddr-not-used-on-V3D-7.patch b/projects/RPi/devices/RPi5/patches/mesa/0006-broadcom-qpu-add-comments-on-waddr-not-used-on-V3D-7.patch deleted file mode 100644 index 0250d31af5..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0006-broadcom-qpu-add-comments-on-waddr-not-used-on-V3D-7.patch +++ /dev/null @@ -1,53 +0,0 @@ -From a5211a4d71acc53183d2a90eb1694d8cce6eb44f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 5 Aug 2021 01:03:11 +0200 -Subject: [PATCH 006/142] broadcom/qpu: add comments on waddr not used on V3D - 7.x - ---- - src/broadcom/qpu/qpu_instr.h | 22 +++++++++++----------- - 1 file changed, 11 insertions(+), 11 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 2e133472698..45a0cad9760 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -88,11 +88,11 @@ enum v3d_qpu_uf { - }; - - enum v3d_qpu_waddr { -- V3D_QPU_WADDR_R0 = 0, -- V3D_QPU_WADDR_R1 = 1, -- V3D_QPU_WADDR_R2 = 2, -- V3D_QPU_WADDR_R3 = 3, -- V3D_QPU_WADDR_R4 = 4, -+ V3D_QPU_WADDR_R0 = 0, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_R1 = 1, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */ - V3D_QPU_WADDR_R5 = 5, - V3D_QPU_WADDR_NOP = 6, - V3D_QPU_WADDR_TLB = 7, -@@ -108,12 +108,12 @@ enum v3d_qpu_waddr { - V3D_QPU_WADDR_SYNC = 16, - V3D_QPU_WADDR_SYNCU = 17, - V3D_QPU_WADDR_SYNCB = 18, -- V3D_QPU_WADDR_RECIP = 19, -- V3D_QPU_WADDR_RSQRT = 20, -- V3D_QPU_WADDR_EXP = 21, -- V3D_QPU_WADDR_LOG = 22, -- V3D_QPU_WADDR_SIN = 23, -- V3D_QPU_WADDR_RSQRT2 = 24, -+ V3D_QPU_WADDR_RECIP = 19, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_RSQRT = 20, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_EXP = 21, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_LOG = 22, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_SIN = 23, /* Reserved on V3D 7.x */ -+ V3D_QPU_WADDR_RSQRT2 = 24, /* Reserved on V3D 7.x */ - V3D_QPU_WADDR_TMUC = 32, - V3D_QPU_WADDR_TMUS = 33, - V3D_QPU_WADDR_TMUT = 34, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0007-broadcom-qpu-set-V3D-7.x-names-for-some-waddr-aliasi.patch b/projects/RPi/devices/RPi5/patches/mesa/0007-broadcom-qpu-set-V3D-7.x-names-for-some-waddr-aliasi.patch deleted file mode 100644 index 2a1a7ae248..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0007-broadcom-qpu-set-V3D-7.x-names-for-some-waddr-aliasi.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 0ccf3043e4a584e5592bb7fad737d5d98ed23db0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 5 Aug 2021 01:00:47 +0200 -Subject: [PATCH 007/142] broadcom/qpu: set V3D 7.x names for some waddr - aliasing - -V3D 7.x got rid of the accumulator, but still uses the values for -WADDR_R5 and WADDR_R5REP, so let's return a proper name and add some -aliases. ---- - src/broadcom/qpu/qpu_instr.c | 8 ++++++++ - src/broadcom/qpu/qpu_instr.h | 6 ++++-- - 2 files changed, 12 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index 60dabf74e8e..7759fb0efdf 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -35,6 +35,14 @@ v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo, - if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU) - return "tmu"; - -+ /* V3D 7.x QUAD and REP aliases R5 and R5REPT in the table below -+ */ -+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD) -+ return "quad"; -+ -+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP) -+ return "rep"; -+ - static const char *waddr_magic[] = { - [V3D_QPU_WADDR_R0] = "r0", - [V3D_QPU_WADDR_R1] = "r1", -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 45a0cad9760..19bf721dbe1 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -93,7 +93,8 @@ enum v3d_qpu_waddr { - V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */ - V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */ - V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */ -- V3D_QPU_WADDR_R5 = 5, -+ V3D_QPU_WADDR_R5 = 5, /* V3D 4.x */ -+ V3D_QPU_WADDR_QUAD = 5, /* V3D 7.x */ - V3D_QPU_WADDR_NOP = 6, - V3D_QPU_WADDR_TLB = 7, - V3D_QPU_WADDR_TLBU = 8, -@@ -129,7 +130,8 @@ enum v3d_qpu_waddr { - V3D_QPU_WADDR_TMUHSCM = 44, - V3D_QPU_WADDR_TMUHSF = 45, - V3D_QPU_WADDR_TMUHSLOD = 46, -- V3D_QPU_WADDR_R5REP = 55, -+ V3D_QPU_WADDR_R5REP = 55, /* V3D 4.x */ -+ V3D_QPU_WADDR_REP = 55, /* V3D 7.x */ - }; - - struct v3d_qpu_flags { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0008-broadcom-compiler-rename-small_imm-to-small_imm_b.patch b/projects/RPi/devices/RPi5/patches/mesa/0008-broadcom-compiler-rename-small_imm-to-small_imm_b.patch deleted file mode 100644 index 96d81a2c1a..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0008-broadcom-compiler-rename-small_imm-to-small_imm_b.patch +++ /dev/null @@ -1,241 +0,0 @@ -From 18de3cc85cf8bbe294e044f7a12abe14e554de0a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Sun, 19 Sep 2021 03:20:18 +0200 -Subject: [PATCH 008/142] broadcom/compiler: rename small_imm to small_imm_b - -Current small_imm is associated with the "B" read address. - -We do this change in advance for v71 support, where we will have 4 -different small_imm (a/b/c/d), so we start with a renaming. ---- - src/broadcom/compiler/qpu_schedule.c | 22 +++++++++---------- - .../compiler/vir_opt_small_immediates.c | 4 ++-- - src/broadcom/compiler/vir_to_qpu.c | 2 +- - src/broadcom/qpu/qpu_disasm.c | 2 +- - src/broadcom/qpu/qpu_instr.h | 2 +- - src/broadcom/qpu/qpu_pack.c | 22 +++++++++---------- - 6 files changed, 27 insertions(+), 27 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 3b32b48f86f..a10fa03ed10 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -160,7 +160,7 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n, - add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n); - break; - case V3D_QPU_MUX_B: -- if (!n->inst->qpu.sig.small_imm) { -+ if (!n->inst->qpu.sig.small_imm_b) { - add_read_dep(state, - state->last_rf[n->inst->qpu.raddr_b], n); - } -@@ -615,7 +615,7 @@ qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst, - return true; - - if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) && -- !inst->sig.small_imm && (inst->raddr_b == waddr)) -+ !inst->sig.small_imm_b && (inst->raddr_b == waddr)) - return true; - - return false; -@@ -790,11 +790,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a, - uint64_t raddrs_used = 0; - if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A)) - raddrs_used |= (1ll << a->raddr_a); -- if (!a->sig.small_imm && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B)) -+ if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B)) - raddrs_used |= (1ll << a->raddr_b); - if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A)) - raddrs_used |= (1ll << b->raddr_a); -- if (!b->sig.small_imm && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B)) -+ if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B)) - raddrs_used |= (1ll << b->raddr_b); - - return raddrs_used; -@@ -816,16 +816,16 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, - if (naddrs > 2) - return false; - -- if ((add_instr->sig.small_imm || mul_instr->sig.small_imm)) { -+ if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) { - if (naddrs > 1) - return false; - -- if (add_instr->sig.small_imm && mul_instr->sig.small_imm) -+ if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b) - if (add_instr->raddr_b != mul_instr->raddr_b) - return false; - -- result->sig.small_imm = true; -- result->raddr_b = add_instr->sig.small_imm ? -+ result->sig.small_imm_b = true; -+ result->raddr_b = add_instr->sig.small_imm_b ? - add_instr->raddr_b : mul_instr->raddr_b; - } - -@@ -836,7 +836,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, - raddrs_used &= ~(1ll << raddr_a); - result->raddr_a = raddr_a; - -- if (!result->sig.small_imm) { -+ if (!result->sig.small_imm_b) { - if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) && - raddr_a == add_instr->raddr_b) { - if (add_instr->alu.add.a == V3D_QPU_MUX_B) -@@ -1025,7 +1025,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - merge.sig.ldtmu |= b->sig.ldtmu; - merge.sig.ldvary |= b->sig.ldvary; - merge.sig.ldvpm |= b->sig.ldvpm; -- merge.sig.small_imm |= b->sig.small_imm; -+ merge.sig.small_imm_b |= b->sig.small_imm_b; - merge.sig.ldtlb |= b->sig.ldtlb; - merge.sig.ldtlbu |= b->sig.ldtlbu; - merge.sig.ucb |= b->sig.ucb; -@@ -1614,7 +1614,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - return false; - - if (inst->raddr_b < 3 && -- !inst->sig.small_imm && -+ !inst->sig.small_imm_b && - v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) { - return false; - } -diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c -index 47d7722968d..df0d6c36c9b 100644 ---- a/src/broadcom/compiler/vir_opt_small_immediates.c -+++ b/src/broadcom/compiler/vir_opt_small_immediates.c -@@ -80,7 +80,7 @@ vir_opt_small_immediates(struct v3d_compile *c) - */ - struct v3d_qpu_sig new_sig = inst->qpu.sig; - uint32_t sig_packed; -- new_sig.small_imm = true; -+ new_sig.small_imm_b = true; - if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed)) - continue; - -@@ -89,7 +89,7 @@ vir_opt_small_immediates(struct v3d_compile *c) - vir_dump_inst(c, inst); - fprintf(stderr, "\n"); - } -- inst->qpu.sig.small_imm = true; -+ inst->qpu.sig.small_imm_b = true; - inst->qpu.raddr_b = packed; - - inst->src[i].file = QFILE_SMALL_IMM; -diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c -index 45e6bfa1470..15c2e3674c2 100644 ---- a/src/broadcom/compiler/vir_to_qpu.c -+++ b/src/broadcom/compiler/vir_to_qpu.c -@@ -94,7 +94,7 @@ static void - set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) - { - if (src.smimm) { -- assert(instr->sig.small_imm); -+ assert(instr->sig.small_imm_b); - *mux = V3D_QPU_MUX_B; - return; - } -diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c -index 28fb2357b97..6aca3c28e78 100644 ---- a/src/broadcom/qpu/qpu_disasm.c -+++ b/src/broadcom/qpu/qpu_disasm.c -@@ -62,7 +62,7 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm, - if (mux == V3D_QPU_MUX_A) { - append(disasm, "rf%d", instr->raddr_a); - } else if (mux == V3D_QPU_MUX_B) { -- if (instr->sig.small_imm) { -+ if (instr->sig.small_imm_b) { - uint32_t val; - ASSERTED bool ok = - v3d_qpu_small_imm_unpack(disasm->devinfo, -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 19bf721dbe1..9cd831863b4 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -50,7 +50,7 @@ struct v3d_qpu_sig { - bool ldvpm:1; - bool ldtlb:1; - bool ldtlbu:1; -- bool small_imm:1; -+ bool small_imm_b:1; - bool ucb:1; - bool rotate:1; - bool wrtmuc:1; -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index a875683c6f8..beac591d3c1 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -112,7 +112,7 @@ - #define LDTMU .ldtmu = true - #define LDVARY .ldvary = true - #define LDVPM .ldvpm = true --#define SMIMM .small_imm = true -+#define SMIMM_B .small_imm_b = true - #define LDTLB .ldtlb = true - #define LDTLBU .ldtlbu = true - #define UCB .ucb = true -@@ -135,8 +135,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = { - [11] = { THRSW, LDVARY, LDUNIF }, - [12] = { LDVARY, LDTMU, }, - [13] = { THRSW, LDVARY, LDTMU, }, -- [14] = { SMIMM, LDVARY, }, -- [15] = { SMIMM, }, -+ [14] = { SMIMM_B, LDVARY, }, -+ [15] = { SMIMM_B, }, - [16] = { LDTLB, }, - [17] = { LDTLBU, }, - /* 18-21 reserved */ -@@ -148,8 +148,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = { - [27] = { THRSW, LDVPM, LDUNIF }, - [28] = { LDVPM, LDTMU, }, - [29] = { THRSW, LDVPM, LDTMU, }, -- [30] = { SMIMM, LDVPM, }, -- [31] = { SMIMM, }, -+ [30] = { SMIMM_B, LDVPM, }, -+ [31] = { SMIMM_B, }, - }; - - static const struct v3d_qpu_sig v40_sig_map[] = { -@@ -167,8 +167,8 @@ static const struct v3d_qpu_sig v40_sig_map[] = { - [10] = { LDVARY, LDUNIF }, - [11] = { THRSW, LDVARY, LDUNIF }, - /* 12-13 reserved */ -- [14] = { SMIMM, LDVARY, }, -- [15] = { SMIMM, }, -+ [14] = { SMIMM_B, LDVARY, }, -+ [15] = { SMIMM_B, }, - [16] = { LDTLB, }, - [17] = { LDTLBU, }, - [18] = { WRTMUC }, -@@ -178,7 +178,7 @@ static const struct v3d_qpu_sig v40_sig_map[] = { - [22] = { UCB, }, - [23] = { ROT, }, - /* 24-30 reserved */ -- [31] = { SMIMM, LDTMU, }, -+ [31] = { SMIMM_B, LDTMU, }, - }; - - static const struct v3d_qpu_sig v41_sig_map[] = { -@@ -197,8 +197,8 @@ static const struct v3d_qpu_sig v41_sig_map[] = { - [11] = { THRSW, LDVARY, LDUNIF }, - [12] = { LDUNIFRF }, - [13] = { THRSW, LDUNIFRF }, -- [14] = { SMIMM, LDVARY, }, -- [15] = { SMIMM, }, -+ [14] = { SMIMM_B, LDVARY }, -+ [15] = { SMIMM_B, }, - [16] = { LDTLB, }, - [17] = { LDTLBU, }, - [18] = { WRTMUC }, -@@ -210,7 +210,7 @@ static const struct v3d_qpu_sig v41_sig_map[] = { - [24] = { LDUNIFA}, - [25] = { LDUNIFARF }, - /* 26-30 reserved */ -- [31] = { SMIMM, LDTMU, }, -+ [31] = { SMIMM_B, LDTMU, }, - }; - - bool --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0009-broadcom-compiler-add-small_imm-a-c-d-on-v3d_qpu_sig.patch b/projects/RPi/devices/RPi5/patches/mesa/0009-broadcom-compiler-add-small_imm-a-c-d-on-v3d_qpu_sig.patch deleted file mode 100644 index 02e8c47d7e..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0009-broadcom-compiler-add-small_imm-a-c-d-on-v3d_qpu_sig.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 0e87405fe73694c173b7ce14c3d60611f241922c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 5 Aug 2021 00:50:12 +0200 -Subject: [PATCH 009/142] broadcom/compiler: add small_imm a/c/d on v3d_qpu_sig - -small_imm_a, small_imm_c and small_imm_d added on top of the already -existing small_imm_b, as V3D 7.1 defines 4 small immediates, tied to -the 4 raddr. Note that this is only the definition, and just a inst -validation rule to check that are not used before v71. Any real use is -still pending. ---- - src/broadcom/compiler/qpu_validate.c | 5 +++++ - src/broadcom/qpu/qpu_instr.h | 5 ++++- - 2 files changed, 9 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c -index 2cc7a0eb0ae..12788692432 100644 ---- a/src/broadcom/compiler/qpu_validate.c -+++ b/src/broadcom/compiler/qpu_validate.c -@@ -115,6 +115,11 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) - if (inst->type != V3D_QPU_INSTR_TYPE_ALU) - return; - -+ if (devinfo->ver < 71) { -+ if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d) -+ fail_instr(state, "small imm a/c/d added after V3D 7.1"); -+ } -+ - /* LDVARY writes r5 two instructions later and LDUNIF writes - * r5 one instruction later, which is illegal to have - * together. -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 9cd831863b4..13b3f37d43f 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -50,10 +50,13 @@ struct v3d_qpu_sig { - bool ldvpm:1; - bool ldtlb:1; - bool ldtlbu:1; -- bool small_imm_b:1; - bool ucb:1; - bool rotate:1; - bool wrtmuc:1; -+ bool small_imm_a:1; /* raddr_a (add a), since V3D 7.x */ -+ bool small_imm_b:1; /* raddr_b (add b) */ -+ bool small_imm_c:1; /* raddr_c (mul a), since V3D 7.x */ -+ bool small_imm_d:1; /* raddr_d (mul b), since V3D 7.x */ - }; - - enum v3d_qpu_cond { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0010-broadcom-qpu-add-v71-signal-map.patch b/projects/RPi/devices/RPi5/patches/mesa/0010-broadcom-qpu-add-v71-signal-map.patch deleted file mode 100644 index a2d2598b9f..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0010-broadcom-qpu-add-v71-signal-map.patch +++ /dev/null @@ -1,106 +0,0 @@ -From eca19c911d9af3b0ab3b563ea65dc455e3d27987 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 4 Aug 2021 01:11:16 +0200 -Subject: [PATCH 010/142] broadcom/qpu: add v71 signal map -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Compared with v41, the differences are: - * 14, 15, 29 and 30 are now about immediate a, b, c, d respectively - * 23 is now reserved. On v42 this was for rotate signals, that are - gone on v71. - -Signed-off-by: Alejandro Piñeiro -Signed-off-by: Iago Toral Quiroga ---- - src/broadcom/qpu/qpu_pack.c | 47 ++++++++++++++++++++++++++++++++++--- - 1 file changed, 44 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index beac591d3c1..2820d9d4c56 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -112,12 +112,15 @@ - #define LDTMU .ldtmu = true - #define LDVARY .ldvary = true - #define LDVPM .ldvpm = true --#define SMIMM_B .small_imm_b = true - #define LDTLB .ldtlb = true - #define LDTLBU .ldtlbu = true - #define UCB .ucb = true - #define ROT .rotate = true - #define WRTMUC .wrtmuc = true -+#define SMIMM_A .small_imm_a = true -+#define SMIMM_B .small_imm_b = true -+#define SMIMM_C .small_imm_c = true -+#define SMIMM_D .small_imm_d = true - - static const struct v3d_qpu_sig v33_sig_map[] = { - /* MISC R3 R4 R5 */ -@@ -213,6 +216,40 @@ static const struct v3d_qpu_sig v41_sig_map[] = { - [31] = { SMIMM_B, LDTMU, }, - }; - -+ -+static const struct v3d_qpu_sig v71_sig_map[] = { -+ /* MISC phys RF0 */ -+ [0] = { }, -+ [1] = { THRSW, }, -+ [2] = { LDUNIF }, -+ [3] = { THRSW, LDUNIF }, -+ [4] = { LDTMU, }, -+ [5] = { THRSW, LDTMU, }, -+ [6] = { LDTMU, LDUNIF }, -+ [7] = { THRSW, LDTMU, LDUNIF }, -+ [8] = { LDVARY, }, -+ [9] = { THRSW, LDVARY, }, -+ [10] = { LDVARY, LDUNIF }, -+ [11] = { THRSW, LDVARY, LDUNIF }, -+ [12] = { LDUNIFRF }, -+ [13] = { THRSW, LDUNIFRF }, -+ [14] = { SMIMM_A, }, -+ [15] = { SMIMM_B, }, -+ [16] = { LDTLB, }, -+ [17] = { LDTLBU, }, -+ [18] = { WRTMUC }, -+ [19] = { THRSW, WRTMUC }, -+ [20] = { LDVARY, WRTMUC }, -+ [21] = { THRSW, LDVARY, WRTMUC }, -+ [22] = { UCB, }, -+ /* 23 reserved */ -+ [24] = { LDUNIFA}, -+ [25] = { LDUNIFARF }, -+ /* 26-29 reserved */ -+ [30] = { SMIMM_C, }, -+ [31] = { SMIMM_D, }, -+}; -+ - bool - v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, - uint32_t packed_sig, -@@ -221,7 +258,9 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, - if (packed_sig >= ARRAY_SIZE(v33_sig_map)) - return false; - -- if (devinfo->ver >= 41) -+ if (devinfo->ver >= 71) -+ *sig = v71_sig_map[packed_sig]; -+ else if (devinfo->ver >= 41) - *sig = v41_sig_map[packed_sig]; - else if (devinfo->ver == 40) - *sig = v40_sig_map[packed_sig]; -@@ -240,7 +279,9 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, - { - static const struct v3d_qpu_sig *map; - -- if (devinfo->ver >= 41) -+ if (devinfo->ver >= 71) -+ map = v71_sig_map; -+ else if (devinfo->ver >= 41) - map = v41_sig_map; - else if (devinfo->ver == 40) - map = v40_sig_map; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0011-broadcom-qpu-define-v3d_qpu_input-use-on-v3d_qpu_alu.patch b/projects/RPi/devices/RPi5/patches/mesa/0011-broadcom-qpu-define-v3d_qpu_input-use-on-v3d_qpu_alu.patch deleted file mode 100644 index d5813b8c05..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0011-broadcom-qpu-define-v3d_qpu_input-use-on-v3d_qpu_alu.patch +++ /dev/null @@ -1,778 +0,0 @@ -From d10e67a396d713ec81fb133f3516e09fe1e067b6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 6 Aug 2021 01:22:31 +0200 -Subject: [PATCH 011/142] broadcom/qpu: define v3d_qpu_input, use on - v3d_qpu_alu_instr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -At this point it just tidy up a little the alu_instr structure. - -But also serves to prepare the structure for new changes, as 7.x uses -raddr instead of mux, and it is just easier to add the raddr to the -new input structure. - -Signed-off-by: Alejandro Piñeiro -Signed-off-by: Iago Toral Quiroga ---- - src/broadcom/compiler/qpu_schedule.c | 65 +++++++-------- - src/broadcom/compiler/vir.c | 16 ++-- - src/broadcom/compiler/vir_dump.c | 8 +- - .../compiler/vir_opt_copy_propagate.c | 12 +-- - .../compiler/vir_opt_redundant_flags.c | 8 +- - src/broadcom/compiler/vir_to_qpu.c | 30 +++---- - src/broadcom/qpu/qpu_disasm.c | 16 ++-- - src/broadcom/qpu/qpu_instr.c | 8 +- - src/broadcom/qpu/qpu_instr.h | 13 +-- - src/broadcom/qpu/qpu_pack.c | 82 +++++++++---------- - src/broadcom/qpu/tests/qpu_disasm.c | 8 +- - 11 files changed, 134 insertions(+), 132 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index a10fa03ed10..455fa3867be 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -306,14 +306,14 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) - /* XXX: LOAD_IMM */ - - if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) -- process_mux_deps(state, n, inst->alu.add.a); -+ process_mux_deps(state, n, inst->alu.add.a.mux); - if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) -- process_mux_deps(state, n, inst->alu.add.b); -+ process_mux_deps(state, n, inst->alu.add.b.mux); - - if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) -- process_mux_deps(state, n, inst->alu.mul.a); -+ process_mux_deps(state, n, inst->alu.mul.a.mux); - if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) -- process_mux_deps(state, n, inst->alu.mul.b); -+ process_mux_deps(state, n, inst->alu.mul.b.mux); - - switch (inst->alu.add.op) { - case V3D_QPU_A_VPMSETUP: -@@ -537,22 +537,22 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, - - if (inst->alu.add.op != V3D_QPU_A_NOP) { - if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.add.a)) { -+ mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) { - return true; - } - if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.add.b)) { -+ mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) { - return true; - } - } - - if (inst->alu.mul.op != V3D_QPU_M_NOP) { - if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a)) { -+ mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) { - return true; - } - if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b)) { -+ mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) { - return true; - } - } -@@ -839,20 +839,20 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, - if (!result->sig.small_imm_b) { - if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) && - raddr_a == add_instr->raddr_b) { -- if (add_instr->alu.add.a == V3D_QPU_MUX_B) -- result->alu.add.a = V3D_QPU_MUX_A; -- if (add_instr->alu.add.b == V3D_QPU_MUX_B && -+ if (add_instr->alu.add.a.mux == V3D_QPU_MUX_B) -+ result->alu.add.a.mux = V3D_QPU_MUX_A; -+ if (add_instr->alu.add.b.mux == V3D_QPU_MUX_B && - v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) { -- result->alu.add.b = V3D_QPU_MUX_A; -+ result->alu.add.b.mux = V3D_QPU_MUX_A; - } - } - if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_B) && - raddr_a == mul_instr->raddr_b) { -- if (mul_instr->alu.mul.a == V3D_QPU_MUX_B) -- result->alu.mul.a = V3D_QPU_MUX_A; -- if (mul_instr->alu.mul.b == V3D_QPU_MUX_B && -+ if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_B) -+ result->alu.mul.a.mux = V3D_QPU_MUX_A; -+ if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_B && - v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) { -- result->alu.mul.b = V3D_QPU_MUX_A; -+ result->alu.mul.b.mux = V3D_QPU_MUX_A; - } - } - } -@@ -863,20 +863,20 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, - result->raddr_b = raddr_b; - if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_A) && - raddr_b == add_instr->raddr_a) { -- if (add_instr->alu.add.a == V3D_QPU_MUX_A) -- result->alu.add.a = V3D_QPU_MUX_B; -- if (add_instr->alu.add.b == V3D_QPU_MUX_A && -+ if (add_instr->alu.add.a.mux == V3D_QPU_MUX_A) -+ result->alu.add.a.mux = V3D_QPU_MUX_B; -+ if (add_instr->alu.add.b.mux == V3D_QPU_MUX_A && - v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) { -- result->alu.add.b = V3D_QPU_MUX_B; -+ result->alu.add.b.mux = V3D_QPU_MUX_B; - } - } - if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_A) && - raddr_b == mul_instr->raddr_a) { -- if (mul_instr->alu.mul.a == V3D_QPU_MUX_A) -- result->alu.mul.a = V3D_QPU_MUX_B; -- if (mul_instr->alu.mul.b == V3D_QPU_MUX_A && -+ if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_A) -+ result->alu.mul.a.mux = V3D_QPU_MUX_B; -+ if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_A && - v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) { -- result->alu.mul.b = V3D_QPU_MUX_B; -+ result->alu.mul.b.mux = V3D_QPU_MUX_B; - } - } - -@@ -927,11 +927,12 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst) - inst->flags.auf = V3D_QPU_UF_NONE; - - inst->alu.mul.output_pack = inst->alu.add.output_pack; -- inst->alu.mul.a_unpack = inst->alu.add.a_unpack; -- inst->alu.mul.b_unpack = inst->alu.add.b_unpack; -+ -+ inst->alu.mul.a.unpack = inst->alu.add.a.unpack; -+ inst->alu.mul.b.unpack = inst->alu.add.b.unpack; - inst->alu.add.output_pack = V3D_QPU_PACK_NONE; -- inst->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; -- inst->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; -+ inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE; -+ inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; - } - - static bool -@@ -2064,12 +2065,12 @@ alu_reads_register(struct v3d_qpu_instr *inst, - - if (add) { - num_src = v3d_qpu_add_op_num_src(inst->alu.add.op); -- mux_a = inst->alu.add.a; -- mux_b = inst->alu.add.b; -+ mux_a = inst->alu.add.a.mux; -+ mux_b = inst->alu.add.b.mux; - } else { - num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op); -- mux_a = inst->alu.mul.a; -- mux_b = inst->alu.mul.b; -+ mux_a = inst->alu.mul.a.mux; -+ mux_b = inst->alu.mul.b.mux; - } - - for (int i = 0; i < num_src; i++) { -diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c -index 660b11b0577..007cb0a941b 100644 ---- a/src/broadcom/compiler/vir.c -+++ b/src/broadcom/compiler/vir.c -@@ -113,10 +113,10 @@ vir_is_raw_mov(struct qinst *inst) - return false; - } - -- if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE || -- inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE || -- inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE || -- inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) { -+ if (inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE || -+ inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE || -+ inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE || -+ inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE) { - return false; - } - -@@ -209,15 +209,15 @@ vir_set_unpack(struct qinst *inst, int src, - - if (vir_is_add(inst)) { - if (src == 0) -- inst->qpu.alu.add.a_unpack = unpack; -+ inst->qpu.alu.add.a.unpack = unpack; - else -- inst->qpu.alu.add.b_unpack = unpack; -+ inst->qpu.alu.add.b.unpack = unpack; - } else { - assert(vir_is_mul(inst)); - if (src == 0) -- inst->qpu.alu.mul.a_unpack = unpack; -+ inst->qpu.alu.mul.a.unpack = unpack; - else -- inst->qpu.alu.mul.b_unpack = unpack; -+ inst->qpu.alu.mul.b.unpack = unpack; - } - } - -diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c -index 5c47bbdc1b0..ab5d4043039 100644 ---- a/src/broadcom/compiler/vir_dump.c -+++ b/src/broadcom/compiler/vir_dump.c -@@ -270,8 +270,8 @@ vir_dump_alu(struct v3d_compile *c, struct qinst *inst) - vir_print_reg(c, inst, inst->dst); - fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.add.output_pack)); - -- unpack[0] = instr->alu.add.a_unpack; -- unpack[1] = instr->alu.add.b_unpack; -+ unpack[0] = instr->alu.add.a.unpack; -+ unpack[1] = instr->alu.add.b.unpack; - } else { - fprintf(stderr, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op)); - fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.mc)); -@@ -282,8 +282,8 @@ vir_dump_alu(struct v3d_compile *c, struct qinst *inst) - vir_print_reg(c, inst, inst->dst); - fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.mul.output_pack)); - -- unpack[0] = instr->alu.mul.a_unpack; -- unpack[1] = instr->alu.mul.b_unpack; -+ unpack[0] = instr->alu.mul.a.unpack; -+ unpack[1] = instr->alu.mul.b.unpack; - } - - for (int i = 0; i < nsrc; i++) { -diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c -index da121c2a5bd..c4aa7255a17 100644 ---- a/src/broadcom/compiler/vir_opt_copy_propagate.c -+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c -@@ -104,14 +104,14 @@ vir_has_unpack(struct qinst *inst, int chan) - - if (vir_is_add(inst)) { - if (chan == 0) -- return inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE; -+ return inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE; - else -- return inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE; -+ return inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE; - } else { - if (chan == 0) -- return inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE; -+ return inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE; - else -- return inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE; -+ return inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE; - } - } - -@@ -161,7 +161,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs) - continue; - - /* these ops can't represent abs. */ -- if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) { -+ if (mov->qpu.alu.mul.a.unpack == V3D_QPU_UNPACK_ABS) { - switch (inst->qpu.alu.add.op) { - case V3D_QPU_A_VFPACK: - case V3D_QPU_A_FROUND: -@@ -189,7 +189,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs) - - inst->src[i] = mov->src[0]; - if (vir_has_unpack(mov, 0)) { -- enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a_unpack; -+ enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a.unpack; - - vir_set_unpack(inst, i, unpack); - } -diff --git a/src/broadcom/compiler/vir_opt_redundant_flags.c b/src/broadcom/compiler/vir_opt_redundant_flags.c -index c7896d57f2b..6b61ed6a39a 100644 ---- a/src/broadcom/compiler/vir_opt_redundant_flags.c -+++ b/src/broadcom/compiler/vir_opt_redundant_flags.c -@@ -81,11 +81,11 @@ vir_instr_flags_op_equal(struct qinst *a, struct qinst *b) - a->qpu.flags.mpf != b->qpu.flags.mpf || - a->qpu.alu.add.op != b->qpu.alu.add.op || - a->qpu.alu.mul.op != b->qpu.alu.mul.op || -- a->qpu.alu.add.a_unpack != b->qpu.alu.add.a_unpack || -- a->qpu.alu.add.b_unpack != b->qpu.alu.add.b_unpack || -+ a->qpu.alu.add.a.unpack != b->qpu.alu.add.a.unpack || -+ a->qpu.alu.add.b.unpack != b->qpu.alu.add.b.unpack || - a->qpu.alu.add.output_pack != b->qpu.alu.add.output_pack || -- a->qpu.alu.mul.a_unpack != b->qpu.alu.mul.a_unpack || -- a->qpu.alu.mul.b_unpack != b->qpu.alu.mul.b_unpack || -+ a->qpu.alu.mul.a.unpack != b->qpu.alu.mul.a.unpack || -+ a->qpu.alu.mul.b.unpack != b->qpu.alu.mul.b.unpack || - a->qpu.alu.mul.output_pack != b->qpu.alu.mul.output_pack) { - return false; - } -diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c -index 15c2e3674c2..c8b6e0a91a0 100644 ---- a/src/broadcom/compiler/vir_to_qpu.c -+++ b/src/broadcom/compiler/vir_to_qpu.c -@@ -106,20 +106,20 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) - return; - } - -- if (instr->alu.add.a != V3D_QPU_MUX_A && -- instr->alu.add.b != V3D_QPU_MUX_A && -- instr->alu.mul.a != V3D_QPU_MUX_A && -- instr->alu.mul.b != V3D_QPU_MUX_A) { -+ if (instr->alu.add.a.mux != V3D_QPU_MUX_A && -+ instr->alu.add.b.mux != V3D_QPU_MUX_A && -+ instr->alu.mul.a.mux != V3D_QPU_MUX_A && -+ instr->alu.mul.b.mux != V3D_QPU_MUX_A) { - instr->raddr_a = src.index; - *mux = V3D_QPU_MUX_A; - } else { - if (instr->raddr_a == src.index) { - *mux = V3D_QPU_MUX_A; - } else { -- assert(!(instr->alu.add.a == V3D_QPU_MUX_B && -- instr->alu.add.b == V3D_QPU_MUX_B && -- instr->alu.mul.a == V3D_QPU_MUX_B && -- instr->alu.mul.b == V3D_QPU_MUX_B) || -+ assert(!(instr->alu.add.a.mux == V3D_QPU_MUX_B && -+ instr->alu.add.b.mux == V3D_QPU_MUX_B && -+ instr->alu.mul.a.mux == V3D_QPU_MUX_B && -+ instr->alu.mul.b.mux == V3D_QPU_MUX_B) || - src.index == instr->raddr_b); - - instr->raddr_b = src.index; -@@ -147,14 +147,14 @@ is_no_op_mov(struct qinst *qinst) - if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4) - return false; - -- if (qinst->qpu.alu.mul.a != -+ if (qinst->qpu.alu.mul.a.mux != - V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) { - return false; - } - } else { - int raddr; - -- switch (qinst->qpu.alu.mul.a) { -+ switch (qinst->qpu.alu.mul.a.mux) { - case V3D_QPU_MUX_A: - raddr = qinst->qpu.raddr_a; - break; -@@ -171,7 +171,7 @@ is_no_op_mov(struct qinst *qinst) - /* No packing or flags updates, or we need to execute the - * instruction. - */ -- if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE || -+ if (qinst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE || - qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE || - qinst->qpu.flags.mc != V3D_QPU_COND_NONE || - qinst->qpu.flags.mpf != V3D_QPU_PF_NONE || -@@ -302,11 +302,11 @@ v3d_generate_code_block(struct v3d_compile *c, - assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); - if (nsrc >= 1) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.add.a, src[0]); -+ &qinst->qpu.alu.add.a.mux, src[0]); - } - if (nsrc >= 2) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.add.b, src[1]); -+ &qinst->qpu.alu.add.b.mux, src[1]); - } - - qinst->qpu.alu.add.waddr = dst.index; -@@ -314,11 +314,11 @@ v3d_generate_code_block(struct v3d_compile *c, - } else { - if (nsrc >= 1) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.mul.a, src[0]); -+ &qinst->qpu.alu.mul.a.mux, src[0]); - } - if (nsrc >= 2) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.mul.b, src[1]); -+ &qinst->qpu.alu.mul.b.mux, src[1]); - } - - qinst->qpu.alu.mul.waddr = dst.index; -diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c -index 6aca3c28e78..588a665f770 100644 ---- a/src/broadcom/qpu/qpu_disasm.c -+++ b/src/broadcom/qpu/qpu_disasm.c -@@ -121,16 +121,16 @@ v3d_qpu_disasm_add(struct disasm_state *disasm, - if (num_src >= 1) { - if (has_dst) - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a); -+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a.mux); - append(disasm, "%s", -- v3d_qpu_unpack_name(instr->alu.add.a_unpack)); -+ v3d_qpu_unpack_name(instr->alu.add.a.unpack)); - } - - if (num_src >= 2) { - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b); -+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b.mux); - append(disasm, "%s", -- v3d_qpu_unpack_name(instr->alu.add.b_unpack)); -+ v3d_qpu_unpack_name(instr->alu.add.b.unpack)); - } - } - -@@ -164,16 +164,16 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm, - if (num_src >= 1) { - if (has_dst) - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a); -+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a.mux); - append(disasm, "%s", -- v3d_qpu_unpack_name(instr->alu.mul.a_unpack)); -+ v3d_qpu_unpack_name(instr->alu.mul.a.unpack)); - } - - if (num_src >= 2) { - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b); -+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b.mux); - append(disasm, "%s", -- v3d_qpu_unpack_name(instr->alu.mul.b_unpack)); -+ v3d_qpu_unpack_name(instr->alu.mul.b.unpack)); - } - } - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index 7759fb0efdf..7ece8b5e570 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -926,10 +926,10 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) - int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); - int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); - -- return ((add_nsrc > 0 && inst->alu.add.a == mux) || -- (add_nsrc > 1 && inst->alu.add.b == mux) || -- (mul_nsrc > 0 && inst->alu.mul.a == mux) || -- (mul_nsrc > 1 && inst->alu.mul.b == mux)); -+ return ((add_nsrc > 0 && inst->alu.add.a.mux == mux) || -+ (add_nsrc > 1 && inst->alu.add.b.mux == mux) || -+ (mul_nsrc > 0 && inst->alu.mul.a.mux == mux) || -+ (mul_nsrc > 1 && inst->alu.mul.b.mux == mux)); - } - - bool -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 13b3f37d43f..53a51bfb3e1 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -294,25 +294,26 @@ enum v3d_qpu_mux { - V3D_QPU_MUX_B, - }; - -+struct v3d_qpu_input { -+ enum v3d_qpu_mux mux; -+ enum v3d_qpu_input_unpack unpack; -+}; -+ - struct v3d_qpu_alu_instr { - struct { - enum v3d_qpu_add_op op; -- enum v3d_qpu_mux a, b; -+ struct v3d_qpu_input a, b; - uint8_t waddr; - bool magic_write; - enum v3d_qpu_output_pack output_pack; -- enum v3d_qpu_input_unpack a_unpack; -- enum v3d_qpu_input_unpack b_unpack; - } add; - - struct { - enum v3d_qpu_mul_op op; -- enum v3d_qpu_mux a, b; -+ struct v3d_qpu_input a, b; - uint8_t waddr; - bool magic_write; - enum v3d_qpu_output_pack output_pack; -- enum v3d_qpu_input_unpack a_unpack; -- enum v3d_qpu_input_unpack b_unpack; - } mul; - }; - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 2820d9d4c56..6e975793fc0 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -853,12 +853,12 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -- &instr->alu.add.a_unpack)) { -+ &instr->alu.add.a.unpack)) { - return false; - } - - if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, -- &instr->alu.add.b_unpack)) { -+ &instr->alu.add.b.unpack)) { - return false; - } - break; -@@ -872,7 +872,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - instr->alu.add.output_pack = mux_b & 0x3; - - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -- &instr->alu.add.a_unpack)) { -+ &instr->alu.add.a.unpack)) { - return false; - } - break; -@@ -884,7 +884,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -- &instr->alu.add.a_unpack)) { -+ &instr->alu.add.a.unpack)) { - return false; - } - break; -@@ -892,23 +892,23 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - case V3D_QPU_A_VFMIN: - case V3D_QPU_A_VFMAX: - if (!v3d_qpu_float16_unpack_unpack(op & 0x7, -- &instr->alu.add.a_unpack)) { -+ &instr->alu.add.a.unpack)) { - return false; - } - - instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -- instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; - break; - - default: - instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -- instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; -- instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; - break; - } - -- instr->alu.add.a = mux_a; -- instr->alu.add.b = mux_b; -+ instr->alu.add.a.mux = mux_a; -+ instr->alu.add.b.mux = mux_b; - instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); - - instr->alu.add.magic_write = false; -@@ -956,12 +956,12 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; - - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -- &instr->alu.mul.a_unpack)) { -+ &instr->alu.mul.a.unpack)) { - return false; - } - - if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, -- &instr->alu.mul.b_unpack)) { -+ &instr->alu.mul.b.unpack)) { - return false; - } - -@@ -972,7 +972,7 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - ((mux_b >> 2) & 1)); - - if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, -- &instr->alu.mul.a_unpack)) { -+ &instr->alu.mul.a.unpack)) { - return false; - } - -@@ -982,23 +982,23 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; - - if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7, -- &instr->alu.mul.a_unpack)) { -+ &instr->alu.mul.a.unpack)) { - return false; - } - -- instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE; - - break; - - default: - instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; -- instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE; -- instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE; - break; - } - -- instr->alu.mul.a = mux_a; -- instr->alu.mul.b = mux_b; -+ instr->alu.mul.a.mux = mux_a; -+ instr->alu.mul.b.mux = mux_b; - instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); - instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM; - -@@ -1030,8 +1030,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *instr, uint64_t *packed_instr) - { - uint32_t waddr = instr->alu.add.waddr; -- uint32_t mux_a = instr->alu.add.a; -- uint32_t mux_b = instr->alu.add.b; -+ uint32_t mux_a = instr->alu.add.a.mux; -+ uint32_t mux_b = instr->alu.add.b.mux; - int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); - const struct opcode_desc *desc = - lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops), -@@ -1102,12 +1102,12 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - } - opcode |= output_pack << 4; - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, - &a_unpack)) { - return false; - } - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, - &b_unpack)) { - return false; - } -@@ -1141,17 +1141,17 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - uint32_t a_unpack; - uint32_t b_unpack; - -- if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS || -- instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) { -+ if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS || -+ instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) { - return false; - } - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, - &a_unpack)) { - return false; - } - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, - &b_unpack)) { - return false; - } -@@ -1176,7 +1176,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - } - mux_b |= packed; - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, - &packed)) { - return false; - } -@@ -1194,7 +1194,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - return false; - - uint32_t packed; -- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, - &packed)) { - return false; - } -@@ -1207,11 +1207,11 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - case V3D_QPU_A_VFMIN: - case V3D_QPU_A_VFMAX: - if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || -- instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) { -+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) { - return false; - } - -- if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack, -+ if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack, - &packed)) { - return false; - } -@@ -1221,8 +1221,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - default: - if (instr->alu.add.op != V3D_QPU_A_NOP && - (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || -- instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE || -- instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) { -+ instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE || -+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) { - return false; - } - break; -@@ -1242,8 +1242,8 @@ static bool - v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *instr, uint64_t *packed_instr) - { -- uint32_t mux_a = instr->alu.mul.a; -- uint32_t mux_b = instr->alu.mul.b; -+ uint32_t mux_a = instr->alu.mul.a.mux; -+ uint32_t mux_b = instr->alu.mul.b.mux; - int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op); - - const struct opcode_desc *desc = -@@ -1277,13 +1277,13 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, - */ - opcode += packed << 4; - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, - &packed)) { - return false; - } - opcode |= packed << 2; - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack, - &packed)) { - return false; - } -@@ -1301,7 +1301,7 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, - opcode |= (packed >> 1) & 1; - mux_b = (packed & 1) << 2; - -- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack, -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, - &packed)) { - return false; - } -@@ -1315,16 +1315,16 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, - if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE) - return false; - -- if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack, -+ if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack, - &packed)) { - return false; - } -- if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16) -+ if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16) - opcode = 8; - else - opcode |= (packed + 4) & 7; - -- if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) -+ if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE) - return false; - - break; -diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c -index 2f8e19c73fe..be7b78d5ef0 100644 ---- a/src/broadcom/qpu/tests/qpu_disasm.c -+++ b/src/broadcom/qpu/tests/qpu_disasm.c -@@ -160,10 +160,10 @@ main(int argc, char **argv) - /* Swap the operands to be sure that we test - * how the QPUs distinguish between these ops. - */ -- swap_mux(&instr.alu.add.a, -- &instr.alu.add.b); -- swap_pack(&instr.alu.add.a_unpack, -- &instr.alu.add.b_unpack); -+ swap_mux(&instr.alu.add.a.mux, -+ &instr.alu.add.b.mux); -+ swap_pack(&instr.alu.add.a.unpack, -+ &instr.alu.add.b.unpack); - break; - default: - break; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0012-broadcom-qpu-add-raddr-on-v3d_qpu_input.patch b/projects/RPi/devices/RPi5/patches/mesa/0012-broadcom-qpu-add-raddr-on-v3d_qpu_input.patch deleted file mode 100644 index 9c2303f4e4..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0012-broadcom-qpu-add-raddr-on-v3d_qpu_input.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 52ea09792ff8a438ccdecac47b8415657be90098 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 6 Aug 2021 01:33:32 +0200 -Subject: [PATCH 012/142] broadcom/qpu: add raddr on v3d_qpu_input - -On V3D 7.x mux are not used, and raddr_a/b/c/d are used instead - -This is not perfect, as for v71, the raddr_a/b defined at qpu_instr -became superfluous. But the alternative would be to define two -different structs, or even having them defined based on version -ifdefs, so this is a reasonable compromise. ---- - src/broadcom/qpu/qpu_instr.h | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 53a51bfb3e1..9e56e2d6a99 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -295,7 +295,10 @@ enum v3d_qpu_mux { - }; - - struct v3d_qpu_input { -- enum v3d_qpu_mux mux; -+ union { -+ enum v3d_qpu_mux mux; /* V3D 4.x */ -+ uint8_t raddr; /* V3D 7.x */ -+ }; - enum v3d_qpu_input_unpack unpack; - }; - -@@ -385,8 +388,8 @@ struct v3d_qpu_instr { - struct v3d_qpu_sig sig; - uint8_t sig_addr; - bool sig_magic; /* If the signal writes to a magic address */ -- uint8_t raddr_a; -- uint8_t raddr_b; -+ uint8_t raddr_a; /* V3D 4.x */ -+ uint8_t raddr_b; /* V3D 4.x*/ - struct v3d_qpu_flags flags; - - union { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0013-broadcom-qpu-defining-shift-mask-for-raddr_c-d.patch b/projects/RPi/devices/RPi5/patches/mesa/0013-broadcom-qpu-defining-shift-mask-for-raddr_c-d.patch deleted file mode 100644 index 162529e963..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0013-broadcom-qpu-defining-shift-mask-for-raddr_c-d.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 3e5ad0881c2789619cdf65f40a44d5481e28e800 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 12 Aug 2021 02:24:02 +0200 -Subject: [PATCH 013/142] broadcom/qpu: defining shift/mask for raddr_c/d - -On V3D 7.x it replaces mul_a/b and add_a/b ---- - src/broadcom/qpu/qpu_pack.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 6e975793fc0..4f106909729 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -84,6 +84,9 @@ - #define V3D_QPU_MUL_A_SHIFT 18 - #define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18) - -+#define V3D_QPU_RADDR_C_SHIFT 18 -+#define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18) -+ - #define V3D_QPU_ADD_B_SHIFT 15 - #define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15) - -@@ -98,6 +101,9 @@ - #define V3D_QPU_BRANCH_BDI_SHIFT 12 - #define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) - -+#define V3D_QPU_RADDR_D_SHIFT 12 -+#define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12) -+ - #define V3D_QPU_RADDR_A_SHIFT 6 - #define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6) - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0014-broadcom-commmon-add-has_accumulators-field-on-v3d_d.patch b/projects/RPi/devices/RPi5/patches/mesa/0014-broadcom-commmon-add-has_accumulators-field-on-v3d_d.patch deleted file mode 100644 index 1855816d95..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0014-broadcom-commmon-add-has_accumulators-field-on-v3d_d.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 81febf14fe05ad26e992275b911e8bc1e1416ebc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 17 Sep 2021 01:04:31 +0200 -Subject: [PATCH 014/142] broadcom/commmon: add has_accumulators field on - v3d_device_info - -Even if we can just check for the version on the code, checking for -this field makes several places more readable. So for example, on the -register allocate code we doesn't assign an accumulator because we -don't have accumulators on that hw, instead of because hw version is a -given one. ---- - src/broadcom/common/v3d_device_info.c | 2 ++ - src/broadcom/common/v3d_device_info.h | 3 +++ - 2 files changed, 5 insertions(+) - -diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c -index 7512fe3a06b..7bc2b662cfc 100644 ---- a/src/broadcom/common/v3d_device_info.c -+++ b/src/broadcom/common/v3d_device_info.c -@@ -65,6 +65,8 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i - int qups = (ident1.value >> 8) & 0xf; - devinfo->qpu_count = nslc * qups; - -+ devinfo->has_accumulators = devinfo->ver < 71; -+ - switch (devinfo->ver) { - case 33: - case 41: -diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h -index 32cb65cf81f..8dfc7858727 100644 ---- a/src/broadcom/common/v3d_device_info.h -+++ b/src/broadcom/common/v3d_device_info.h -@@ -42,6 +42,9 @@ struct v3d_device_info { - - /* NSLC * QUPS from the core's IDENT registers. */ - int qpu_count; -+ -+ /* If the hw has accumulator registers */ -+ bool has_accumulators; - }; - - typedef int (*v3d_ioctl_fun)(int fd, unsigned long request, void *arg); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0015-broadcom-qpu-add-qpu_writes_rf0_implicitly-helper.patch b/projects/RPi/devices/RPi5/patches/mesa/0015-broadcom-qpu-add-qpu_writes_rf0_implicitly-helper.patch deleted file mode 100644 index 8bd646ac94..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0015-broadcom-qpu-add-qpu_writes_rf0_implicitly-helper.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 7d42eca87b6e144697810405308d99d200dca62a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 15 Sep 2021 10:56:43 +0200 -Subject: [PATCH 015/142] broadcom/qpu: add qpu_writes_rf0_implicitly helper - -On v71 rf0 replaces r5 as the register that gets updated implicitly -with uniform loads, and gets the C coefficient with ldvary. This -helper return if rf0 gets implicitly updated. ---- - src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++ - src/broadcom/qpu/qpu_instr.h | 2 ++ - 2 files changed, 14 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index 7ece8b5e570..8de99c611d5 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -920,6 +920,18 @@ v3d_qpu_writes_accum(const struct v3d_device_info *devinfo, - return false; - } - -+bool -+v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *inst) -+{ -+ if (devinfo->ver >= 71 && -+ (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) { -+ return true; -+ } -+ -+ return false; -+} -+ - bool - v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) - { -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 9e56e2d6a99..a25be8e0ee6 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -473,6 +473,8 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; - bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; -+bool v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; - bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0016-broadcom-qpu-add-pack-unpack-support-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0016-broadcom-qpu-add-pack-unpack-support-for-v71.patch deleted file mode 100644 index 8afa579075..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0016-broadcom-qpu-add-pack-unpack-support-for-v71.patch +++ /dev/null @@ -1,1258 +0,0 @@ -From f0859613bd59e14fb21571e7978bb5c5d5e9c6d7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Sat, 7 Aug 2021 02:20:39 +0200 -Subject: [PATCH 016/142] broadcom/qpu: add pack/unpack support for v71 - -Note that we provide new v71 alu pack/unpack methods. As there are a -lot that it is equivalent, initially we tried to use existing methods -as template and add version checks on the existing methods. At some -early point that become just really unreadable, so it become better to -just provide new methods, even if v42 and v71 methods have a really -similar structure. - -Note that we have splitted the op tables, and created a two (add/mul) -for v71. As the description struct include versioning info, we could -have just used one table. But, specially with the add table, there are -a lot of differences with v71. So it is slightly tidier this -way. Also, taking into account that we do a linear search on the -tables, this can be even justified by performance. ---- - src/broadcom/qpu/qpu_pack.c | 1049 ++++++++++++++++++++++++++++++----- - 1 file changed, 904 insertions(+), 145 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 4f106909729..4045275cb9a 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -490,16 +490,26 @@ v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, - - /* Make a mapping of the table of opcodes in the spec. The opcode is - * determined by a combination of the opcode field, and in the case of 0 or -- * 1-arg opcodes, the mux_b field as well. -+ * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as -+ * well. - */ --#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1)) --#define ANYMUX MUX_MASK(0, 7) -+#define OP_MASK(val) BITFIELD64_BIT(val) -+#define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, top - bot + 1) -+#define ANYMUX OP_RANGE(0, 7) -+#define ANYOPMASK OP_RANGE(0, 63) - - struct opcode_desc { - uint8_t opcode_first; - uint8_t opcode_last; -- uint8_t mux_b_mask; -- uint8_t mux_a_mask; -+ -+ union { -+ struct { -+ uint8_t b_mask; -+ uint8_t a_mask; -+ } mux; -+ uint64_t raddr_mask; -+ }; -+ - uint8_t op; - - /* first_ver == 0 if it's the same across all V3D versions. -@@ -512,122 +522,288 @@ struct opcode_desc { - uint8_t last_ver; - }; - --static const struct opcode_desc add_ops[] = { -+static const struct opcode_desc add_ops_v33[] = { - /* FADD is FADDNF depending on the order of the mux_a/mux_b. */ -- { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD }, -- { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF }, -- { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, -- { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD }, -- { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, -- { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB }, -- { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, -- { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB }, -- { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN }, -- { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX }, -- { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN }, -- { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX }, -- { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL }, -- { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR }, -- { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR }, -- { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR }, -+ { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD }, -+ { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF }, -+ { 53, 55, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK }, -+ { 56, 56, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD }, -+ { 57, 59, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK }, -+ { 60, 60, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB }, -+ { 61, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK }, -+ { 64, 111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB }, -+ { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN }, -+ { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX }, -+ { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN }, -+ { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX }, -+ { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL }, -+ { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR }, -+ { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR }, -+ { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR }, - /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */ -- { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN }, -- { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX }, -- { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN }, -- -- { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND }, -- { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR }, -- { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR }, -- -- { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD }, -- { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB }, -- { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }, -- { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG }, -- { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH }, -- { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH }, -- { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP }, -- { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP }, -- { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF }, -- { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF }, -- { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 }, -- { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX }, -- { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX }, -- { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR }, -- { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA }, -- { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA }, -- { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB }, -- { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB }, -- -- { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD }, -- { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD }, -- { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD }, -- { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD }, -- -- { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF }, -- { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF }, -- { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 }, -- { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 }, -- { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 }, -- { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 }, -- { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, -- { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, -- { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 }, -- { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 }, -- { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, -- -- { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, -- { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 }, -- { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, -- { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 }, -- { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 }, -- { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 }, -- { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 }, -- { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 }, -- { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 }, -- { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 }, -- { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 }, -- { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 }, -+ { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN }, -+ { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX }, -+ { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN }, -+ -+ { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND }, -+ { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR }, -+ { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR }, -+ -+ { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD }, -+ { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB }, -+ { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT }, -+ { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG }, -+ { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH }, -+ { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH }, -+ { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP }, -+ { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP }, -+ { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF }, -+ { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB }, -+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB }, -+ -+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD }, -+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD }, -+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD }, -+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD }, -+ -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT }, -+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 }, -+ { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 }, -+ { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, -+ -+ { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, -+ { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 }, -+ { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, -+ { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 }, -+ { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 }, -+ { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 }, -+ { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 }, -+ { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 }, -+ { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 }, -+ { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 }, -+ { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 }, -+ { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 }, - - /* FIXME: MORE COMPLICATED */ -- /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ -+ /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ - -- { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP }, -- { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX }, -+ { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP }, -+ { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX }, - -- { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND }, -- { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN }, -- { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC }, -- { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ }, -- { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR }, -- { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ }, -- { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL }, -- { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC }, -+ { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND }, -+ { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN }, -+ { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC }, -+ { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ }, -+ { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR }, -+ { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ }, -+ { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL }, -+ { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC }, - -- { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX }, -- { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY }, -+ { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX }, -+ { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY }, - - /* The stvpms are distinguished by the waddr field. */ -- { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, -- { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, -- { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, -+ { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV }, -+ { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD }, -+ { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP }, -+ -+ { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF }, -+ { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ }, -+ { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF }, -+}; -+ -+static const struct opcode_desc mul_ops_v33[] = { -+ { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD }, -+ { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB }, -+ { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 }, -+ { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL }, -+ { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 }, -+ { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP }, -+ { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 }, -+ { 15, 15, .mux.b_mask = OP_RANGE(0, 3), ANYMUX, V3D_QPU_M_FMOV, 33, 42}, -+ { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 }, -+ { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 }, -+ -+ { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL }, -+}; -+ -+/* Note that it would have been possible to define all the add/mul opcodes in -+ * just one table, using the first_ver/last_ver. But taking into account that -+ * for v71 there were a lot of changes, it was more tidy this way. Also right -+ * now we are doing a linear search on those tables, so this maintains the -+ * tables smaller. -+ * -+ * Just in case we merge the tables, we define the first_ver as 71 for those -+ * opcodes that changed on v71 -+ */ -+static const struct opcode_desc add_ops_v71[] = { -+ { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD }, -+ { 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK }, -+ { 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD }, -+ { 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK }, -+ { 60, 60, .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB }, -+ { 61, 63, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK }, -+ { 64, 111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB }, -+ { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN }, -+ { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX }, -+ { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN }, -+ { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX }, -+ { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL }, -+ { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR }, -+ { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR }, -+ { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR }, -+ -+ { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND }, -+ { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR }, -+ { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR }, -+ { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD }, -+ { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB }, -+ -+ { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT }, -+ { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG }, -+ { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH }, -+ { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH }, -+ { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP }, -+ { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ }, -+ { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF }, -+ { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF }, -+ -+ { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 }, -+ { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX }, -+ { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX }, -+ { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR }, -+ { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA }, -+ { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA }, -+ { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB }, -+ { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB }, -+ { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD }, -+ { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD }, -+ { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF }, -+ { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF }, -+ { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID }, -+ { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID }, -+ { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID }, -+ { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT }, -+ { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT }, -+ { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST }, -+ { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST }, -+ -+ { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD }, -+ { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD }, -+ -+ { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 }, -+ -+ { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 }, -+ { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 }, -+ -+ { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 }, - -- { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, -- { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, -- { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, -+ /* The stvpms are distinguished by the waddr field. */ -+ { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71}, -+ { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71}, -+ { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71}, -+ -+ { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FROUND, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FROUND, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FROUND, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_MASK(3), V3D_QPU_A_FTOIN, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(7), V3D_QPU_A_FTOIN, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 }, -+ { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 }, -+ { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 }, -+ -+ { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC }, -+ { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC }, -+ { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC }, -+ { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC }, -+ -+ { 246, 246, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FDX, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FDX, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FDX, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 }, -+ -+ { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 }, -+ { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 }, - }; - --static const struct opcode_desc mul_ops[] = { -- { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, -- { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, -- { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, -- { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, -- { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, -- { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, -- { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, -- { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, -- { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, -- { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, -- { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, -+static const struct opcode_desc mul_ops_v71[] = { -+ /* For V3D 7.1, second mask field would be ignored */ -+ { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 }, -+ { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 }, -+ { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 }, -+ { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 }, -+ { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 }, -+ { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 }, -+ { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 }, -+ -+ { 14, 14, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_M_FMOV, 71 }, -+ { 14, 14, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_M_FMOV, 71 }, -+ { 14, 14, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_M_FMOV, 71 }, -+ { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 }, -+ { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 }, -+ { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 }, -+ -+ { 14, 14, .raddr_mask = OP_MASK(3), V3D_QPU_M_MOV, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(7), V3D_QPU_M_MOV, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 }, -+ -+ { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 }, -+ -+ { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL }, - }; - - /* Returns true if op_desc should be filtered out based on devinfo->ver -@@ -636,17 +812,23 @@ static const struct opcode_desc mul_ops[] = { - */ - static bool - opcode_invalid_in_version(const struct v3d_device_info *devinfo, -- const struct opcode_desc *op_desc) -+ const uint8_t first_ver, -+ const uint8_t last_ver) - { -- return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) || -- (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver); -+ return (first_ver != 0 && devinfo->ver < first_ver) || -+ (last_ver != 0 && devinfo->ver > last_ver); - } - -+/* Note that we pass as parameters mux_a, mux_b and raddr, even if depending -+ * on the devinfo->ver some would be ignored. We do this way just to avoid -+ * having two really similar lookup_opcode methods -+ */ - static const struct opcode_desc * - lookup_opcode_from_packed(const struct v3d_device_info *devinfo, - const struct opcode_desc *opcodes, - size_t num_opcodes, uint32_t opcode, -- uint32_t mux_a, uint32_t mux_b) -+ uint32_t mux_a, uint32_t mux_b, -+ uint32_t raddr) - { - for (int i = 0; i < num_opcodes; i++) { - const struct opcode_desc *op_desc = &opcodes[i]; -@@ -655,14 +837,19 @@ lookup_opcode_from_packed(const struct v3d_device_info *devinfo, - opcode > op_desc->opcode_last) - continue; - -- if (opcode_invalid_in_version(devinfo, op_desc)) -+ if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver)) - continue; - -- if (!(op_desc->mux_b_mask & (1 << mux_b))) -- continue; -+ if (devinfo->ver < 71) { -+ if (!(op_desc->mux.b_mask & (1 << mux_b))) -+ continue; - -- if (!(op_desc->mux_a_mask & (1 << mux_a))) -- continue; -+ if (!(op_desc->mux.a_mask & (1 << mux_a))) -+ continue; -+ } else { -+ if (!(op_desc->raddr_mask & ((uint64_t) 1 << raddr))) -+ continue; -+ } - - return op_desc; - } -@@ -784,8 +971,8 @@ v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack, - } - - static bool --v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -- struct v3d_qpu_instr *instr) -+v3d33_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -+ struct v3d_qpu_instr *instr) - { - uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD); - uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A); -@@ -802,8 +989,9 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - map_op = (map_op - 253 + 245); - - const struct opcode_desc *desc = -- lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops), -- map_op, mux_a, mux_b); -+ lookup_opcode_from_packed(devinfo, add_ops_v33, -+ ARRAY_SIZE(add_ops_v33), -+ map_op, mux_a, mux_b, 0); - - if (!desc) - return false; -@@ -939,8 +1127,160 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - } - - static bool --v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -+v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -+ struct v3d_qpu_instr *instr) -+{ -+ uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD); -+ uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A); -+ uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B); -+ uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); -+ uint32_t map_op = op; -+ -+ const struct opcode_desc *desc = -+ lookup_opcode_from_packed(devinfo, -+ add_ops_v71, -+ ARRAY_SIZE(add_ops_v71), -+ map_op, 0, 0, -+ raddr_b); -+ if (!desc) -+ return false; -+ -+ instr->alu.add.op = desc->op; -+ -+ /* Some QPU ops require a bit more than just basic opcode and mux a/b -+ * comparisons to distinguish them. -+ */ -+ switch (instr->alu.add.op) { -+ case V3D_QPU_A_STVPMV: -+ case V3D_QPU_A_STVPMD: -+ case V3D_QPU_A_STVPMP: -+ switch (waddr) { -+ case 0: -+ instr->alu.add.op = V3D_QPU_A_STVPMV; -+ break; -+ case 1: -+ instr->alu.add.op = V3D_QPU_A_STVPMD; -+ break; -+ case 2: -+ instr->alu.add.op = V3D_QPU_A_STVPMP; -+ break; -+ default: -+ return false; -+ } -+ break; -+ default: -+ break; -+ } -+ -+ switch (instr->alu.add.op) { -+ case V3D_QPU_A_FADD: -+ case V3D_QPU_A_FADDNF: -+ case V3D_QPU_A_FSUB: -+ case V3D_QPU_A_FMIN: -+ case V3D_QPU_A_FMAX: -+ case V3D_QPU_A_FCMP: -+ case V3D_QPU_A_VFPACK: -+ if (instr->alu.add.op != V3D_QPU_A_VFPACK && -+ instr->alu.add.op != V3D_QPU_A_FCMP) { -+ instr->alu.add.output_pack = (op >> 4) & 0x3; -+ } else { -+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -+ } -+ -+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -+ &instr->alu.add.a.unpack)) { -+ return false; -+ } -+ -+ if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, -+ &instr->alu.add.b.unpack)) { -+ return false; -+ } -+ break; -+ -+ case V3D_QPU_A_FFLOOR: -+ case V3D_QPU_A_FROUND: -+ case V3D_QPU_A_FTRUNC: -+ case V3D_QPU_A_FCEIL: -+ case V3D_QPU_A_FDX: -+ case V3D_QPU_A_FDY: -+ instr->alu.add.output_pack = raddr_b & 0x3; -+ -+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -+ &instr->alu.add.a.unpack)) { -+ return false; -+ } -+ break; -+ -+ case V3D_QPU_A_FTOIN: -+ case V3D_QPU_A_FTOIZ: -+ case V3D_QPU_A_FTOUZ: -+ case V3D_QPU_A_FTOC: -+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -+ -+ if (!v3d_qpu_float32_unpack_unpack((raddr_b >> 2) & 0x3, -+ &instr->alu.add.a.unpack)) { -+ return false; -+ } -+ break; -+ -+ case V3D_QPU_A_VFMIN: -+ case V3D_QPU_A_VFMAX: -+ unreachable("pending v71 update"); -+ if (!v3d_qpu_float16_unpack_unpack(op & 0x7, -+ &instr->alu.add.a.unpack)) { -+ return false; -+ } -+ -+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; -+ break; -+ -+ default: -+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -+ instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; -+ break; -+ } -+ -+ instr->alu.add.a.raddr = raddr_a; -+ instr->alu.add.b.raddr = raddr_b; -+ instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); -+ -+ instr->alu.add.magic_write = false; -+ if (packed_inst & V3D_QPU_MA) { -+ switch (instr->alu.add.op) { -+ case V3D_QPU_A_LDVPMV_IN: -+ instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; -+ break; -+ case V3D_QPU_A_LDVPMD_IN: -+ instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT; -+ break; -+ case V3D_QPU_A_LDVPMG_IN: -+ instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT; -+ break; -+ default: -+ instr->alu.add.magic_write = true; -+ break; -+ } -+ } -+ -+ return true; -+} -+ -+static bool -+v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - struct v3d_qpu_instr *instr) -+{ -+ if (devinfo->ver < 71) -+ return v3d33_qpu_add_unpack(devinfo, packed_inst, instr); -+ else -+ return v3d71_qpu_add_unpack(devinfo, packed_inst, instr); -+} -+ -+static bool -+v3d33_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -+ struct v3d_qpu_instr *instr) - { - uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL); - uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A); -@@ -948,9 +1288,10 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - - { - const struct opcode_desc *desc = -- lookup_opcode_from_packed(devinfo, mul_ops, -- ARRAY_SIZE(mul_ops), -- op, mux_a, mux_b); -+ lookup_opcode_from_packed(devinfo, -+ mul_ops_v33, -+ ARRAY_SIZE(mul_ops_v33), -+ op, mux_a, mux_b, 0); - if (!desc) - return false; - -@@ -1011,6 +1352,91 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, - return true; - } - -+static bool -+v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -+ struct v3d_qpu_instr *instr) -+{ -+ uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL); -+ uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C); -+ uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D); -+ -+ { -+ const struct opcode_desc *desc = -+ lookup_opcode_from_packed(devinfo, -+ mul_ops_v71, -+ ARRAY_SIZE(mul_ops_v71), -+ op, 0, 0, -+ raddr_d); -+ if (!desc) -+ return false; -+ -+ instr->alu.mul.op = desc->op; -+ } -+ -+ switch (instr->alu.mul.op) { -+ case V3D_QPU_M_FMUL: -+ instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; -+ -+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, -+ &instr->alu.mul.a.unpack)) { -+ return false; -+ } -+ -+ if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, -+ &instr->alu.mul.b.unpack)) { -+ return false; -+ } -+ -+ break; -+ -+ case V3D_QPU_M_FMOV: -+ instr->alu.mul.output_pack = (raddr_d >> 2) & 1; -+ -+ if (!v3d_qpu_float32_unpack_unpack(raddr_d & 0x3, -+ &instr->alu.mul.a.unpack)) { -+ return false; -+ } -+ -+ break; -+ -+ case V3D_QPU_M_VFMUL: -+ unreachable("pending v71 update"); -+ instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; -+ -+ if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7, -+ &instr->alu.mul.a.unpack)) { -+ return false; -+ } -+ -+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE; -+ -+ break; -+ -+ default: -+ instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; -+ instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE; -+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE; -+ break; -+ } -+ -+ instr->alu.mul.a.raddr = raddr_c; -+ instr->alu.mul.b.raddr = raddr_d; -+ instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); -+ instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM; -+ -+ return true; -+} -+ -+static bool -+v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, -+ struct v3d_qpu_instr *instr) -+{ -+ if (devinfo->ver < 71) -+ return v3d33_qpu_mul_unpack(devinfo, packed_inst, instr); -+ else -+ return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr); -+} -+ - static const struct opcode_desc * - lookup_opcode_from_instr(const struct v3d_device_info *devinfo, - const struct opcode_desc *opcodes, size_t num_opcodes, -@@ -1022,7 +1448,7 @@ lookup_opcode_from_instr(const struct v3d_device_info *devinfo, - if (op_desc->op != op) - continue; - -- if (opcode_invalid_in_version(devinfo, op_desc)) -+ if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver)) - continue; - - return op_desc; -@@ -1032,30 +1458,31 @@ lookup_opcode_from_instr(const struct v3d_device_info *devinfo, - } - - static bool --v3d_qpu_add_pack(const struct v3d_device_info *devinfo, -- const struct v3d_qpu_instr *instr, uint64_t *packed_instr) -+v3d33_qpu_add_pack(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr) - { - uint32_t waddr = instr->alu.add.waddr; - uint32_t mux_a = instr->alu.add.a.mux; - uint32_t mux_b = instr->alu.add.b.mux; - int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); - const struct opcode_desc *desc = -- lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops), -+ lookup_opcode_from_instr(devinfo, add_ops_v33, -+ ARRAY_SIZE(add_ops_v33), - instr->alu.add.op); - - if (!desc) - return false; - -- uint32_t opcode = desc->opcode_first; -+ uint32_t opcode = opcode = desc->opcode_first; - - /* If an operation doesn't use an arg, its mux values may be used to - * identify the operation type. - */ - if (nsrc < 2) -- mux_b = ffs(desc->mux_b_mask) - 1; -+ mux_b = ffs(desc->mux.b_mask) - 1; - - if (nsrc < 1) -- mux_a = ffs(desc->mux_a_mask) - 1; -+ mux_a = ffs(desc->mux.a_mask) - 1; - - bool no_magic_write = false; - -@@ -1162,8 +1589,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - return false; - } - -- opcode = (opcode & ~(1 << 2)) | (a_unpack << 2); -- opcode = (opcode & ~(1 << 0)) | (b_unpack << 0); -+ opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2); -+ opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0); - - break; - } -@@ -1188,7 +1615,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - } - if (packed == 0) - return false; -- opcode = (opcode & ~(1 << 2)) | packed << 2; -+ opcode = (opcode & ~(0x3 << 2)) | packed << 2; - break; - } - -@@ -1245,15 +1672,211 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, - } - - static bool --v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, -- const struct v3d_qpu_instr *instr, uint64_t *packed_instr) -+v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr) -+{ -+ uint32_t waddr = instr->alu.add.waddr; -+ uint32_t raddr_a = instr->alu.add.a.raddr; -+ uint32_t raddr_b = instr->alu.add.b.raddr; -+ -+ int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); -+ const struct opcode_desc *desc = -+ lookup_opcode_from_instr(devinfo, add_ops_v71, -+ ARRAY_SIZE(add_ops_v71), -+ instr->alu.add.op); -+ if (!desc) -+ return false; -+ -+ uint32_t opcode = opcode = desc->opcode_first; -+ -+ /* If an operation doesn't use an arg, its raddr values may be used to -+ * identify the operation type. -+ */ -+ if (nsrc < 2) -+ raddr_b = ffsll(desc->raddr_mask) - 1; -+ -+ bool no_magic_write = false; -+ -+ switch (instr->alu.add.op) { -+ case V3D_QPU_A_STVPMV: -+ waddr = 0; -+ no_magic_write = true; -+ break; -+ case V3D_QPU_A_STVPMD: -+ waddr = 1; -+ no_magic_write = true; -+ break; -+ case V3D_QPU_A_STVPMP: -+ waddr = 2; -+ no_magic_write = true; -+ break; -+ -+ case V3D_QPU_A_LDVPMV_IN: -+ case V3D_QPU_A_LDVPMD_IN: -+ case V3D_QPU_A_LDVPMP: -+ case V3D_QPU_A_LDVPMG_IN: -+ assert(!instr->alu.add.magic_write); -+ break; -+ -+ case V3D_QPU_A_LDVPMV_OUT: -+ case V3D_QPU_A_LDVPMD_OUT: -+ case V3D_QPU_A_LDVPMG_OUT: -+ assert(!instr->alu.add.magic_write); -+ *packed_instr |= V3D_QPU_MA; -+ break; -+ -+ default: -+ break; -+ } -+ -+ switch (instr->alu.add.op) { -+ case V3D_QPU_A_FADD: -+ case V3D_QPU_A_FADDNF: -+ case V3D_QPU_A_FSUB: -+ case V3D_QPU_A_FMIN: -+ case V3D_QPU_A_FMAX: -+ case V3D_QPU_A_FCMP: { -+ uint32_t output_pack; -+ uint32_t a_unpack; -+ uint32_t b_unpack; -+ -+ if (instr->alu.add.op != V3D_QPU_A_FCMP) { -+ if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, -+ &output_pack)) { -+ return false; -+ } -+ opcode |= output_pack << 4; -+ } -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, -+ &a_unpack)) { -+ return false; -+ } -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, -+ &b_unpack)) { -+ return false; -+ } -+ -+ opcode |= a_unpack << 2; -+ opcode |= b_unpack << 0; -+ -+ break; -+ } -+ -+ case V3D_QPU_A_VFPACK: { -+ uint32_t a_unpack; -+ uint32_t b_unpack; -+ -+ if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS || -+ instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) { -+ return false; -+ } -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, -+ &a_unpack)) { -+ return false; -+ } -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, -+ &b_unpack)) { -+ return false; -+ } -+ -+ opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2); -+ opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0); -+ -+ break; -+ } -+ -+ case V3D_QPU_A_FFLOOR: -+ case V3D_QPU_A_FROUND: -+ case V3D_QPU_A_FTRUNC: -+ case V3D_QPU_A_FCEIL: -+ case V3D_QPU_A_FDX: -+ case V3D_QPU_A_FDY: { -+ uint32_t packed; -+ -+ if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, -+ &packed)) { -+ return false; -+ } -+ raddr_b |= packed; -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, -+ &packed)) { -+ return false; -+ } -+ if (packed == 0) -+ return false; -+ raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2; -+ break; -+ } -+ -+ case V3D_QPU_A_FTOIN: -+ case V3D_QPU_A_FTOIZ: -+ case V3D_QPU_A_FTOUZ: -+ case V3D_QPU_A_FTOC: -+ if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE) -+ return false; -+ -+ uint32_t packed; -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, -+ &packed)) { -+ return false; -+ } -+ if (packed == 0) -+ return false; -+ -+ raddr_b |= (raddr_b & ~(0x3 << 2)) | packed << 2; -+ -+ break; -+ -+ case V3D_QPU_A_VFMIN: -+ case V3D_QPU_A_VFMAX: -+ if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || -+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) { -+ return false; -+ } -+ -+ if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack, -+ &packed)) { -+ return false; -+ } -+ opcode |= packed; -+ break; -+ -+ default: -+ if (instr->alu.add.op != V3D_QPU_A_NOP && -+ (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || -+ instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE || -+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) { -+ return false; -+ } -+ break; -+ } -+ -+ *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A); -+ *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B); -+ *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD); -+ *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); -+ if (instr->alu.add.magic_write && !no_magic_write) -+ *packed_instr |= V3D_QPU_MA; -+ -+ return true; -+} -+ -+static bool -+v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr) - { - uint32_t mux_a = instr->alu.mul.a.mux; - uint32_t mux_b = instr->alu.mul.b.mux; - int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op); - - const struct opcode_desc *desc = -- lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops), -+ lookup_opcode_from_instr(devinfo, mul_ops_v33, -+ ARRAY_SIZE(mul_ops_v33), - instr->alu.mul.op); - - if (!desc) -@@ -1265,10 +1888,10 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, - * that here. If mux a/b determine packing, it will be set below. - */ - if (nsrc < 2) -- mux_b = ffs(desc->mux_b_mask) - 1; -+ mux_b = ffs(desc->mux.b_mask) - 1; - - if (nsrc < 1) -- mux_a = ffs(desc->mux_a_mask) - 1; -+ mux_a = ffs(desc->mux.a_mask) - 1; - - switch (instr->alu.mul.op) { - case V3D_QPU_M_FMUL: { -@@ -1351,6 +1974,130 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, - return true; - } - -+static bool -+v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr) -+{ -+ uint32_t raddr_c = instr->alu.mul.a.raddr; -+ uint32_t raddr_d = instr->alu.mul.b.raddr; -+ int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op); -+ -+ const struct opcode_desc *desc = -+ lookup_opcode_from_instr(devinfo, mul_ops_v71, -+ ARRAY_SIZE(mul_ops_v71), -+ instr->alu.mul.op); -+ if (!desc) -+ return false; -+ -+ uint32_t opcode = desc->opcode_first; -+ -+ /* Some opcodes have a single valid value for their raddr_d, so set -+ * that here. If raddr_b determine packing, it will be set below. -+ */ -+ if (nsrc < 2) -+ raddr_d = ffsll(desc->raddr_mask) - 1; -+ -+ switch (instr->alu.mul.op) { -+ case V3D_QPU_M_FMUL: { -+ uint32_t packed; -+ -+ if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack, -+ &packed)) { -+ return false; -+ } -+ /* No need for a +1 because desc->opcode_first has a 1 in this -+ * field. -+ */ -+ opcode += packed << 4; -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, -+ &packed)) { -+ return false; -+ } -+ opcode |= packed << 2; -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack, -+ &packed)) { -+ return false; -+ } -+ opcode |= packed << 0; -+ break; -+ } -+ -+ case V3D_QPU_M_FMOV: { -+ uint32_t packed; -+ -+ if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack, -+ &packed)) { -+ return false; -+ } -+ opcode |= (packed >> 1) & 1; -+ raddr_d = (packed & 1) << 2; -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, -+ &packed)) { -+ return false; -+ } -+ raddr_d |= packed; -+ break; -+ } -+ -+ case V3D_QPU_M_VFMUL: { -+ unreachable("pending v71 update"); -+ uint32_t packed; -+ -+ if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE) -+ return false; -+ -+ if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack, -+ &packed)) { -+ return false; -+ } -+ if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16) -+ opcode = 8; -+ else -+ opcode |= (packed + 4) & 7; -+ -+ if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE) -+ return false; -+ -+ break; -+ } -+ -+ default: -+ break; -+ } -+ -+ *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C); -+ *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D); -+ *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL); -+ *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M); -+ if (instr->alu.mul.magic_write) -+ *packed_instr |= V3D_QPU_MM; -+ -+ return true; -+} -+ -+static bool -+v3d_qpu_add_pack(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr) -+{ -+ if (devinfo->ver < 71) -+ return v3d33_qpu_add_pack(devinfo, instr, packed_instr); -+ else -+ return v3d71_qpu_add_pack(devinfo, instr, packed_instr); -+} -+ -+static bool -+v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr) -+{ -+ if (devinfo->ver < 71) -+ return v3d33_qpu_mul_pack(devinfo, instr, packed_instr); -+ else -+ return v3d71_qpu_mul_pack(devinfo, instr, packed_instr); -+} -+ - static bool - v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, - uint64_t packed_instr, -@@ -1379,8 +2126,14 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, - return false; - } - -- instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A); -- instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B); -+ if (devinfo->ver <= 71) { -+ /* -+ * For v71 this will be set on add/mul unpack, as raddr are now -+ * part of v3d_qpu_input -+ */ -+ instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A); -+ instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B); -+ } - - if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr)) - return false; -@@ -1466,8 +2219,14 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, - *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG); - - if (instr->type == V3D_QPU_INSTR_TYPE_ALU) { -- *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A); -- *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B); -+ if (devinfo->ver < 71) { -+ /* -+ * For v71 this will be set on add/mul unpack, as raddr are now -+ * part of v3d_qpu_input -+ */ -+ *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A); -+ *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B); -+ } - - if (!v3d_qpu_add_pack(devinfo, instr, packed_instr)) - return false; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0017-broadcom-compiler-update-node-temp-translation-for-v.patch b/projects/RPi/devices/RPi5/patches/mesa/0017-broadcom-compiler-update-node-temp-translation-for-v.patch deleted file mode 100644 index 0bf1274d45..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0017-broadcom-compiler-update-node-temp-translation-for-v.patch +++ /dev/null @@ -1,261 +0,0 @@ -From ebba9019461083687f6afd23ff0d4646c1a667cb Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Sun, 29 Jan 2023 00:27:11 +0100 -Subject: [PATCH 017/142] broadcom/compiler: update node/temp translation for - v71 - -As the offset applied needs to take into account if we have -accumulators or not. ---- - src/broadcom/compiler/vir_register_allocate.c | 68 +++++++++---------- - 1 file changed, 34 insertions(+), 34 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index b22f915d1df..aa9473d124b 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -39,30 +39,31 @@ - CLASS_BITS_R5) - - static inline uint32_t --temp_to_node(uint32_t temp) -+temp_to_node(struct v3d_compile *c, uint32_t temp) - { -- return temp + ACC_COUNT; -+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0); - } - - static inline uint32_t --node_to_temp(uint32_t node) -+node_to_temp(struct v3d_compile *c, uint32_t node) - { -- assert(node >= ACC_COUNT); -- return node - ACC_COUNT; -+ assert((c->devinfo->has_accumulators && node >= ACC_COUNT) || -+ (!c->devinfo->has_accumulators && node >= 0)); -+ return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0); - } - - static inline uint8_t --get_temp_class_bits(struct v3d_ra_node_info *nodes, -+get_temp_class_bits(struct v3d_compile *c, - uint32_t temp) - { -- return nodes->info[temp_to_node(temp)].class_bits; -+ return c->nodes.info[temp_to_node(c, temp)].class_bits; - } - - static inline void --set_temp_class_bits(struct v3d_ra_node_info *nodes, -+set_temp_class_bits(struct v3d_compile *c, - uint32_t temp, uint8_t class_bits) - { -- nodes->info[temp_to_node(temp)].class_bits = class_bits; -+ c->nodes.info[temp_to_node(c, temp)].class_bits = class_bits; - } - - static struct ra_class * -@@ -84,7 +85,7 @@ static inline struct ra_class * - choose_reg_class_for_temp(struct v3d_compile *c, uint32_t temp) - { - assert(temp < c->num_temps && temp < c->nodes.alloc_count); -- return choose_reg_class(c, get_temp_class_bits(&c->nodes, temp)); -+ return choose_reg_class(c, get_temp_class_bits(c, temp)); - } - - static inline bool -@@ -313,7 +314,7 @@ v3d_choose_spill_node(struct v3d_compile *c) - - for (unsigned i = 0; i < c->num_temps; i++) { - if (BITSET_TEST(c->spillable, i)) { -- ra_set_node_spill_cost(c->g, temp_to_node(i), -+ ra_set_node_spill_cost(c->g, temp_to_node(c, i), - spill_costs[i]); - } - } -@@ -482,7 +483,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c, - c->temp_start[i] < ip && c->temp_end[i] >= ip : - c->temp_start[i] <= ip && c->temp_end[i] > ip; - if (thrsw_cross) { -- ra_set_node_class(c->g, temp_to_node(i), -+ ra_set_node_class(c->g, temp_to_node(c, i), - choose_reg_class(c, CLASS_BITS_PHYS)); - } - } -@@ -509,8 +510,7 @@ v3d_emit_tmu_spill(struct v3d_compile *c, - * same register class bits as the original. - */ - if (inst == position) { -- uint8_t class_bits = get_temp_class_bits(&c->nodes, -- inst->dst.index); -+ uint8_t class_bits = get_temp_class_bits(c, inst->dst.index); - inst->dst = vir_get_temp(c); - add_node(c, inst->dst.index, class_bits); - } else { -@@ -574,7 +574,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - reconstruct_op = orig_def->qpu.alu.add.op; - } - -- uint32_t spill_node = temp_to_node(spill_temp); -+ uint32_t spill_node = temp_to_node(c, spill_temp); - - /* We must disable the ldunif optimization if we are spilling uniforms */ - bool had_disable_ldunif_opt = c->disable_ldunif_opt; -@@ -739,12 +739,12 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - * update node priorities based one new liveness data. - */ - uint32_t sb_temp =c->spill_base.index; -- uint32_t sb_node = temp_to_node(sb_temp); -+ uint32_t sb_node = temp_to_node(c, sb_temp); - for (uint32_t i = 0; i < c->num_temps; i++) { - if (c->temp_end[i] == -1) - continue; - -- uint32_t node_i = temp_to_node(i); -+ uint32_t node_i = temp_to_node(c, i); - c->nodes.info[node_i].priority = - c->temp_end[i] - c->temp_start[i]; - -@@ -752,7 +752,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - j < c->num_temps; j++) { - if (interferes(c->temp_start[i], c->temp_end[i], - c->temp_start[j], c->temp_end[j])) { -- uint32_t node_j = temp_to_node(j); -+ uint32_t node_j = temp_to_node(c, j); - ra_add_node_interference(c->g, node_i, node_j); - } - } -@@ -958,7 +958,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - ra_add_node_interference(c->g, -- temp_to_node(i), -+ temp_to_node(c, i), - acc_nodes[3]); - } - } -@@ -968,7 +968,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - ra_add_node_interference(c->g, -- temp_to_node(i), -+ temp_to_node(c, i), - acc_nodes[4]); - } - } -@@ -987,7 +987,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - * decides whether the LDVPM is in or out) - */ - assert(inst->dst.file == QFILE_TEMP); -- set_temp_class_bits(&c->nodes, inst->dst.index, -+ set_temp_class_bits(c, inst->dst.index, - CLASS_BITS_PHYS); - break; - } -@@ -1002,7 +1002,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - * phys regfile. - */ - assert(inst->dst.file == QFILE_TEMP); -- set_temp_class_bits(&c->nodes, inst->dst.index, -+ set_temp_class_bits(c, inst->dst.index, - CLASS_BITS_PHYS); - break; - } -@@ -1024,7 +1024,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - */ - assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV); - assert(inst->dst.file == QFILE_TEMP); -- uint32_t node = temp_to_node(inst->dst.index); -+ uint32_t node = temp_to_node(c, inst->dst.index); - ra_set_node_reg(c->g, node, - PHYS_INDEX + inst->src[0].index); - break; -@@ -1043,9 +1043,9 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - */ - if (!inst->qpu.sig.ldunif) { - uint8_t class_bits = -- get_temp_class_bits(&c->nodes, inst->dst.index) & -+ get_temp_class_bits(c, inst->dst.index) & - ~CLASS_BITS_R5; -- set_temp_class_bits(&c->nodes, inst->dst.index, -+ set_temp_class_bits(c, inst->dst.index, - class_bits); - - } else { -@@ -1054,7 +1054,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - * loads interfere with each other. - */ - if (c->devinfo->ver < 40) { -- set_temp_class_bits(&c->nodes, inst->dst.index, -+ set_temp_class_bits(c, inst->dst.index, - CLASS_BITS_R5); - } - } -@@ -1064,7 +1064,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - if (inst->qpu.sig.thrsw) { - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { -- set_temp_class_bits(&c->nodes, i, -+ set_temp_class_bits(c, i, - CLASS_BITS_PHYS); - } - } -@@ -1125,7 +1125,7 @@ v3d_register_allocate(struct v3d_compile *c) - c->nodes.info[i].priority = 0; - c->nodes.info[i].class_bits = 0; - } else { -- uint32_t t = node_to_temp(i); -+ uint32_t t = node_to_temp(c, i); - c->nodes.info[i].priority = - c->temp_end[t] - c->temp_start[t]; - c->nodes.info[i].class_bits = CLASS_BITS_ANY; -@@ -1143,7 +1143,7 @@ v3d_register_allocate(struct v3d_compile *c) - - /* Set the register classes for all our temporaries in the graph */ - for (uint32_t i = 0; i < c->num_temps; i++) { -- ra_set_node_class(c->g, temp_to_node(i), -+ ra_set_node_class(c->g, temp_to_node(c, i), - choose_reg_class_for_temp(c, i)); - } - -@@ -1153,8 +1153,8 @@ v3d_register_allocate(struct v3d_compile *c) - if (interferes(c->temp_start[i], c->temp_end[i], - c->temp_start[j], c->temp_end[j])) { - ra_add_node_interference(c->g, -- temp_to_node(i), -- temp_to_node(j)); -+ temp_to_node(c, i), -+ temp_to_node(c, j)); - } - } - } -@@ -1171,7 +1171,7 @@ v3d_register_allocate(struct v3d_compile *c) - if (c->spill_size < - V3D_CHANNELS * sizeof(uint32_t) * force_register_spills) { - int node = v3d_choose_spill_node(c); -- uint32_t temp = node_to_temp(node); -+ uint32_t temp = node_to_temp(c, node); - if (node != -1) { - v3d_spill_reg(c, acc_nodes, temp); - continue; -@@ -1186,7 +1186,7 @@ v3d_register_allocate(struct v3d_compile *c) - if (node == -1) - goto spill_fail; - -- uint32_t temp = node_to_temp(node); -+ uint32_t temp = node_to_temp(c, node); - enum temp_spill_type spill_type = - get_spill_type_for_temp(c, temp); - if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) { -@@ -1201,7 +1201,7 @@ v3d_register_allocate(struct v3d_compile *c) - /* Allocation was successful, build the 'temp -> reg' map */ - temp_registers = calloc(c->num_temps, sizeof(*temp_registers)); - for (uint32_t i = 0; i < c->num_temps; i++) { -- int ra_reg = ra_get_node_reg(c->g, temp_to_node(i)); -+ int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i)); - if (ra_reg < PHYS_INDEX) { - temp_registers[i].magic = true; - temp_registers[i].index = (V3D_QPU_WADDR_R0 + --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0018-broadcom-compiler-phys-index-depends-on-hw-version.patch b/projects/RPi/devices/RPi5/patches/mesa/0018-broadcom-compiler-phys-index-depends-on-hw-version.patch deleted file mode 100644 index 88f753bb0b..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0018-broadcom-compiler-phys-index-depends-on-hw-version.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 9b2dfe0286212aba3687a06023cc5b4ce9944ee0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Mon, 23 Aug 2021 02:18:43 +0200 -Subject: [PATCH 018/142] broadcom/compiler: phys index depends on hw version - -For 7.1 there are not accumulators. So we replace the macro with a -function call. ---- - src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++----- - 1 file changed, 29 insertions(+), 10 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index aa9473d124b..a358b616e13 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -28,9 +28,19 @@ - - #define ACC_INDEX 0 - #define ACC_COUNT 6 --#define PHYS_INDEX (ACC_INDEX + ACC_COUNT) --#define PHYS_COUNT 64 - -+#define PHYS_COUNT 64 -+ -+static uint8_t -+get_phys_index(const struct v3d_device_info *devinfo) -+{ -+ if (devinfo->has_accumulators) -+ return ACC_INDEX + ACC_COUNT; -+ else -+ return 0; -+} -+ -+/* ACC as accumulator */ - #define CLASS_BITS_PHYS (1 << 0) - #define CLASS_BITS_ACC (1 << 1) - #define CLASS_BITS_R5 (1 << 4) -@@ -771,9 +781,11 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - } - - struct v3d_ra_select_callback_data { -+ uint32_t phys_index; - uint32_t next_acc; - uint32_t next_phys; - struct v3d_ra_node_info *nodes; -+ const struct v3d_device_info *devinfo; - }; - - /* Choosing accumulators improves chances of merging QPU instructions -@@ -794,7 +806,7 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra, - static const int available_rf_threshold = 5; - int available_rf = 0 ; - for (int i = 0; i < PHYS_COUNT; i++) { -- if (BITSET_TEST(regs, PHYS_INDEX + i)) -+ if (BITSET_TEST(regs, v3d_ra->phys_index + i)) - available_rf++; - if (available_rf >= available_rf_threshold) - break; -@@ -854,7 +866,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, - { - for (int i = 0; i < PHYS_COUNT; i++) { - int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT; -- int phys = PHYS_INDEX + phys_off; -+ int phys = v3d_ra->phys_index + phys_off; - - if (BITSET_TEST(regs, phys)) { - v3d_ra->next_phys = phys_off + 1; -@@ -896,8 +908,9 @@ vir_init_reg_sets(struct v3d_compiler *compiler) - * register file can be divided up for fragment shader threading. - */ - int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3); -+ uint8_t phys_index = get_phys_index(compiler->devinfo); - -- compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT, -+ compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT, - false); - if (!compiler->regs) - return false; -@@ -912,8 +925,8 @@ vir_init_reg_sets(struct v3d_compiler *compiler) - compiler->reg_class_phys[threads] = - ra_alloc_contig_reg_class(compiler->regs, 1); - -- for (int i = PHYS_INDEX; -- i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) { -+ for (int i = phys_index; -+ i < phys_index + (PHYS_COUNT >> threads); i++) { - ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); - ra_class_add_reg(compiler->reg_class_phys[threads], i); - ra_class_add_reg(compiler->reg_class_any[threads], i); -@@ -1026,7 +1039,8 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - assert(inst->dst.file == QFILE_TEMP); - uint32_t node = temp_to_node(c, inst->dst.index); - ra_set_node_reg(c->g, node, -- PHYS_INDEX + inst->src[0].index); -+ get_phys_index(c->devinfo) + -+ inst->src[0].index); - break; - } - } -@@ -1086,13 +1100,17 @@ v3d_register_allocate(struct v3d_compile *c) - c->num_temps + ACC_COUNT), - }; - -+ uint32_t phys_index = get_phys_index(c->devinfo); -+ - struct v3d_ra_select_callback_data callback_data = { -+ .phys_index = phys_index, - .next_acc = 0, - /* Start at RF3, to try to keep the TLB writes from using - * RF0-2. - */ - .next_phys = 3, - .nodes = &c->nodes, -+ .devinfo = c->devinfo, - }; - - vir_calculate_live_intervals(c); -@@ -1139,6 +1157,7 @@ v3d_register_allocate(struct v3d_compile *c) - vir_for_each_inst_inorder(inst, c) { - inst->ip = ip++; - update_graph_and_reg_classes_for_inst(c, acc_nodes, inst); -+ - } - - /* Set the register classes for all our temporaries in the graph */ -@@ -1202,13 +1221,13 @@ v3d_register_allocate(struct v3d_compile *c) - temp_registers = calloc(c->num_temps, sizeof(*temp_registers)); - for (uint32_t i = 0; i < c->num_temps; i++) { - int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i)); -- if (ra_reg < PHYS_INDEX) { -+ if (ra_reg < phys_index) { - temp_registers[i].magic = true; - temp_registers[i].index = (V3D_QPU_WADDR_R0 + - ra_reg - ACC_INDEX); - } else { - temp_registers[i].magic = false; -- temp_registers[i].index = ra_reg - PHYS_INDEX; -+ temp_registers[i].index = ra_reg - phys_index; - } - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0019-broadcom-compiler-don-t-favor-select-accum-registers.patch b/projects/RPi/devices/RPi5/patches/mesa/0019-broadcom-compiler-don-t-favor-select-accum-registers.patch deleted file mode 100644 index 6689d6ee7f..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0019-broadcom-compiler-don-t-favor-select-accum-registers.patch +++ /dev/null @@ -1,40 +0,0 @@ -From da0a3deadf86a46c8323267d3f6a49e442835608 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 17 Sep 2021 01:07:06 +0200 -Subject: [PATCH 019/142] broadcom/compiler: don't favor/select accum registers - for hw not supporting it - -Note that what we do is to just return false on the favor/select accum -methods. We could just avoid to call them, but as the select is called -more than once, it is just easier this way. ---- - src/broadcom/compiler/vir_register_allocate.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index a358b616e13..1f495180784 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -797,6 +797,9 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra, - BITSET_WORD *regs, - int priority) - { -+ if (!v3d_ra->devinfo->has_accumulators) -+ return false; -+ - /* Favor accumulators if we have less that this number of physical - * registers. Accumulators have more restrictions (like being - * invalidated through thrsw), so running out of physical registers -@@ -832,6 +835,9 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra, - BITSET_WORD *regs, - unsigned int *out) - { -+ if (!v3d_ra->devinfo->has_accumulators) -+ return false; -+ - /* Choose r5 for our ldunifs if possible (nobody else can load to that - * reg, and it keeps the QPU cond field free from being occupied by - * ldunifrf). --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0020-broadcom-vir-implement-is_no_op_mov-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0020-broadcom-vir-implement-is_no_op_mov-for-v71.patch deleted file mode 100644 index 3085733d38..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0020-broadcom-vir-implement-is_no_op_mov-for-v71.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 6c04d7c917da6b38f8b2b4306ab03ed2ab7e6ce0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 9 Sep 2021 00:28:53 +0200 -Subject: [PATCH 020/142] broadcom/vir: implement is_no_op_mov for v71 - -Did some refactoring/splitting. ---- - src/broadcom/compiler/vir_to_qpu.c | 66 ++++++++++++++++++++++++------ - 1 file changed, 53 insertions(+), 13 deletions(-) - -diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c -index c8b6e0a91a0..08970d52954 100644 ---- a/src/broadcom/compiler/vir_to_qpu.c -+++ b/src/broadcom/compiler/vir_to_qpu.c -@@ -129,19 +129,8 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) - } - - static bool --is_no_op_mov(struct qinst *qinst) -+v3d33_mov_src_and_dst_equal(struct qinst *qinst) - { -- static const struct v3d_qpu_sig no_sig = {0}; -- -- /* Make sure it's just a lone MOV. */ -- if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || -- qinst->qpu.alu.mul.op != V3D_QPU_M_MOV || -- qinst->qpu.alu.add.op != V3D_QPU_A_NOP || -- memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) { -- return false; -- } -- -- /* Check if it's a MOV from a register to itself. */ - enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr; - if (qinst->qpu.alu.mul.magic_write) { - if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4) -@@ -168,6 +157,57 @@ is_no_op_mov(struct qinst *qinst) - return false; - } - -+ return true; -+} -+ -+static bool -+v3d71_mov_src_and_dst_equal(struct qinst *qinst) -+{ -+ if (qinst->qpu.alu.mul.magic_write) -+ return false; -+ -+ enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr; -+ int raddr; -+ -+ raddr = qinst->qpu.alu.mul.a.raddr; -+ if (raddr != waddr) -+ return false; -+ -+ return true; -+} -+ -+static bool -+mov_src_and_dst_equal(struct qinst *qinst, -+ const struct v3d_device_info *devinfo) -+{ -+ if (devinfo->ver < 71) -+ return v3d33_mov_src_and_dst_equal(qinst); -+ else -+ return v3d71_mov_src_and_dst_equal(qinst); -+} -+ -+ -+static bool -+is_no_op_mov(struct qinst *qinst, -+ const struct v3d_device_info *devinfo) -+{ -+ static const struct v3d_qpu_sig no_sig = {0}; -+ -+ /* Make sure it's just a lone MOV. We only check for M_MOV. Although -+ * for V3D 7.x there is also A_MOV, we don't need to check for it as -+ * we always emit using M_MOV. We could use A_MOV later on the -+ * squedule to improve performance -+ */ -+ if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || -+ qinst->qpu.alu.mul.op != V3D_QPU_M_MOV || -+ qinst->qpu.alu.add.op != V3D_QPU_A_NOP || -+ memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) { -+ return false; -+ } -+ -+ if (!mov_src_and_dst_equal(qinst, devinfo)) -+ return false; -+ - /* No packing or flags updates, or we need to execute the - * instruction. - */ -@@ -324,7 +364,7 @@ v3d_generate_code_block(struct v3d_compile *c, - qinst->qpu.alu.mul.waddr = dst.index; - qinst->qpu.alu.mul.magic_write = dst.magic; - -- if (is_no_op_mov(qinst)) { -+ if (is_no_op_mov(qinst, c->devinfo)) { - vir_remove_instruction(c, qinst); - continue; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0021-broadcom-compiler-update-vir_to_qpu-set_src-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0021-broadcom-compiler-update-vir_to_qpu-set_src-for-v71.patch deleted file mode 100644 index 57bd1ad620..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0021-broadcom-compiler-update-vir_to_qpu-set_src-for-v71.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 7b5be2d9b178a45c34c22db2744639a6a8a216d1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 9 Sep 2021 01:18:54 +0200 -Subject: [PATCH 021/142] broadcom/compiler: update vir_to_qpu::set_src for v71 - ---- - src/broadcom/compiler/vir_to_qpu.c | 47 ++++++++++++++++++++++++++---- - 1 file changed, 42 insertions(+), 5 deletions(-) - -diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c -index 08970d52954..afc4941fdb1 100644 ---- a/src/broadcom/compiler/vir_to_qpu.c -+++ b/src/broadcom/compiler/vir_to_qpu.c -@@ -86,12 +86,22 @@ new_qpu_nop_before(struct qinst *inst) - return q; - } - -+static void -+v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src) -+{ -+ if (src.smimm) -+ unreachable("v3d71_set_src: pending handling small immediates"); -+ -+ assert(!src.magic); -+ *raddr = src.index; -+} -+ - /** - * Allocates the src register (accumulator or register file) into the RADDR - * fields of the instruction. - */ - static void --set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) -+v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) - { - if (src.smimm) { - assert(instr->sig.small_imm_b); -@@ -128,6 +138,24 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) - } - } - -+/* -+ * The main purpose of the following wrapper is to make calling set_src -+ * cleaner. This is the reason it receives both mux and raddr pointers. Those -+ * will be filled or not based on the device version. -+ */ -+static void -+set_src(struct v3d_qpu_instr *instr, -+ enum v3d_qpu_mux *mux, -+ uint8_t *raddr, -+ struct qpu_reg src, -+ const struct v3d_device_info *devinfo) -+{ -+ if (devinfo->ver < 71) -+ return v3d33_set_src(instr, mux, src); -+ else -+ return v3d71_set_src(instr, raddr, src); -+} -+ - static bool - v3d33_mov_src_and_dst_equal(struct qinst *qinst) - { -@@ -340,13 +368,18 @@ v3d_generate_code_block(struct v3d_compile *c, - qinst->qpu.sig_magic = dst.magic; - } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { - assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); -+ - if (nsrc >= 1) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.add.a.mux, src[0]); -+ &qinst->qpu.alu.add.a.mux, -+ &qinst->qpu.alu.add.a.raddr, -+ src[0], c->devinfo); - } - if (nsrc >= 2) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.add.b.mux, src[1]); -+ &qinst->qpu.alu.add.b.mux, -+ &qinst->qpu.alu.add.b.raddr, -+ src[1], c->devinfo); - } - - qinst->qpu.alu.add.waddr = dst.index; -@@ -354,11 +387,15 @@ v3d_generate_code_block(struct v3d_compile *c, - } else { - if (nsrc >= 1) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.mul.a.mux, src[0]); -+ &qinst->qpu.alu.mul.a.mux, -+ &qinst->qpu.alu.mul.a.raddr, -+ src[0], c->devinfo); - } - if (nsrc >= 2) { - set_src(&qinst->qpu, -- &qinst->qpu.alu.mul.b.mux, src[1]); -+ &qinst->qpu.alu.mul.b.mux, -+ &qinst->qpu.alu.mul.b.raddr, -+ src[1], c->devinfo); - } - - qinst->qpu.alu.mul.waddr = dst.index; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0022-broadcom-qpu_schedule-add-process_raddr_deps.patch b/projects/RPi/devices/RPi5/patches/mesa/0022-broadcom-qpu_schedule-add-process_raddr_deps.patch deleted file mode 100644 index 519e72d917..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0022-broadcom-qpu_schedule-add-process_raddr_deps.patch +++ /dev/null @@ -1,92 +0,0 @@ -From fe89703008f2a3d6bfe6e260791f712013be5e48 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 9 Sep 2021 23:59:28 +0200 -Subject: [PATCH 022/142] broadcom/qpu_schedule: add process_raddr_deps - -On v71 we don't have muxes, but more raddr. Adding a equivalent add -deps function. ---- - src/broadcom/compiler/qpu_schedule.c | 52 +++++++++++++++++++++++----- - 1 file changed, 44 insertions(+), 8 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 455fa3867be..89254643c90 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -155,6 +155,7 @@ static void - process_mux_deps(struct schedule_state *state, struct schedule_node *n, - enum v3d_qpu_mux mux) - { -+ assert(state->devinfo->ver < 71); - switch (mux) { - case V3D_QPU_MUX_A: - add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n); -@@ -171,6 +172,17 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n, - } - } - -+ -+static void -+process_raddr_deps(struct schedule_state *state, struct schedule_node *n, -+ uint8_t raddr, bool is_small_imm) -+{ -+ assert(state->devinfo->ver >= 71); -+ -+ if (!is_small_imm) -+ add_read_dep(state, state->last_rf[raddr], n); -+} -+ - static bool - tmu_write_is_sequence_terminator(uint32_t waddr) - { -@@ -305,15 +317,39 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) - - /* XXX: LOAD_IMM */ - -- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) -- process_mux_deps(state, n, inst->alu.add.a.mux); -- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) -- process_mux_deps(state, n, inst->alu.add.b.mux); -+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) { -+ if (devinfo->ver < 71) { -+ process_mux_deps(state, n, inst->alu.add.a.mux); -+ } else { -+ process_raddr_deps(state, n, inst->alu.add.a.raddr, -+ inst->sig.small_imm_a); -+ } -+ } -+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) { -+ if (devinfo->ver < 71) { -+ process_mux_deps(state, n, inst->alu.add.b.mux); -+ } else { -+ process_raddr_deps(state, n, inst->alu.add.b.raddr, -+ inst->sig.small_imm_b); -+ } -+ } - -- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) -- process_mux_deps(state, n, inst->alu.mul.a.mux); -- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) -- process_mux_deps(state, n, inst->alu.mul.b.mux); -+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) { -+ if (devinfo->ver < 71) { -+ process_mux_deps(state, n, inst->alu.mul.a.mux); -+ } else { -+ process_raddr_deps(state, n, inst->alu.mul.a.raddr, -+ inst->sig.small_imm_c); -+ } -+ } -+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) { -+ if (devinfo->ver < 71) { -+ process_mux_deps(state, n, inst->alu.mul.b.mux); -+ } else { -+ process_raddr_deps(state, n, inst->alu.mul.b.raddr, -+ inst->sig.small_imm_d); -+ } -+ } - - switch (inst->alu.add.op) { - case V3D_QPU_A_VPMSETUP: --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0023-broadcom-qpu-update-disasm_raddr-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0023-broadcom-qpu-update-disasm_raddr-for-v71.patch deleted file mode 100644 index e16ff0f540..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0023-broadcom-qpu-update-disasm_raddr-for-v71.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 20ce426df1ab2546332141f4bc4531ada754cdea Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 10 Sep 2021 01:20:44 +0200 -Subject: [PATCH 023/142] broadcom/qpu: update disasm_raddr for v71 - ---- - src/broadcom/qpu/qpu_disasm.c | 72 ++++++++++++++++++++++++++++++++--- - 1 file changed, 66 insertions(+), 6 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c -index 588a665f770..b613de781dc 100644 ---- a/src/broadcom/qpu/qpu_disasm.c -+++ b/src/broadcom/qpu/qpu_disasm.c -@@ -56,8 +56,9 @@ pad_to(struct disasm_state *disasm, int n) - - - static void --v3d_qpu_disasm_raddr(struct disasm_state *disasm, -- const struct v3d_qpu_instr *instr, uint8_t mux) -+v3d33_qpu_disasm_raddr(struct disasm_state *disasm, -+ const struct v3d_qpu_instr *instr, -+ enum v3d_qpu_mux mux) - { - if (mux == V3D_QPU_MUX_A) { - append(disasm, "rf%d", instr->raddr_a); -@@ -82,6 +83,65 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm, - } - } - -+enum v3d_qpu_input_class { -+ V3D_QPU_ADD_A, -+ V3D_QPU_ADD_B, -+ V3D_QPU_MUL_A, -+ V3D_QPU_MUL_B -+}; -+ -+static void -+v3d71_qpu_disasm_raddr(struct disasm_state *disasm, -+ const struct v3d_qpu_instr *instr, -+ uint8_t raddr, -+ enum v3d_qpu_input_class input_class) -+{ -+ bool is_small_imm = false; -+ switch(input_class) { -+ case V3D_QPU_ADD_A: -+ is_small_imm = instr->sig.small_imm_a; -+ break; -+ case V3D_QPU_ADD_B: -+ is_small_imm = instr->sig.small_imm_b; -+ break; -+ case V3D_QPU_MUL_A: -+ is_small_imm = instr->sig.small_imm_c; -+ break; -+ case V3D_QPU_MUL_B: -+ is_small_imm = instr->sig.small_imm_d; -+ break; -+ } -+ -+ if (is_small_imm) { -+ unreachable("Pending handling small immediates"); -+ uint32_t val; -+ ASSERTED bool ok = -+ v3d_qpu_small_imm_unpack(disasm->devinfo, -+ raddr, -+ &val); -+ -+ if ((int)val >= -16 && (int)val <= 15) -+ append(disasm, "%d", val); -+ else -+ append(disasm, "0x%08x", val); -+ assert(ok); -+ } else { -+ append(disasm, "rf%d", raddr); -+ } -+} -+ -+static void -+v3d_qpu_disasm_raddr(struct disasm_state *disasm, -+ const struct v3d_qpu_instr *instr, -+ const struct v3d_qpu_input *input, -+ enum v3d_qpu_input_class input_class) -+{ -+ if (disasm->devinfo->ver < 71) -+ v3d33_qpu_disasm_raddr(disasm, instr, input->mux); -+ else -+ v3d71_qpu_disasm_raddr(disasm, instr, input->raddr, input_class); -+} -+ - static void - v3d_qpu_disasm_waddr(struct disasm_state *disasm, uint32_t waddr, bool magic) - { -@@ -121,14 +181,14 @@ v3d_qpu_disasm_add(struct disasm_state *disasm, - if (num_src >= 1) { - if (has_dst) - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a.mux); -+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.add.a, V3D_QPU_ADD_A); - append(disasm, "%s", - v3d_qpu_unpack_name(instr->alu.add.a.unpack)); - } - - if (num_src >= 2) { - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b.mux); -+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.add.b, V3D_QPU_ADD_B); - append(disasm, "%s", - v3d_qpu_unpack_name(instr->alu.add.b.unpack)); - } -@@ -164,14 +224,14 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm, - if (num_src >= 1) { - if (has_dst) - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a.mux); -+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.mul.a, V3D_QPU_MUL_A); - append(disasm, "%s", - v3d_qpu_unpack_name(instr->alu.mul.a.unpack)); - } - - if (num_src >= 2) { - append(disasm, ", "); -- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b.mux); -+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.mul.b, V3D_QPU_MUL_B); - append(disasm, "%s", - v3d_qpu_unpack_name(instr->alu.mul.b.unpack)); - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0024-broadcom-qpu-return-false-on-qpu_writes_accumulatorX.patch b/projects/RPi/devices/RPi5/patches/mesa/0024-broadcom-qpu-return-false-on-qpu_writes_accumulatorX.patch deleted file mode 100644 index 3b82c34ea8..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0024-broadcom-qpu-return-false-on-qpu_writes_accumulatorX.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 7263fa24a3c57b1dcd4d870670cda86ae89aa28c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 15 Sep 2021 10:55:49 +0200 -Subject: [PATCH 024/142] broadcom/qpu: return false on - qpu_writes_accumulatorXX helpers for v71 - -As for v71 doesn't have accumulators (devinfo->has_accumulators set to -false), those methods would always return false. ---- - src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index 8de99c611d5..7ec3c867260 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -854,6 +854,9 @@ bool - v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *inst) - { -+ if(!devinfo->has_accumulators) -+ return false; -+ - if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3)) - return true; - -@@ -864,6 +867,9 @@ bool - v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *inst) - { -+ if (!devinfo->has_accumulators) -+ return false; -+ - if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { - if (inst->alu.add.op != V3D_QPU_A_NOP && - inst->alu.add.magic_write && -@@ -894,6 +900,9 @@ bool - v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *inst) - { -+ if (!devinfo->has_accumulators) -+ return false; -+ - if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5)) - return true; - -@@ -904,6 +913,9 @@ bool - v3d_qpu_writes_accum(const struct v3d_device_info *devinfo, - const struct v3d_qpu_instr *inst) - { -+ if (!devinfo->has_accumulators) -+ return false; -+ - if (v3d_qpu_writes_r5(devinfo, inst)) - return true; - if (v3d_qpu_writes_r4(devinfo, inst)) --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0025-broadcom-compiler-add-support-for-varyings-on-nir-to.patch b/projects/RPi/devices/RPi5/patches/mesa/0025-broadcom-compiler-add-support-for-varyings-on-nir-to.patch deleted file mode 100644 index 2552764a9e..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0025-broadcom-compiler-add-support-for-varyings-on-nir-to.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 6a9611c5a22218388bba419174d3343e0cdf773b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 14 Sep 2021 10:42:55 +0200 -Subject: [PATCH 025/142] broadcom/compiler: add support for varyings on nir to - vir generation for v71 - -Needs update as v71 doesn't have accumulators anymore, and ldvary uses -now rf0 to return the value. ---- - src/broadcom/compiler/nir_to_vir.c | 34 +++++++++++++++++------------- - 1 file changed, 19 insertions(+), 15 deletions(-) - -diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c -index ca072971f01..79a22c3bd08 100644 ---- a/src/broadcom/compiler/nir_to_vir.c -+++ b/src/broadcom/compiler/nir_to_vir.c -@@ -1005,32 +1005,36 @@ emit_fragcoord_input(struct v3d_compile *c, int attr) - - static struct qreg - emit_smooth_varying(struct v3d_compile *c, -- struct qreg vary, struct qreg w, struct qreg r5) -+ struct qreg vary, struct qreg w, struct qreg c_reg) - { -- return vir_FADD(c, vir_FMUL(c, vary, w), r5); -+ return vir_FADD(c, vir_FMUL(c, vary, w), c_reg); - } - - static struct qreg - emit_noperspective_varying(struct v3d_compile *c, -- struct qreg vary, struct qreg r5) -+ struct qreg vary, struct qreg c_reg) - { -- return vir_FADD(c, vir_MOV(c, vary), r5); -+ return vir_FADD(c, vir_MOV(c, vary), c_reg); - } - - static struct qreg - emit_flat_varying(struct v3d_compile *c, -- struct qreg vary, struct qreg r5) -+ struct qreg vary, struct qreg c_reg) - { - vir_MOV_dest(c, c->undef, vary); -- return vir_MOV(c, r5); -+ return vir_MOV(c, c_reg); - } - - static struct qreg - emit_fragment_varying(struct v3d_compile *c, nir_variable *var, - int8_t input_idx, uint8_t swizzle, int array_index) - { -- struct qreg r3 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3); -- struct qreg r5 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5); -+ struct qreg c_reg; /* C coefficient */ -+ -+ if (c->devinfo->has_accumulators) -+ c_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5); -+ else -+ c_reg = vir_reg(QFILE_REG, 0); - - struct qinst *ldvary = NULL; - struct qreg vary; -@@ -1041,7 +1045,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, - vary = vir_emit_def(c, ldvary); - } else { - vir_NOP(c)->qpu.sig.ldvary = true; -- vary = r3; -+ vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3); - } - - /* Store the input value before interpolation so we can implement -@@ -1050,7 +1054,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, - if (input_idx >= 0) { - assert(var); - c->interp[input_idx].vp = vary; -- c->interp[input_idx].C = vir_MOV(c, r5); -+ c->interp[input_idx].C = vir_MOV(c, c_reg); - c->interp[input_idx].mode = var->data.interpolation; - } - -@@ -1060,7 +1064,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, - */ - if (!var) { - assert(input_idx < 0); -- return emit_smooth_varying(c, vary, c->payload_w, r5); -+ return emit_smooth_varying(c, vary, c->payload_w, c_reg); - } - - int i = c->num_inputs++; -@@ -1075,20 +1079,20 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, - if (var->data.centroid) { - BITSET_SET(c->centroid_flags, i); - result = emit_smooth_varying(c, vary, -- c->payload_w_centroid, r5); -+ c->payload_w_centroid, c_reg); - } else { -- result = emit_smooth_varying(c, vary, c->payload_w, r5); -+ result = emit_smooth_varying(c, vary, c->payload_w, c_reg); - } - break; - - case INTERP_MODE_NOPERSPECTIVE: - BITSET_SET(c->noperspective_flags, i); -- result = emit_noperspective_varying(c, vary, r5); -+ result = emit_noperspective_varying(c, vary, c_reg); - break; - - case INTERP_MODE_FLAT: - BITSET_SET(c->flat_shade_flags, i); -- result = emit_flat_varying(c, vary, r5); -+ result = emit_flat_varying(c, vary, c_reg); - break; - - default: --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0026-broadcom-compiler-payload_w-is-loaded-on-rf3-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0026-broadcom-compiler-payload_w-is-loaded-on-rf3-for-v71.patch deleted file mode 100644 index 7302726b66..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0026-broadcom-compiler-payload_w-is-loaded-on-rf3-for-v71.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 06af15a60f7a9c135893e5f8934b8030c1da95f9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 15 Sep 2021 01:14:15 +0200 -Subject: [PATCH 026/142] broadcom/compiler: payload_w is loaded on rf3 for v71 - -And in general rf0 is now used for other needs. ---- - src/broadcom/compiler/nir_to_vir.c | 6 +++++- - src/broadcom/compiler/vir_register_allocate.c | 6 +++++- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c -index 79a22c3bd08..1a05b279a2d 100644 ---- a/src/broadcom/compiler/nir_to_vir.c -+++ b/src/broadcom/compiler/nir_to_vir.c -@@ -4325,7 +4325,11 @@ nir_to_vir(struct v3d_compile *c) - { - switch (c->s->info.stage) { - case MESA_SHADER_FRAGMENT: -- c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0)); -+ if (c->devinfo->ver < 71) -+ c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0)); -+ else -+ c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 3)); -+ - c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1)); - c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2)); - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 1f495180784..eca9a6751a6 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -1034,6 +1034,11 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - if (inst->src[0].file == QFILE_REG) { - switch (inst->src[0].index) { - case 0: -+ /* V3D 7.x doesn't use rf0 for thread payload */ -+ if (c->devinfo->ver >= 71) -+ break; -+ else -+ FALLTHROUGH; - case 1: - case 2: - case 3: { -@@ -1163,7 +1168,6 @@ v3d_register_allocate(struct v3d_compile *c) - vir_for_each_inst_inorder(inst, c) { - inst->ip = ip++; - update_graph_and_reg_classes_for_inst(c, acc_nodes, inst); -- - } - - /* Set the register classes for all our temporaries in the graph */ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0027-broadcom-qpu_schedule-update-write-deps-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0027-broadcom-qpu_schedule-update-write-deps-for-v71.patch deleted file mode 100644 index 05010aadd8..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0027-broadcom-qpu_schedule-update-write-deps-for-v71.patch +++ /dev/null @@ -1,30 +0,0 @@ -From d38d8056903b9a4f96ab56261ac3b3c3be0af4fb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 15 Sep 2021 11:12:59 +0200 -Subject: [PATCH 027/142] broadcom/qpu_schedule: update write deps for v71 - -We just need to add a write dep if rf0 is written implicitly. - -Note that we don't need to check if we have accumulators when checking -for r3/r4/r5, as v3d_qpu_writes_rX would return false for hw version -that doesn't have accumulators. ---- - src/broadcom/compiler/qpu_schedule.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 89254643c90..2fa9031d7b6 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -422,6 +422,8 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) - add_write_dep(state, &state->last_r[4], n); - if (v3d_qpu_writes_r5(devinfo, inst)) - add_write_dep(state, &state->last_r[5], n); -+ if (v3d_qpu_writes_rf0_implicitly(devinfo, inst)) -+ add_write_dep(state, &state->last_rf[0], n); - - /* If we add any more dependencies here we should consider whether we - * also need to update qpu_inst_after_thrsw_valid_in_delay_slot. --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0028-broadcom-compiler-update-register-classes-to-not-inc.patch b/projects/RPi/devices/RPi5/patches/mesa/0028-broadcom-compiler-update-register-classes-to-not-inc.patch deleted file mode 100644 index 76985d943a..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0028-broadcom-compiler-update-register-classes-to-not-inc.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 7e2a2be830b1672ab846389a46b5d09bad0f7a98 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 16 Sep 2021 00:49:25 +0200 -Subject: [PATCH 028/142] broadcom/compiler: update register classes to not - include accumulators on v71 - ---- - src/broadcom/compiler/vir_register_allocate.c | 56 ++++++++++++------- - 1 file changed, 36 insertions(+), 20 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index eca9a6751a6..7b3f6c41934 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -44,10 +44,15 @@ get_phys_index(const struct v3d_device_info *devinfo) - #define CLASS_BITS_PHYS (1 << 0) - #define CLASS_BITS_ACC (1 << 1) - #define CLASS_BITS_R5 (1 << 4) --#define CLASS_BITS_ANY (CLASS_BITS_PHYS | \ -- CLASS_BITS_ACC | \ -- CLASS_BITS_R5) - -+static uint8_t -+get_class_bit_any(const struct v3d_device_info *devinfo) -+{ -+ if (devinfo->has_accumulators) -+ return (CLASS_BITS_PHYS | CLASS_BITS_ACC | CLASS_BITS_R5); -+ else -+ return CLASS_BITS_PHYS; -+} - static inline uint32_t - temp_to_node(struct v3d_compile *c, uint32_t temp) - { -@@ -82,11 +87,13 @@ choose_reg_class(struct v3d_compile *c, uint8_t class_bits) - if (class_bits == CLASS_BITS_PHYS) { - return c->compiler->reg_class_phys[c->thread_index]; - } else if (class_bits == (CLASS_BITS_R5)) { -+ assert(c->devinfo->has_accumulators); - return c->compiler->reg_class_r5[c->thread_index]; - } else if (class_bits == (CLASS_BITS_PHYS | CLASS_BITS_ACC)) { -+ assert(c->devinfo->has_accumulators); - return c->compiler->reg_class_phys_or_acc[c->thread_index]; - } else { -- assert(class_bits == CLASS_BITS_ANY); -+ assert(class_bits == get_class_bit_any(c->devinfo)); - return c->compiler->reg_class_any[c->thread_index]; - } - } -@@ -447,7 +454,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c, - */ - assert(c->disable_ldunif_opt); - struct qreg offset = vir_uniform_ui(c, spill_offset); -- add_node(c, offset.index, CLASS_BITS_ANY); -+ add_node(c, offset.index, get_class_bit_any(c->devinfo)); - - /* We always enable per-quad on spills/fills to ensure we spill - * any channels involved with helper invocations. -@@ -645,7 +652,8 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - * instruction immediately after, so - * we can use any register class for it. - */ -- add_node(c, unif.index, CLASS_BITS_ANY); -+ add_node(c, unif.index, -+ get_class_bit_any(c->devinfo)); - } else if (spill_type == SPILL_TYPE_RECONSTRUCT) { - struct qreg temp = - reconstruct_temp(c, reconstruct_op); -@@ -924,31 +932,38 @@ vir_init_reg_sets(struct v3d_compiler *compiler) - for (int threads = 0; threads < max_thread_index; threads++) { - compiler->reg_class_any[threads] = - ra_alloc_contig_reg_class(compiler->regs, 1); -- compiler->reg_class_r5[threads] = -- ra_alloc_contig_reg_class(compiler->regs, 1); -- compiler->reg_class_phys_or_acc[threads] = -- ra_alloc_contig_reg_class(compiler->regs, 1); -+ if (compiler->devinfo->has_accumulators) { -+ compiler->reg_class_r5[threads] = -+ ra_alloc_contig_reg_class(compiler->regs, 1); -+ compiler->reg_class_phys_or_acc[threads] = -+ ra_alloc_contig_reg_class(compiler->regs, 1); -+ } - compiler->reg_class_phys[threads] = - ra_alloc_contig_reg_class(compiler->regs, 1); - - for (int i = phys_index; - i < phys_index + (PHYS_COUNT >> threads); i++) { -- ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); -+ if (compiler->devinfo->has_accumulators) -+ ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); - ra_class_add_reg(compiler->reg_class_phys[threads], i); - ra_class_add_reg(compiler->reg_class_any[threads], i); - } - -- for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) { -- ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); -- ra_class_add_reg(compiler->reg_class_any[threads], i); -+ if (compiler->devinfo->has_accumulators) { -+ for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) { -+ ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); -+ ra_class_add_reg(compiler->reg_class_any[threads], i); -+ } - } - /* r5 can only store a single 32-bit value, so not much can - * use it. - */ -- ra_class_add_reg(compiler->reg_class_r5[threads], -- ACC_INDEX + 5); -- ra_class_add_reg(compiler->reg_class_any[threads], -- ACC_INDEX + 5); -+ if (compiler->devinfo->has_accumulators) { -+ ra_class_add_reg(compiler->reg_class_r5[threads], -+ ACC_INDEX + 5); -+ ra_class_add_reg(compiler->reg_class_any[threads], -+ ACC_INDEX + 5); -+ } - } - - ra_set_finalize(compiler->regs, NULL); -@@ -1086,7 +1101,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - } - - /* All accumulators are invalidated across a thread switch. */ -- if (inst->qpu.sig.thrsw) { -+ if (inst->qpu.sig.thrsw && c->devinfo->has_accumulators) { - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - set_temp_class_bits(c, i, -@@ -1157,7 +1172,8 @@ v3d_register_allocate(struct v3d_compile *c) - uint32_t t = node_to_temp(c, i); - c->nodes.info[i].priority = - c->temp_end[t] - c->temp_start[t]; -- c->nodes.info[i].class_bits = CLASS_BITS_ANY; -+ c->nodes.info[i].class_bits = -+ get_class_bit_any(c->devinfo); - } - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0029-broadcom-compiler-implement-reads-writes-too-soon-ch.patch b/projects/RPi/devices/RPi5/patches/mesa/0029-broadcom-compiler-implement-reads-writes-too-soon-ch.patch deleted file mode 100644 index 4af561fa4a..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0029-broadcom-compiler-implement-reads-writes-too-soon-ch.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 0157228c729b8812dc4900fa24db63b7d27aa342 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 23 Sep 2021 11:19:58 +0200 -Subject: [PATCH 029/142] broadcom/compiler: implement "reads/writes too soon" - checks for v71 - ---- - src/broadcom/compiler/qpu_schedule.c | 65 ++++++++++++++++++++++------ - 1 file changed, 51 insertions(+), 14 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 2fa9031d7b6..4db0c2e72da 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -562,7 +562,24 @@ mux_reads_too_soon(struct choose_scoreboard *scoreboard, - } - - static bool --reads_too_soon_after_write(struct choose_scoreboard *scoreboard, -+reads_too_soon(struct choose_scoreboard *scoreboard, -+ const struct v3d_qpu_instr *inst, uint8_t raddr) -+{ -+ switch (raddr) { -+ case 0: /* ldvary delayed write of C coefficient to rf0 */ -+ if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1) -+ return true; -+ break; -+ default: -+ break; -+ } -+ -+ return false; -+} -+ -+static bool -+reads_too_soon_after_write(const struct v3d_device_info *devinfo, -+ struct choose_scoreboard *scoreboard, - struct qinst *qinst) - { - const struct v3d_qpu_instr *inst = &qinst->qpu; -@@ -574,24 +591,44 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, - assert(inst->type == V3D_QPU_INSTR_TYPE_ALU); - - if (inst->alu.add.op != V3D_QPU_A_NOP) { -- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) { -- return true; -+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) { -+ if (devinfo->ver < 71) { -+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) -+ return true; -+ } else { -+ if (reads_too_soon(scoreboard, inst, inst->alu.add.a.raddr)) -+ return true; -+ } - } -- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) { -- return true; -+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) { -+ if (devinfo->ver < 71) { -+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) -+ return true; -+ } else { -+ if (reads_too_soon(scoreboard, inst, inst->alu.add.b.raddr)) -+ return true; -+ } - } - } - - if (inst->alu.mul.op != V3D_QPU_M_NOP) { -- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) { -- return true; -+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) { -+ if (devinfo->ver < 71) { -+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) -+ return true; -+ } else { -+ if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr)) -+ return true; -+ } - } -- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 && -- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) { -- return true; -+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) { -+ if (devinfo->ver < 71) { -+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) -+ return true; -+ } else { -+ if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr)) -+ return true; -+ } - } - } - -@@ -1147,7 +1184,7 @@ retry: - * regfile A or B that was written to by the previous - * instruction." - */ -- if (reads_too_soon_after_write(scoreboard, n->inst)) -+ if (reads_too_soon_after_write(c->devinfo, scoreboard, n->inst)) - continue; - - if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst)) --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0030-broadcom-compiler-implement-read-stall-check-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0030-broadcom-compiler-implement-read-stall-check-for-v71.patch deleted file mode 100644 index 9704a18a6b..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0030-broadcom-compiler-implement-read-stall-check-for-v71.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 3fb3333bdf9699157cf0a2bd46ba4c25058bc5c1 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 23 Sep 2021 11:44:59 +0200 -Subject: [PATCH 030/142] broadcom/compiler: implement read stall check for v71 - ---- - src/broadcom/compiler/qpu_schedule.c | 32 +++++++++++++++++----------- - src/broadcom/qpu/qpu_instr.c | 12 +++++++++++ - src/broadcom/qpu/qpu_instr.h | 2 ++ - 3 files changed, 34 insertions(+), 12 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 4db0c2e72da..b78abe003e9 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -679,29 +679,37 @@ pixel_scoreboard_too_soon(struct v3d_compile *c, - } - - static bool --qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst, -+qpu_instruction_uses_rf(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *inst, - uint32_t waddr) { - - if (inst->type != V3D_QPU_INSTR_TYPE_ALU) - return false; - -- if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) && -- inst->raddr_a == waddr) -- return true; -+ if (devinfo->ver < 71) { -+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) && -+ inst->raddr_a == waddr) -+ return true; - -- if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) && -- !inst->sig.small_imm_b && (inst->raddr_b == waddr)) -- return true; -+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) && -+ !inst->sig.small_imm_b && (inst->raddr_b == waddr)) -+ return true; -+ } else { -+ /* FIXME: skip if small immediate */ -+ if (v3d71_qpu_reads_raddr(inst, waddr)) -+ return true; -+ } - - return false; - } - - static bool --mux_read_stalls(struct choose_scoreboard *scoreboard, -- const struct v3d_qpu_instr *inst) -+read_stalls(const struct v3d_device_info *devinfo, -+ struct choose_scoreboard *scoreboard, -+ const struct v3d_qpu_instr *inst) - { - return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 && -- qpu_instruction_uses_rf(inst, -+ qpu_instruction_uses_rf(devinfo, inst, - scoreboard->last_stallable_sfu_reg); - } - -@@ -1319,7 +1327,7 @@ retry: - - int prio = get_instruction_priority(c->devinfo, inst); - -- if (mux_read_stalls(scoreboard, inst)) { -+ if (read_stalls(c->devinfo, scoreboard, inst)) { - /* Don't merge an instruction that stalls */ - if (prev_inst) - continue; -@@ -2389,7 +2397,7 @@ schedule_instructions(struct v3d_compile *c, - } - } - } -- if (mux_read_stalls(scoreboard, inst)) -+ if (read_stalls(c->devinfo, scoreboard, inst)) - c->qpu_inst_stalled_count++; - } - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index 7ec3c867260..e8bbb2141b0 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -956,6 +956,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) - (mul_nsrc > 1 && inst->alu.mul.b.mux == mux)); - } - -+bool -+v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr) -+{ -+ int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); -+ int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); -+ -+ return (add_nsrc > 0 && inst->alu.add.a.raddr == raddr) || -+ (add_nsrc > 1 && inst->alu.add.b.raddr == raddr) || -+ (mul_nsrc > 0 && inst->alu.mul.a.raddr == raddr) || -+ (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr); -+} -+ - bool - v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, - const struct v3d_qpu_sig *sig) -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index a25be8e0ee6..9f7582ab06d 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -494,4 +494,6 @@ bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - - bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; -+ -+bool v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr); - #endif --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0031-broadcom-compiler-add-a-v3d71_qpu_writes_waddr_expli.patch b/projects/RPi/devices/RPi5/patches/mesa/0031-broadcom-compiler-add-a-v3d71_qpu_writes_waddr_expli.patch deleted file mode 100644 index 3aec307f63..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0031-broadcom-compiler-add-a-v3d71_qpu_writes_waddr_expli.patch +++ /dev/null @@ -1,65 +0,0 @@ -From cbe0a7a06a5fb9b3f28acba8c9cac362a6bc5324 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 6 Oct 2021 13:58:00 +0200 -Subject: [PATCH 031/142] broadcom/compiler: add a - v3d71_qpu_writes_waddr_explicitly helper - ---- - src/broadcom/qpu/qpu_instr.c | 28 ++++++++++++++++++++++++++++ - src/broadcom/qpu/qpu_instr.h | 3 +++ - 2 files changed, 31 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index e8bbb2141b0..feb6b343c1c 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -968,6 +968,34 @@ v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr) - (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr); - } - -+bool -+v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *inst, -+ uint8_t waddr) -+{ -+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU) -+ return false; -+ -+ if (v3d_qpu_add_op_has_dst(inst->alu.add.op) && -+ !inst->alu.add.magic_write && -+ inst->alu.add.waddr == waddr) { -+ return true; -+ } -+ -+ if (v3d_qpu_mul_op_has_dst(inst->alu.mul.op) && -+ !inst->alu.mul.magic_write && -+ inst->alu.mul.waddr == waddr) { -+ return true; -+ } -+ -+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && -+ !inst->sig_magic && inst->sig_addr == waddr) { -+ return true; -+ } -+ -+ return false; -+} -+ - bool - v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, - const struct v3d_qpu_sig *sig) -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 9f7582ab06d..50a69ce8c3a 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -496,4 +496,7 @@ bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - - bool v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr); -+bool v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo, -+ const struct v3d_qpu_instr *inst, -+ uint8_t waddr); - #endif --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0032-broadcom-compiler-prevent-rf2-3-usage-in-thread-end-.patch b/projects/RPi/devices/RPi5/patches/mesa/0032-broadcom-compiler-prevent-rf2-3-usage-in-thread-end-.patch deleted file mode 100644 index f5e3fb5f22..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0032-broadcom-compiler-prevent-rf2-3-usage-in-thread-end-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 92e91a9b22ae61dc9f39880e8fdaa7714789efdb Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 27 Sep 2021 11:49:24 +0200 -Subject: [PATCH 032/142] broadcom/compiler: prevent rf2-3 usage in thread end - delay slots for v71 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Iago Toral Quiroga -Signed-off-by: Alejandro Piñeiro ---- - src/broadcom/compiler/qpu_schedule.c | 37 +++++++++++++++++++++------- - 1 file changed, 28 insertions(+), 9 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index b78abe003e9..839c0c62315 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1691,16 +1691,35 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF) - return false; - -- /* RF0-2 might be overwritten during the delay slots by -- * fragment shader setup. -- */ -- if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A)) -- return false; -+ if (c->devinfo->ver <= 42) { -+ /* RF0-2 might be overwritten during the delay slots by -+ * fragment shader setup. -+ */ -+ if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A)) -+ return false; - -- if (inst->raddr_b < 3 && -- !inst->sig.small_imm_b && -- v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) { -- return false; -+ if (inst->raddr_b < 3 && -+ !inst->sig.small_imm_b && -+ v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) { -+ return false; -+ } -+ } -+ -+ if (c->devinfo->ver >= 71) { -+ /* RF2-3 might be overwritten during the delay slots by -+ * fragment shader setup. -+ * -+ * FIXME: handle small immediate cases -+ */ -+ if (v3d71_qpu_reads_raddr(inst, 2) || -+ v3d71_qpu_reads_raddr(inst, 3)) { -+ return false; -+ } -+ -+ if (v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 2) || -+ v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 3)) { -+ return false; -+ } - } - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0033-broadcom-qpu-add-new-ADD-opcodes-for-FMOV-MOV-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0033-broadcom-qpu-add-new-ADD-opcodes-for-FMOV-MOV-in-v71.patch deleted file mode 100644 index 4a2b89038b..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0033-broadcom-qpu-add-new-ADD-opcodes-for-FMOV-MOV-in-v71.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 68a1545eb973e41608534ff05a9e84a86c046453 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 27 Sep 2021 13:26:04 +0200 -Subject: [PATCH 033/142] broadcom/qpu: add new ADD opcodes for FMOV/MOV in v71 - ---- - src/broadcom/qpu/qpu_instr.c | 5 +++++ - src/broadcom/qpu/qpu_instr.h | 4 ++++ - src/broadcom/qpu/qpu_pack.c | 15 +++++++++++++++ - 3 files changed, 24 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index feb6b343c1c..195a0dcd232 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -177,6 +177,8 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op) - [V3D_QPU_A_ITOF] = "itof", - [V3D_QPU_A_CLZ] = "clz", - [V3D_QPU_A_UTOF] = "utof", -+ [V3D_QPU_A_MOV] = "mov", -+ [V3D_QPU_A_FMOV] = "fmov", - }; - - if (op >= ARRAY_SIZE(op_names)) -@@ -458,6 +460,9 @@ static const uint8_t add_op_args[] = { - [V3D_QPU_A_ITOF] = D | A, - [V3D_QPU_A_CLZ] = D | A, - [V3D_QPU_A_UTOF] = D | A, -+ -+ [V3D_QPU_A_MOV] = D | A, -+ [V3D_QPU_A_FMOV] = D | A, - }; - - static const uint8_t mul_op_args[] = { -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 50a69ce8c3a..c86a4119c54 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -227,6 +227,10 @@ enum v3d_qpu_add_op { - V3D_QPU_A_ITOF, - V3D_QPU_A_CLZ, - V3D_QPU_A_UTOF, -+ -+ /* V3D 7.x */ -+ V3D_QPU_A_FMOV, -+ V3D_QPU_A_MOV, - }; - - enum v3d_qpu_mul_op { -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 4045275cb9a..0e504e65fbf 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -776,6 +776,21 @@ static const struct opcode_desc add_ops_v71[] = { - - { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 }, - { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 }, -+ -+ { 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 }, -+ { 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 }, -+ { 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 }, -+ { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 }, -+ { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 }, -+ { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 }, -+ { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 }, -+ -+ { 249, 249, .raddr_mask = OP_MASK(3), V3D_QPU_A_MOV, 71 }, -+ { 249, 249, .raddr_mask = OP_MASK(7), V3D_QPU_A_MOV, 71 }, -+ { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 }, -+ { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 }, -+ { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 }, -+ - }; - - static const struct opcode_desc mul_ops_v71[] = { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0034-broadcom-qpu-fix-packing-unpacking-of-fmov-variants-.patch b/projects/RPi/devices/RPi5/patches/mesa/0034-broadcom-qpu-fix-packing-unpacking-of-fmov-variants-.patch deleted file mode 100644 index df5222700d..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0034-broadcom-qpu-fix-packing-unpacking-of-fmov-variants-.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 8dbbb7e22b694fdc62376d112b3dc6105d556c63 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 4 Oct 2021 13:07:35 +0200 -Subject: [PATCH 034/142] broadcom/qpu: fix packing/unpacking of fmov variants - for v71 - ---- - src/broadcom/qpu/qpu_pack.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 0e504e65fbf..0eb820b3f10 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -1405,9 +1405,9 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst - break; - - case V3D_QPU_M_FMOV: -- instr->alu.mul.output_pack = (raddr_d >> 2) & 1; -+ instr->alu.mul.output_pack = raddr_d & 0x3; - -- if (!v3d_qpu_float32_unpack_unpack(raddr_d & 0x3, -+ if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7, - &instr->alu.mul.a.unpack)) { - return false; - } -@@ -2046,14 +2046,13 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo, - &packed)) { - return false; - } -- opcode |= (packed >> 1) & 1; -- raddr_d = (packed & 1) << 2; -+ raddr_d |= packed; - - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, - &packed)) { - return false; - } -- raddr_d |= packed; -+ raddr_d |= packed << 2; - break; - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0035-broadcom-qpu-implement-switch-rules-for-fmin-fmax-fa.patch b/projects/RPi/devices/RPi5/patches/mesa/0035-broadcom-qpu-implement-switch-rules-for-fmin-fmax-fa.patch deleted file mode 100644 index 2e244c13dc..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0035-broadcom-qpu-implement-switch-rules-for-fmin-fmax-fa.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 63d0059ebef288afb0e2e746dadda8c2238bdfcb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 28 Sep 2021 01:17:08 +0200 -Subject: [PATCH 035/142] broadcom/qpu: implement switch rules for fmin/fmax - fadd/faddnf for v71 - -They use the same opcodes, and switch between one and the other based -on raddr. - -Note that the rule rule includes also if small_imm_a/b are used. That -is still not in place so that part is hardcode. Would be updated later -when small immediates support for v71 gets implemented. ---- - src/broadcom/qpu/qpu_pack.c | 48 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 48 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 0eb820b3f10..7a262f18ac3 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -651,7 +651,9 @@ static const struct opcode_desc mul_ops_v33[] = { - * opcodes that changed on v71 - */ - static const struct opcode_desc add_ops_v71[] = { -+ /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */ - { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD }, -+ { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF }, - { 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK }, - { 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD }, - { 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK }, -@@ -666,6 +668,10 @@ static const struct opcode_desc add_ops_v71[] = { - { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR }, - { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR }, - { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR }, -+ /* FMIN is instead FMAX depending on the raddr_a/b order. */ -+ { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN }, -+ { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX }, -+ { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN }, - - { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND }, - { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR }, -@@ -1162,6 +1168,22 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst - - instr->alu.add.op = desc->op; - -+ /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the -+ * operands. -+ */ -+ /* FIXME: for now hardcoded values, until we got the small_imm support -+ * in place -+ */ -+ uint32_t small_imm_a = 0; -+ uint32_t small_imm_b = 0; -+ if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a > -+ small_imm_b *256 + (op & 3) * 64 + raddr_b) { -+ if (instr->alu.add.op == V3D_QPU_A_FMIN) -+ instr->alu.add.op = V3D_QPU_A_FMAX; -+ if (instr->alu.add.op == V3D_QPU_A_FADD) -+ instr->alu.add.op = V3D_QPU_A_FADDNF; -+ } -+ - /* Some QPU ops require a bit more than just basic opcode and mux a/b - * comparisons to distinguish them. - */ -@@ -1754,6 +1776,11 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - uint32_t output_pack; - uint32_t a_unpack; - uint32_t b_unpack; -+ /* FIXME: for now hardcoded values, until we got the small_imm -+ * support in place -+ */ -+ uint32_t small_imm_a = 0; -+ uint32_t small_imm_b = 0; - - if (instr->alu.add.op != V3D_QPU_A_FCMP) { - if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, -@@ -1773,6 +1800,27 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - return false; - } - -+ /* These operations with commutative operands are -+ * distinguished by which order their operands come in. -+ */ -+ bool ordering = -+ small_imm_a * 256 + a_unpack * 64 + raddr_a > -+ small_imm_b * 256 + b_unpack * 64 + raddr_b; -+ if (((instr->alu.add.op == V3D_QPU_A_FMIN || -+ instr->alu.add.op == V3D_QPU_A_FADD) && ordering) || -+ ((instr->alu.add.op == V3D_QPU_A_FMAX || -+ instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) { -+ uint32_t temp; -+ -+ temp = a_unpack; -+ a_unpack = b_unpack; -+ b_unpack = temp; -+ -+ temp = raddr_a; -+ raddr_a = raddr_b; -+ raddr_b = temp; -+ } -+ - opcode |= a_unpack << 2; - opcode |= b_unpack << 0; - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0036-broadcom-compiler-make-vir_write_rX-return-false-on-.patch b/projects/RPi/devices/RPi5/patches/mesa/0036-broadcom-compiler-make-vir_write_rX-return-false-on-.patch deleted file mode 100644 index 6c80d4a9ab..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0036-broadcom-compiler-make-vir_write_rX-return-false-on-.patch +++ /dev/null @@ -1,37 +0,0 @@ -From c9f6faa3ddc91024b3d9dc67ce2221187daac128 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 29 Sep 2021 11:54:18 +0200 -Subject: [PATCH 036/142] broadcom/compiler: make vir_write_rX return false on - platforms without accums - ---- - src/broadcom/compiler/vir.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c -index 007cb0a941b..d75cd777b6d 100644 ---- a/src/broadcom/compiler/vir.c -+++ b/src/broadcom/compiler/vir.c -@@ -158,6 +158,9 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst) - bool - vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) - { -+ if (!devinfo->has_accumulators) -+ return false; -+ - for (int i = 0; i < vir_get_nsrc(inst); i++) { - switch (inst->src[i].file) { - case QFILE_VPM: -@@ -180,6 +183,9 @@ vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) - bool - vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) - { -+ if (!devinfo->has_accumulators) -+ return false; -+ - switch (inst->dst.file) { - case QFILE_MAGIC: - switch (inst->dst.index) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0037-broadcom-compiler-rename-vir_writes_rX-to-vir_writes.patch b/projects/RPi/devices/RPi5/patches/mesa/0037-broadcom-compiler-rename-vir_writes_rX-to-vir_writes.patch deleted file mode 100644 index 1dea74a300..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0037-broadcom-compiler-rename-vir_writes_rX-to-vir_writes.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 3d16229743e26b58735ed049ee982073f6034342 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 29 Sep 2021 12:03:50 +0200 -Subject: [PATCH 037/142] broadcom/compiler: rename vir_writes_rX to - vir_writes_rX_implicitly - -Since that represents more accurately what they check.. ---- - src/broadcom/compiler/v3d_compiler.h | 4 ++-- - src/broadcom/compiler/vir.c | 6 ++++-- - src/broadcom/compiler/vir_register_allocate.c | 4 ++-- - 3 files changed, 8 insertions(+), 6 deletions(-) - -diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h -index eb4e692464b..7e8f3bfc1a7 100644 ---- a/src/broadcom/compiler/v3d_compiler.h -+++ b/src/broadcom/compiler/v3d_compiler.h -@@ -1149,8 +1149,8 @@ bool vir_is_raw_mov(struct qinst *inst); - bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst); - bool vir_is_add(struct qinst *inst); - bool vir_is_mul(struct qinst *inst); --bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); --bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); -+bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst); -+bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst); - struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); - uint8_t vir_channels_written(struct qinst *inst); - struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i); -diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c -index d75cd777b6d..aea113f050e 100644 ---- a/src/broadcom/compiler/vir.c -+++ b/src/broadcom/compiler/vir.c -@@ -156,7 +156,8 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst) - } - - bool --vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) -+vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, -+ struct qinst *inst) - { - if (!devinfo->has_accumulators) - return false; -@@ -181,7 +182,8 @@ vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) - } - - bool --vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) -+vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, -+ struct qinst *inst) - { - if (!devinfo->has_accumulators) - return false; -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 7b3f6c41934..f2df35cd458 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -988,7 +988,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - * result to a temp), nothing else can be stored in r3/r4 across - * it. - */ -- if (vir_writes_r3(c->devinfo, inst)) { -+ if (vir_writes_r3_implicitly(c->devinfo, inst)) { - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - ra_add_node_interference(c->g, -@@ -998,7 +998,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - } - } - -- if (vir_writes_r4(c->devinfo, inst)) { -+ if (vir_writes_r4_implicitly(c->devinfo, inst)) { - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - ra_add_node_interference(c->g, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0038-broadcom-compiler-only-handle-accumulator-classes-if.patch b/projects/RPi/devices/RPi5/patches/mesa/0038-broadcom-compiler-only-handle-accumulator-classes-if.patch deleted file mode 100644 index b39e7bda94..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0038-broadcom-compiler-only-handle-accumulator-classes-if.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 83fae160491737e8568b8fb5eaa5be4d2c8bf3c8 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 29 Sep 2021 12:10:31 +0200 -Subject: [PATCH 038/142] broadcom/compiler: only handle accumulator classes if - present - ---- - src/broadcom/compiler/vir_register_allocate.c | 77 ++++++++++++------- - 1 file changed, 49 insertions(+), 28 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index f2df35cd458..e78ccb7c6aa 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -53,6 +53,17 @@ get_class_bit_any(const struct v3d_device_info *devinfo) - else - return CLASS_BITS_PHYS; - } -+ -+static uint8_t -+filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits) -+{ -+ if (!devinfo->has_accumulators) { -+ assert(class_bits & CLASS_BITS_PHYS); -+ class_bits = CLASS_BITS_PHYS; -+ } -+ return class_bits; -+} -+ - static inline uint32_t - temp_to_node(struct v3d_compile *c, uint32_t temp) - { -@@ -413,8 +424,10 @@ v3d_setup_spill_base(struct v3d_compile *c) - */ - if (c->spilling) { - int temp_class = CLASS_BITS_PHYS; -- if (i != c->spill_base.index) -+ if (c->devinfo->has_accumulators && -+ i != c->spill_base.index) { - temp_class |= CLASS_BITS_ACC; -+ } - add_node(c, i, temp_class); - } - } -@@ -473,14 +486,16 @@ v3d_emit_spill_tmua(struct v3d_compile *c, - * temp will be used immediately so just like the uniform above we - * can allow accumulators. - */ -+ int temp_class = -+ filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC); - if (!fill_dst) { - struct qreg dst = vir_TMUWT(c); - assert(dst.file == QFILE_TEMP); -- add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC); -+ add_node(c, dst.index, temp_class); - } else { - *fill_dst = vir_LDTMU(c); - assert(fill_dst->file == QFILE_TEMP); -- add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC); -+ add_node(c, fill_dst->index, temp_class); - } - - /* Temps across the thread switch we injected can't be assigned to -@@ -662,8 +677,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - * instruction immediately after so we - * can use ACC. - */ -- add_node(c, temp.index, CLASS_BITS_PHYS | -- CLASS_BITS_ACC); -+ int temp_class = -+ filter_class_bits(c->devinfo, CLASS_BITS_PHYS | -+ CLASS_BITS_ACC); -+ add_node(c, temp.index, temp_class); - } else { - /* If we have a postponed spill, we - * don't need a fill as the temp would -@@ -941,6 +958,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler) - compiler->reg_class_phys[threads] = - ra_alloc_contig_reg_class(compiler->regs, 1); - -+ /* Init physical regs */ - for (int i = phys_index; - i < phys_index + (PHYS_COUNT >> threads); i++) { - if (compiler->devinfo->has_accumulators) -@@ -949,16 +967,15 @@ vir_init_reg_sets(struct v3d_compiler *compiler) - ra_class_add_reg(compiler->reg_class_any[threads], i); - } - -+ /* Init accumulator regs */ - if (compiler->devinfo->has_accumulators) { - for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) { - ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); - ra_class_add_reg(compiler->reg_class_any[threads], i); - } -- } -- /* r5 can only store a single 32-bit value, so not much can -- * use it. -- */ -- if (compiler->devinfo->has_accumulators) { -+ /* r5 can only store a single 32-bit value, so not much can -+ * use it. -+ */ - ra_class_add_reg(compiler->reg_class_r5[threads], - ACC_INDEX + 5); - ra_class_add_reg(compiler->reg_class_any[threads], -@@ -1081,21 +1098,23 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - * because ldunif has usually a shorter lifespan, allowing for - * more accumulator reuse and QPU merges. - */ -- if (!inst->qpu.sig.ldunif) { -- uint8_t class_bits = -- get_temp_class_bits(c, inst->dst.index) & -- ~CLASS_BITS_R5; -- set_temp_class_bits(c, inst->dst.index, -- class_bits); -- -- } else { -- /* Until V3D 4.x, we could only load a uniform -- * to r5, so we'll need to spill if uniform -- * loads interfere with each other. -- */ -- if (c->devinfo->ver < 40) { -+ if (c->devinfo->has_accumulators) { -+ if (!inst->qpu.sig.ldunif) { -+ uint8_t class_bits = -+ get_temp_class_bits(c, inst->dst.index) & -+ ~CLASS_BITS_R5; - set_temp_class_bits(c, inst->dst.index, -- CLASS_BITS_R5); -+ class_bits); -+ -+ } else { -+ /* Until V3D 4.x, we could only load a uniform -+ * to r5, so we'll need to spill if uniform -+ * loads interfere with each other. -+ */ -+ if (c->devinfo->ver < 40) { -+ set_temp_class_bits(c, inst->dst.index, -+ CLASS_BITS_R5); -+ } - } - } - } -@@ -1152,8 +1171,10 @@ v3d_register_allocate(struct v3d_compile *c) - c->thread_index--; - } - -- c->g = ra_alloc_interference_graph(c->compiler->regs, -- c->num_temps + ARRAY_SIZE(acc_nodes)); -+ unsigned num_ra_nodes = c->num_temps; -+ if (c->devinfo->has_accumulators) -+ num_ra_nodes += ARRAY_SIZE(acc_nodes); -+ c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes); - ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data); - - /* Make some fixed nodes for the accumulators, which we will need to -@@ -1162,8 +1183,8 @@ v3d_register_allocate(struct v3d_compile *c) - * live in, but the classes take up a lot of memory to set up, so we - * don't want to make too many. - */ -- for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) { -- if (i < ACC_COUNT) { -+ for (uint32_t i = 0; i < num_ra_nodes; i++) { -+ if (c->devinfo->has_accumulators && i < ACC_COUNT) { - acc_nodes[i] = i; - ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i); - c->nodes.info[i].priority = 0; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0039-broadcom-compiler-don-t-assign-rf0-to-temps-across-i.patch b/projects/RPi/devices/RPi5/patches/mesa/0039-broadcom-compiler-don-t-assign-rf0-to-temps-across-i.patch deleted file mode 100644 index e7553a8295..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0039-broadcom-compiler-don-t-assign-rf0-to-temps-across-i.patch +++ /dev/null @@ -1,187 +0,0 @@ -From fd77cc3204e7c69927f97ce2a1d55d2a47d77a27 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 29 Sep 2021 12:14:04 +0200 -Subject: [PATCH 039/142] broadcom/compiler: don't assign rf0 to temps across - implicit rf0 writes - -In platforms that don't have accumulators and have implicit writes to -the register file we need to be careful and avoid assigning a physical -register to a temp that lives across an implicit write to that same -physical register. - -For now, we have the case of implicit writes to rf0 from various -signals, but it should be easy to extend this to include additional -registers if needed. ---- - src/broadcom/compiler/vir_register_allocate.c | 69 +++++++++++++++---- - 1 file changed, 57 insertions(+), 12 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index e78ccb7c6aa..e0adc1de7a4 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -29,6 +29,9 @@ - #define ACC_INDEX 0 - #define ACC_COUNT 6 - -+/* RA nodes used to track RF registers with implicit writes */ -+#define IMPLICIT_RF_COUNT 1 -+ - #define PHYS_COUNT 64 - - static uint8_t -@@ -67,15 +70,17 @@ filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits) - static inline uint32_t - temp_to_node(struct v3d_compile *c, uint32_t temp) - { -- return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0); -+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT : -+ IMPLICIT_RF_COUNT); - } - - static inline uint32_t - node_to_temp(struct v3d_compile *c, uint32_t node) - { - assert((c->devinfo->has_accumulators && node >= ACC_COUNT) || -- (!c->devinfo->has_accumulators && node >= 0)); -- return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0); -+ (!c->devinfo->has_accumulators && node >= IMPLICIT_RF_COUNT)); -+ return node - (c->devinfo->has_accumulators ? ACC_COUNT : -+ IMPLICIT_RF_COUNT); - } - - static inline uint8_t -@@ -360,7 +365,8 @@ ensure_nodes(struct v3d_compile *c) - c->nodes.info = reralloc_array_size(c, - c->nodes.info, - sizeof(c->nodes.info[0]), -- c->nodes.alloc_count + ACC_COUNT); -+ c->nodes.alloc_count + -+ MAX2(ACC_COUNT, IMPLICIT_RF_COUNT)); - } - - /* Creates the interference node for a new temp. We use this to keep the node -@@ -372,7 +378,8 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits) - ensure_nodes(c); - - int node = ra_add_node(c->g, choose_reg_class(c, class_bits)); -- assert(node == temp + ACC_COUNT); -+ assert(c->devinfo->has_accumulators ? node == temp + ACC_COUNT : -+ node == temp + IMPLICIT_RF_COUNT); - - /* We fill the node priority after we are done inserting spills */ - c->nodes.info[node].class_bits = class_bits; -@@ -995,7 +1002,9 @@ tmu_spilling_allowed(struct v3d_compile *c) - } - - static void --update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, -+update_graph_and_reg_classes_for_inst(struct v3d_compile *c, -+ int *acc_nodes, -+ int *implicit_rf_nodes, - struct qinst *inst) - { - int32_t ip = inst->ip; -@@ -1025,6 +1034,19 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - } - } - -+ /* If any instruction writes to a physical register implicitly -+ * nothing else can write the same register across it. -+ */ -+ if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) { -+ for (int i = 0; i < c->num_temps; i++) { -+ if (c->temp_start[i] < ip && c->temp_end[i] > ip) { -+ ra_add_node_interference(c->g, -+ temp_to_node(c, i), -+ implicit_rf_nodes[0]); -+ } -+ } -+ } -+ - if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { - switch (inst->qpu.alu.add.op) { - case V3D_QPU_A_LDVPMV_IN: -@@ -1116,6 +1138,16 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes, - CLASS_BITS_R5); - } - } -+ } else { -+ /* If the instruction has an implicit write -+ * we can't allocate its dest to the same -+ * register. -+ */ -+ if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) { -+ ra_add_node_interference(c->g, -+ temp_to_node(c, inst->dst.index), -+ implicit_rf_nodes[0]); -+ } - } - } - -@@ -1139,10 +1171,18 @@ struct qpu_reg * - v3d_register_allocate(struct v3d_compile *c) - { - int acc_nodes[ACC_COUNT]; -+ int implicit_rf_nodes[IMPLICIT_RF_COUNT]; -+ -+ unsigned num_ra_nodes = c->num_temps; -+ if (c->devinfo->has_accumulators) -+ num_ra_nodes += ARRAY_SIZE(acc_nodes); -+ else -+ num_ra_nodes += ARRAY_SIZE(implicit_rf_nodes); -+ - c->nodes = (struct v3d_ra_node_info) { - .alloc_count = c->num_temps, - .info = ralloc_array_size(c, sizeof(c->nodes.info[0]), -- c->num_temps + ACC_COUNT), -+ num_ra_nodes), - }; - - uint32_t phys_index = get_phys_index(c->devinfo); -@@ -1171,9 +1211,6 @@ v3d_register_allocate(struct v3d_compile *c) - c->thread_index--; - } - -- unsigned num_ra_nodes = c->num_temps; -- if (c->devinfo->has_accumulators) -- num_ra_nodes += ARRAY_SIZE(acc_nodes); - c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes); - ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data); - -@@ -1181,7 +1218,8 @@ v3d_register_allocate(struct v3d_compile *c) - * interfere with when ops have implied r3/r4 writes or for the thread - * switches. We could represent these as classes for the nodes to - * live in, but the classes take up a lot of memory to set up, so we -- * don't want to make too many. -+ * don't want to make too many. We use the same mechanism on platforms -+ * without accumulators that can have implicit writes to phys regs. - */ - for (uint32_t i = 0; i < num_ra_nodes; i++) { - if (c->devinfo->has_accumulators && i < ACC_COUNT) { -@@ -1189,6 +1227,12 @@ v3d_register_allocate(struct v3d_compile *c) - ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i); - c->nodes.info[i].priority = 0; - c->nodes.info[i].class_bits = 0; -+ } else if (!c->devinfo->has_accumulators && -+ i < ARRAY_SIZE(implicit_rf_nodes)) { -+ implicit_rf_nodes[i] = i; -+ ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i); -+ c->nodes.info[i].priority = 0; -+ c->nodes.info[i].class_bits = 0; - } else { - uint32_t t = node_to_temp(c, i); - c->nodes.info[i].priority = -@@ -1204,7 +1248,8 @@ v3d_register_allocate(struct v3d_compile *c) - int ip = 0; - vir_for_each_inst_inorder(inst, c) { - inst->ip = ip++; -- update_graph_and_reg_classes_for_inst(c, acc_nodes, inst); -+ update_graph_and_reg_classes_for_inst(c, acc_nodes, -+ implicit_rf_nodes, inst); - } - - /* Set the register classes for all our temporaries in the graph */ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0040-broadcom-compiler-CS-payload-registers-have-changed-.patch b/projects/RPi/devices/RPi5/patches/mesa/0040-broadcom-compiler-CS-payload-registers-have-changed-.patch deleted file mode 100644 index 8eee3ac26c..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0040-broadcom-compiler-CS-payload-registers-have-changed-.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 9a08ae9f354a6da6d9d71b87800aca8b3df49e29 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 28 Sep 2021 13:37:28 +0200 -Subject: [PATCH 040/142] broadcom/compiler: CS payload registers have changed - in v71 - ---- - src/broadcom/compiler/nir_to_vir.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c -index 1a05b279a2d..220ff6bcd49 100644 ---- a/src/broadcom/compiler/nir_to_vir.c -+++ b/src/broadcom/compiler/nir_to_vir.c -@@ -4362,8 +4362,13 @@ nir_to_vir(struct v3d_compile *c) - V3D_QPU_WADDR_SYNC)); - } - -- c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0)); -- c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2)); -+ if (c->devinfo->ver <= 42) { -+ c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0)); -+ c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2)); -+ } else if (c->devinfo->ver >= 71) { -+ c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 3)); -+ c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2)); -+ } - - /* Set up the division between gl_LocalInvocationIndex and - * wg_in_mem in the payload reg. --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0041-broadcom-compiler-don-t-schedule-rf0-writes-right-af.patch b/projects/RPi/devices/RPi5/patches/mesa/0041-broadcom-compiler-don-t-schedule-rf0-writes-right-af.patch deleted file mode 100644 index 193468668e..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0041-broadcom-compiler-don-t-schedule-rf0-writes-right-af.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 5477884196cb54a71f54fa6cad42c6d3326bde88 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 22 Oct 2021 13:39:48 +0200 -Subject: [PATCH 041/142] broadcom/compiler: don't schedule rf0 writes right - after ldvary - -ldvary writes rf0 implicitly on the next cycle so they would clash. -This case is not handled correctly by our normal dependency tracking, -which doesn't know anything about delayed writes from instructions -and thinks the rf0 write happens on the same cycle ldvary is emitted. - -Fixes (v71): -dEQP-VK.glsl.conversions.matrix_to_matrix.mat2x3_to_mat4x2_fragment ---- - src/broadcom/compiler/qpu_schedule.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 839c0c62315..870823fd2b1 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -652,6 +652,21 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo, - v3d_qpu_writes_r4(devinfo, inst)) - return true; - -+ if (devinfo->ver <= 42) -+ return false; -+ -+ /* Don't schedule anything that writes rf0 right after ldvary, since -+ * that would clash with the ldvary's delayed rf0 write (the exception -+ * is another ldvary, since its implicit rf0 write would also have -+ * one cycle of delay and would not clash). -+ */ -+ if (scoreboard->last_ldvary_tick + 1 == scoreboard->tick && -+ (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) || -+ (v3d_qpu_writes_rf0_implicitly(devinfo, inst) && -+ !inst->sig.ldvary))) { -+ return true; -+ } -+ - return false; - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0042-broadcom-compiler-allow-instruction-merges-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0042-broadcom-compiler-allow-instruction-merges-in-v71.patch deleted file mode 100644 index dcb28320d5..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0042-broadcom-compiler-allow-instruction-merges-in-v71.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 31623712c2f741d393767641f32d56c35150eda5 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 30 Sep 2021 13:22:48 +0200 -Subject: [PATCH 042/142] broadcom/compiler: allow instruction merges in v71 - -In v3d 4.x there were restrictions based on the number of raddrs used -by the combined instructions, but we don't have these restrictions in -v3d 7.x. - -It should be noted that while there are no restrictions on the number -of raddrs addressed, a QPU instruction can only address a single small -immediate, so we should be careful about that when we add support for -small immediates. ---- - src/broadcom/compiler/qpu_schedule.c | 21 +++++++++++++++++---- - 1 file changed, 17 insertions(+), 4 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 870823fd2b1..ff544fb3c1c 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -906,8 +906,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a, - static bool - qpu_merge_raddrs(struct v3d_qpu_instr *result, - const struct v3d_qpu_instr *add_instr, -- const struct v3d_qpu_instr *mul_instr) -+ const struct v3d_qpu_instr *mul_instr, -+ const struct v3d_device_info *devinfo) - { -+ assert(devinfo->ver <= 42); -+ - uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr); - int naddrs = util_bitcount64(raddrs_used); - -@@ -1111,9 +1114,19 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - add_instr = a; - } - -- if (add_instr && mul_instr && -- !qpu_merge_raddrs(&merge, add_instr, mul_instr)) { -- return false; -+ /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and -+ * they have restrictions on the number of raddrs that can be adressed -+ * in a single instruction. -+ * -+ * FIXME: for V3D 7.x we can't merge instructions if they address more -+ * than one small immediate. For now, we don't support small immediates, -+ * so it is not a problem. -+ */ -+ if (devinfo->ver <= 42) { -+ if (add_instr && mul_instr && -+ !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) { -+ return false; -+ } - } - - merge.sig.thrsw |= b->sig.thrsw; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0043-broadcom-qpu-add-MOV-integer-packing-unpacking-varia.patch b/projects/RPi/devices/RPi5/patches/mesa/0043-broadcom-qpu-add-MOV-integer-packing-unpacking-varia.patch deleted file mode 100644 index 1df473d3de..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0043-broadcom-qpu-add-MOV-integer-packing-unpacking-varia.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 959a0128654c94d84fda53ffc108971d3b3a817a Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 6 Oct 2021 09:27:43 +0200 -Subject: [PATCH 043/142] broadcom/qpu: add MOV integer packing/unpacking - variants - -These are new in v71 and cover MOV on both the ADD and the MUL alus. ---- - src/broadcom/qpu/qpu_instr.h | 9 ++++ - src/broadcom/qpu/qpu_pack.c | 98 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 107 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index c86a4119c54..4b34d17bd4c 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -285,6 +285,15 @@ enum v3d_qpu_input_unpack { - - /** Swap high and low 16 bits */ - V3D_QPU_UNPACK_SWAP_16, -+ -+ /** Convert low 16 bits from 16-bit integer to unsigned 32-bit int */ -+ V3D_QPU_UNPACK_UL, -+ /** Convert high 16 bits from 16-bit integer to unsigned 32-bit int */ -+ V3D_QPU_UNPACK_UH, -+ /** Convert low 16 bits from 16-bit integer to signed 32-bit int */ -+ V3D_QPU_UNPACK_IL, -+ /** Convert high 16 bits from 16-bit integer to signed 32-bit int */ -+ V3D_QPU_UNPACK_IH, - }; - - enum v3d_qpu_mux { -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 7a262f18ac3..4d677894755 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -922,6 +922,56 @@ v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, - } - } - -+static bool -+v3d_qpu_int32_unpack_unpack(uint32_t packed, -+ enum v3d_qpu_input_unpack *unpacked) -+{ -+ switch (packed) { -+ case 0: -+ *unpacked = V3D_QPU_UNPACK_NONE; -+ return true; -+ case 1: -+ *unpacked = V3D_QPU_UNPACK_UL; -+ return true; -+ case 2: -+ *unpacked = V3D_QPU_UNPACK_UH; -+ return true; -+ case 3: -+ *unpacked = V3D_QPU_UNPACK_IL; -+ return true; -+ case 4: -+ *unpacked = V3D_QPU_UNPACK_IH; -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static bool -+v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked, -+ uint32_t *packed) -+{ -+ switch (unpacked) { -+ case V3D_QPU_UNPACK_NONE: -+ *packed = 0; -+ return true; -+ case V3D_QPU_UNPACK_UL: -+ *packed = 1; -+ return true; -+ case V3D_QPU_UNPACK_UH: -+ *packed = 2; -+ return true; -+ case V3D_QPU_UNPACK_IL: -+ *packed = 3; -+ return true; -+ case V3D_QPU_UNPACK_IH: -+ *packed = 4; -+ return true; -+ default: -+ return false; -+ } -+} -+ - static bool - v3d_qpu_float16_unpack_unpack(uint32_t packed, - enum v3d_qpu_input_unpack *unpacked) -@@ -1273,6 +1323,15 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst - instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; - break; - -+ case V3D_QPU_A_MOV: -+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE; -+ -+ if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7, -+ &instr->alu.add.a.unpack)) { -+ return false; -+ } -+ break; -+ - default: - instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE; -@@ -1449,6 +1508,15 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst - - break; - -+ case V3D_QPU_M_MOV: -+ instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; -+ -+ if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7, -+ &instr->alu.mul.a.unpack)) { -+ return false; -+ } -+ break; -+ - default: - instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; - instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE; -@@ -1909,6 +1977,21 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - opcode |= packed; - break; - -+ case V3D_QPU_A_MOV: { -+ uint32_t packed; -+ -+ if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE) -+ return false; -+ -+ if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack, -+ &packed)) { -+ return false; -+ } -+ -+ raddr_b |= packed << 2; -+ break; -+ } -+ - default: - if (instr->alu.add.op != V3D_QPU_A_NOP && - (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || -@@ -2126,6 +2209,21 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo, - break; - } - -+ case V3D_QPU_M_MOV: { -+ uint32_t packed; -+ -+ if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE) -+ return false; -+ -+ if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack, -+ &packed)) { -+ return false; -+ } -+ -+ raddr_d |= packed << 2; -+ break; -+ } -+ - default: - break; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0044-broadcom-qpu-fail-packing-on-unhandled-mul-pack-unpa.patch b/projects/RPi/devices/RPi5/patches/mesa/0044-broadcom-qpu-fail-packing-on-unhandled-mul-pack-unpa.patch deleted file mode 100644 index 864966dbea..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0044-broadcom-qpu-fail-packing-on-unhandled-mul-pack-unpa.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 2e86dd0c357d7b432ce6794ae22fbfae89ad186b Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 6 Oct 2021 12:01:10 +0200 -Subject: [PATCH 044/142] broadcom/qpu: fail packing on unhandled mul - pack/unpack - -We are doing this for the ADD alu already and it may be helpful to -identify cases where we have QPU code with pack/unpack modifiers on -MUL opcodes that we then are not packing into the actual QPU -instructions. ---- - src/broadcom/qpu/qpu_pack.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 4d677894755..180d7ab08a3 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -2106,6 +2106,12 @@ v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo, - } - - default: -+ if (instr->alu.mul.op != V3D_QPU_M_NOP && -+ (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE || -+ instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE || -+ instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) { -+ return false; -+ } - break; - } - -@@ -2225,6 +2231,12 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo, - } - - default: -+ if (instr->alu.mul.op != V3D_QPU_M_NOP && -+ (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE || -+ instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE || -+ instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) { -+ return false; -+ } - break; - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0045-broadcom-compiler-generalize-check-for-shaders-using.patch b/projects/RPi/devices/RPi5/patches/mesa/0045-broadcom-compiler-generalize-check-for-shaders-using.patch deleted file mode 100644 index cc4befe719..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0045-broadcom-compiler-generalize-check-for-shaders-using.patch +++ /dev/null @@ -1,30 +0,0 @@ -From ed6bfa29d43b5a89ff070961454f1e82e23b4f45 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 8 Oct 2021 15:10:24 +0200 -Subject: [PATCH 045/142] broadcom/compiler: generalize check for shaders using - pixel center W - -V3D 4.x has pixel center W in rf0 and V3D 7.x has it in rf3. We already -account for this when we setup the c->payload_w, so use that. ---- - src/broadcom/compiler/nir_to_vir.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c -index 220ff6bcd49..90fe1d1e7f0 100644 ---- a/src/broadcom/compiler/nir_to_vir.c -+++ b/src/broadcom/compiler/nir_to_vir.c -@@ -4547,8 +4547,8 @@ vir_check_payload_w(struct v3d_compile *c) - - vir_for_each_inst_inorder(inst, c) { - for (int i = 0; i < vir_get_nsrc(inst); i++) { -- if (inst->src[i].file == QFILE_REG && -- inst->src[i].index == 0) { -+ if (inst->src[i].file == c->payload_w.file && -+ inst->src[i].index == c->payload_w.index) { - c->uses_center_w = true; - return; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0046-broadcom-compiler-v71-isn-t-affected-by-double-round.patch b/projects/RPi/devices/RPi5/patches/mesa/0046-broadcom-compiler-v71-isn-t-affected-by-double-round.patch deleted file mode 100644 index 23f70c60d3..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0046-broadcom-compiler-v71-isn-t-affected-by-double-round.patch +++ /dev/null @@ -1,34 +0,0 @@ -From e1a0fa2c2010ef29b8cec798cd0fc99cf44f3a2d Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 14 Oct 2021 14:16:40 +0200 -Subject: [PATCH 046/142] broadcom/compiler: v71 isn't affected by - double-rounding of viewport X,Y coords - ---- - src/broadcom/compiler/v3d_nir_lower_io.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c -index 3ef0e398228..4cdba3748a1 100644 ---- a/src/broadcom/compiler/v3d_nir_lower_io.c -+++ b/src/broadcom/compiler/v3d_nir_lower_io.c -@@ -600,9 +600,13 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b, - * The correct fix for this as recommended by Broadcom - * is to convert to .8 fixed-point with ffloor(). - */ -- pos = nir_f2i32(b, nir_ffloor(b, pos)); -- v3d_nir_store_output(b, state->vp_vpm_offset + i, -- offset_reg, pos); -+ if (c->devinfo->ver <= 42) -+ pos = nir_f2i32(b, nir_ffloor(b, pos)); -+ else -+ pos = nir_f2i32(b, nir_fround_even(b, pos)); -+ -+ v3d_nir_store_output(b, state->vp_vpm_offset + i, -+ offset_reg, pos); - } - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0047-broadcom-compiler-update-one-TMUWT-restriction-for-v.patch b/projects/RPi/devices/RPi5/patches/mesa/0047-broadcom-compiler-update-one-TMUWT-restriction-for-v.patch deleted file mode 100644 index 45dd5fba46..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0047-broadcom-compiler-update-one-TMUWT-restriction-for-v.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 697e6cf01b781b244404872f331a778b6d4e67da Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 19 Oct 2021 11:16:43 +0200 -Subject: [PATCH 047/142] broadcom/compiler: update one TMUWT restriction for - v71 - -TMUWT not allowed in the final instruction restriction doesn't apply -for v71. ---- - src/broadcom/compiler/qpu_schedule.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index ff544fb3c1c..25f79aa6f46 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1700,8 +1700,10 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - - if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { - /* GFXH-1625: TMUWT not allowed in the final instruction. */ -- if (slot == 2 && inst->alu.add.op == V3D_QPU_A_TMUWT) -+ if (c->devinfo->ver <= 42 && slot == 2 && -+ inst->alu.add.op == V3D_QPU_A_TMUWT) { - return false; -+ } - - /* No writing physical registers at the end. */ - bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0048-broadcom-compiler-update-ldunif-ldvary-comment-for-v.patch b/projects/RPi/devices/RPi5/patches/mesa/0048-broadcom-compiler-update-ldunif-ldvary-comment-for-v.patch deleted file mode 100644 index 75d16def81..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0048-broadcom-compiler-update-ldunif-ldvary-comment-for-v.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 26fea727a9f34b75a3fe3f6a806accaddcc317f6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 19 Oct 2021 11:51:32 +0200 -Subject: [PATCH 048/142] broadcom/compiler: update ldunif/ldvary comment for - v71 - -For v42 and below ldunif/ldvary write both on r5, but with a different -delay, so we need to take that into account when scheduling both. - -For v71 the register used is rf0, but the behaviour is the same. So -the scheduling code can be the same, but the comment needs update. ---- - src/broadcom/compiler/qpu_schedule.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 25f79aa6f46..e8197661f89 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1234,10 +1234,11 @@ retry: - if (pixel_scoreboard_too_soon(c, scoreboard, inst)) - continue; - -- /* ldunif and ldvary both write r5, but ldunif does so a tick -- * sooner. If the ldvary's r5 wasn't used, then ldunif might -+ /* ldunif and ldvary both write the same register (r5 for v42 -+ * and below, rf0 for v71), but ldunif does so a tick sooner. -+ * If the ldvary's register wasn't used, then ldunif might - * otherwise get scheduled so ldunif and ldvary try to update -- * r5 in the same tick. -+ * the register in the same tick. - */ - if ((inst->sig.ldunif || inst->sig.ldunifa) && - scoreboard->tick == scoreboard->last_ldvary_tick + 1) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0049-broadcom-compiler-update-payload-registers-handling-.patch b/projects/RPi/devices/RPi5/patches/mesa/0049-broadcom-compiler-update-payload-registers-handling-.patch deleted file mode 100644 index b66dc181f4..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0049-broadcom-compiler-update-payload-registers-handling-.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 70456e27b039174f767010f96d9b649e5e42d84f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 19 Oct 2021 23:52:30 +0200 -Subject: [PATCH 049/142] broadcom/compiler: update payload registers handling - when computing live intervals - -As for v71 the payload registers are not the same. Specifically now -rf3 is used as payload register, so this is needed to avoid rf3 being -selected as a instruction dst by the register allocator, overwriting -the payload value that could be still used. ---- - src/broadcom/compiler/vir_live_variables.c | 21 +++++++++++++-------- - 1 file changed, 13 insertions(+), 8 deletions(-) - -diff --git a/src/broadcom/compiler/vir_live_variables.c b/src/broadcom/compiler/vir_live_variables.c -index 575b0481dc8..87a7e2b5b81 100644 ---- a/src/broadcom/compiler/vir_live_variables.c -+++ b/src/broadcom/compiler/vir_live_variables.c -@@ -179,17 +179,22 @@ vir_setup_def_use(struct v3d_compile *c) - flags_inst = NULL; - } - -- /* Payload registers: r0/1/2 contain W, centroid W, -- * and Z at program start. Register allocation will -- * force their nodes to R0/1/2. -+ /* Payload registers: for fragment shaders, W, -+ * centroid W, and Z will be initialized at r0/1/2 -+ * until v42, or r1/r2/r3 from v71. -+ * -+ * For compute shaders, payload would be r0/r2 until -+ * v42, r3/r2 from v71 -+ * -+ * Register allocation will force their nodes to those -+ * registers. - */ - if (inst->src[0].file == QFILE_REG) { -- switch (inst->src[0].index) { -- case 0: -- case 1: -- case 2: -+ uint32_t min_payload_r = c->devinfo->ver >= 71 ? 1 : 0; -+ uint32_t max_payload_r = c->devinfo->ver >= 71 ? 3 : 2; -+ if (inst->src[0].index >= min_payload_r || -+ inst->src[0].index <= max_payload_r) { - c->temp_start[inst->dst.index] = 0; -- break; - } - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0050-broadcom-compiler-update-peripheral-access-restricti.patch b/projects/RPi/devices/RPi5/patches/mesa/0050-broadcom-compiler-update-peripheral-access-restricti.patch deleted file mode 100644 index 28e2ba2dec..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0050-broadcom-compiler-update-peripheral-access-restricti.patch +++ /dev/null @@ -1,235 +0,0 @@ -From f9a76b3a1e316e5ed6387819b87eaaf60f989a2b Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 26 Oct 2021 11:43:02 +0200 -Subject: [PATCH 050/142] broadcom/compiler: update peripheral access - restrictions for v71 - -In V3D 4.x only a couple of simultaneous accesses where allowed, but -V3D 7.x is a bit more flexible, so rather than trying to check for all -the allowed combinations it is easier to check if we are one of the -disallows. - -Shader-db (pi5): - -total instructions in shared programs: 11338883 -> 11307386 (-0.28%) -instructions in affected programs: 2727201 -> 2695704 (-1.15%) -helped: 12555 -HURT: 289 -Instructions are helped. - -total max-temps in shared programs: 2230199 -> 2229260 (-0.04%) -max-temps in affected programs: 20508 -> 19569 (-4.58%) -helped: 608 -HURT: 4 -Max-temps are helped. - -total sfu-stalls in shared programs: 15236 -> 15293 (0.37%) -sfu-stalls in affected programs: 148 -> 205 (38.51%) -helped: 38 -HURT: 64 -Inconclusive result (%-change mean confidence interval includes 0). - -total inst-and-stalls in shared programs: 11354119 -> 11322679 (-0.28%) -inst-and-stalls in affected programs: 2732262 -> 2700822 (-1.15%) -helped: 12550 -HURT: 304 -Inst-and-stalls are helped. - -total nops in shared programs: 273711 -> 274095 (0.14%) -nops in affected programs: 9626 -> 10010 (3.99%) -helped: 186 -HURT: 397 -Nops are HURT. ---- - src/broadcom/compiler/qpu_schedule.c | 88 +++++++++++++++++++++------- - src/broadcom/compiler/qpu_validate.c | 2 +- - src/broadcom/qpu/qpu_instr.c | 16 +++-- - src/broadcom/qpu/qpu_instr.h | 2 + - 4 files changed, 82 insertions(+), 26 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index e8197661f89..adb501e85ce 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -790,7 +790,8 @@ enum { - V3D_PERIPHERAL_TMU_WAIT = (1 << 6), - V3D_PERIPHERAL_TMU_WRTMUC_SIG = (1 << 7), - V3D_PERIPHERAL_TSY = (1 << 8), -- V3D_PERIPHERAL_TLB = (1 << 9), -+ V3D_PERIPHERAL_TLB_READ = (1 << 9), -+ V3D_PERIPHERAL_TLB_WRITE = (1 << 10), - }; - - static uint32_t -@@ -815,8 +816,10 @@ qpu_peripherals(const struct v3d_device_info *devinfo, - if (v3d_qpu_uses_sfu(inst)) - result |= V3D_PERIPHERAL_SFU; - -- if (v3d_qpu_uses_tlb(inst)) -- result |= V3D_PERIPHERAL_TLB; -+ if (v3d_qpu_reads_tlb(inst)) -+ result |= V3D_PERIPHERAL_TLB_READ; -+ if (v3d_qpu_writes_tlb(inst)) -+ result |= V3D_PERIPHERAL_TLB_WRITE; - - if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { - if (inst->alu.add.op != V3D_QPU_A_NOP && -@@ -847,32 +850,75 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo, - if (devinfo->ver < 41) - return false; - -- /* V3D 4.1+ allow WRTMUC signal with TMU register write (other than -- * tmuc). -+ /* V3D 4.x can't do more than one peripheral access except in a -+ * few cases: - */ -- if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG && -- b_peripherals == V3D_PERIPHERAL_TMU_WRITE) { -- return v3d_qpu_writes_tmu_not_tmuc(devinfo, b); -+ if (devinfo->ver <= 42) { -+ /* WRTMUC signal with TMU register write (other than tmuc). */ -+ if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG && -+ b_peripherals == V3D_PERIPHERAL_TMU_WRITE) { -+ return v3d_qpu_writes_tmu_not_tmuc(devinfo, b); -+ } -+ if (b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG && -+ a_peripherals == V3D_PERIPHERAL_TMU_WRITE) { -+ return v3d_qpu_writes_tmu_not_tmuc(devinfo, a); -+ } -+ -+ /* TMU read with VPM read/write. */ -+ if (a_peripherals == V3D_PERIPHERAL_TMU_READ && -+ (b_peripherals == V3D_PERIPHERAL_VPM_READ || -+ b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) { -+ return true; -+ } -+ if (b_peripherals == V3D_PERIPHERAL_TMU_READ && -+ (a_peripherals == V3D_PERIPHERAL_VPM_READ || -+ a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) { -+ return true; -+ } -+ -+ return false; - } - -- if (a_peripherals == V3D_PERIPHERAL_TMU_WRITE && -- b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG) { -- return v3d_qpu_writes_tmu_not_tmuc(devinfo, a); -+ /* V3D 7.x can't have more than one of these restricted peripherals */ -+ const uint32_t restricted = V3D_PERIPHERAL_TMU_WRITE | -+ V3D_PERIPHERAL_TMU_WRTMUC_SIG | -+ V3D_PERIPHERAL_TSY | -+ V3D_PERIPHERAL_TLB_READ | -+ V3D_PERIPHERAL_SFU | -+ V3D_PERIPHERAL_VPM_READ | -+ V3D_PERIPHERAL_VPM_WRITE; -+ -+ const uint32_t a_restricted = a_peripherals & restricted; -+ const uint32_t b_restricted = b_peripherals & restricted; -+ if (a_restricted && b_restricted) { -+ /* WRTMUC signal with TMU register write (other than tmuc) is -+ * allowed though. -+ */ -+ if (!((a_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG && -+ b_restricted == V3D_PERIPHERAL_TMU_WRITE && -+ v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) || -+ (b_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG && -+ a_restricted == V3D_PERIPHERAL_TMU_WRITE && -+ v3d_qpu_writes_tmu_not_tmuc(devinfo, a)))) { -+ return false; -+ } - } - -- /* V3D 4.1+ allows TMU read with VPM read/write. */ -- if (a_peripherals == V3D_PERIPHERAL_TMU_READ && -- (b_peripherals == V3D_PERIPHERAL_VPM_READ || -- b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) { -- return true; -+ /* Only one TMU read per instruction */ -+ if ((a_peripherals & V3D_PERIPHERAL_TMU_READ) && -+ (b_peripherals & V3D_PERIPHERAL_TMU_READ)) { -+ return false; - } -- if (b_peripherals == V3D_PERIPHERAL_TMU_READ && -- (a_peripherals == V3D_PERIPHERAL_VPM_READ || -- a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) { -- return true; -+ -+ /* Only one TLB access per instruction */ -+ if ((a_peripherals & (V3D_PERIPHERAL_TLB_WRITE | -+ V3D_PERIPHERAL_TLB_READ)) && -+ (b_peripherals & (V3D_PERIPHERAL_TLB_WRITE | -+ V3D_PERIPHERAL_TLB_READ))) { -+ return false; - } - -- return false; -+ return true; - } - - /* Compute a bitmask of which rf registers are used between -diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c -index 12788692432..fde6695d59b 100644 ---- a/src/broadcom/compiler/qpu_validate.c -+++ b/src/broadcom/compiler/qpu_validate.c -@@ -227,7 +227,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) - vpm_writes + - tlb_writes + - tsy_writes + -- inst->sig.ldtmu + -+ (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) + - inst->sig.ldtlb + - inst->sig.ldvpm + - inst->sig.ldtlbu > 1) { -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index 195a0dcd232..f54ce7210fb 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -649,12 +649,14 @@ v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op) - } - - bool --v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) -+v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst) - { -- if (inst->sig.ldtlb || -- inst->sig.ldtlbu) -- return true; -+ return inst->sig.ldtlb || inst->sig.ldtlbu; -+} - -+bool -+v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst) -+{ - if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { - if (inst->alu.add.op != V3D_QPU_A_NOP && - inst->alu.add.magic_write && -@@ -672,6 +674,12 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) - return false; - } - -+bool -+v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) -+{ -+ return v3d_qpu_writes_tlb(inst) || v3d_qpu_reads_tlb(inst); -+} -+ - bool - v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) - { -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index 4b34d17bd4c..dece45c5c54 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -472,6 +472,8 @@ bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; - bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; - bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; - bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; -+bool v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; -+bool v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; - bool v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0051-broadcom-qpu-add-packing-for-fmov-on-ADD-alu.patch b/projects/RPi/devices/RPi5/patches/mesa/0051-broadcom-qpu-add-packing-for-fmov-on-ADD-alu.patch deleted file mode 100644 index 0002304bd8..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0051-broadcom-qpu-add-packing-for-fmov-on-ADD-alu.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 3520cceb87fb2f9765ba7dbe2771fbd0cadca78d Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 26 Oct 2021 08:37:54 +0200 -Subject: [PATCH 051/142] broadcom/qpu: add packing for fmov on ADD alu - ---- - src/broadcom/qpu/qpu_pack.c | 31 +++++++++++++++++++++++++++++++ - 1 file changed, 31 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 180d7ab08a3..ed5a8bc667d 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -1332,6 +1332,20 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst - } - break; - -+ case V3D_QPU_A_FMOV: -+ instr->alu.add.output_pack = raddr_b & 0x3; -+ -+ /* Mul alu FMOV has one additional variant */ -+ int32_t unpack = (raddr_b >> 2) & 0x7; -+ if (unpack == 7) -+ return false; -+ -+ if (!v3d_qpu_float32_unpack_unpack(unpack, -+ &instr->alu.add.a.unpack)) { -+ return false; -+ } -+ break; -+ - default: - instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE; -@@ -1992,6 +2006,23 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - break; - } - -+ case V3D_QPU_A_FMOV: { -+ uint32_t packed; -+ -+ if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, -+ &packed)) { -+ return false; -+ } -+ raddr_b = packed; -+ -+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, -+ &packed)) { -+ return false; -+ } -+ raddr_b |= packed << 2; -+ break; -+ } -+ - default: - if (instr->alu.add.op != V3D_QPU_A_NOP && - (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0052-broadcom-compiler-handle-rf0-flops-storage-restricti.patch b/projects/RPi/devices/RPi5/patches/mesa/0052-broadcom-compiler-handle-rf0-flops-storage-restricti.patch deleted file mode 100644 index f173a0f4c0..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0052-broadcom-compiler-handle-rf0-flops-storage-restricti.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 7c7ab15b3c9def4bc3bb5be492228a933c325f8a Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 6 Oct 2021 13:58:27 +0200 -Subject: [PATCH 052/142] broadcom/compiler: handle rf0 flops storage - restriction in v71 - ---- - src/broadcom/compiler/qpu_schedule.c | 81 +++++++++++++++++++++++++++- - 1 file changed, 79 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index adb501e85ce..7048d9257b6 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -538,6 +538,10 @@ struct choose_scoreboard { - int ldvary_count; - int pending_ldtmu_count; - bool first_ldtmu_after_thrsw; -+ -+ /* V3D 7.x */ -+ int last_implicit_rf0_write_tick; -+ bool has_rf0_flops_conflict; - }; - - static bool -@@ -1499,6 +1503,62 @@ update_scoreboard_tmu_tracking(struct choose_scoreboard *scoreboard, - } - } - -+static void -+set_has_rf0_flops_conflict(struct choose_scoreboard *scoreboard, -+ const struct v3d_qpu_instr *inst, -+ const struct v3d_device_info *devinfo) -+{ -+ if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick && -+ v3d_qpu_sig_writes_address(devinfo, &inst->sig) && -+ !inst->sig_magic) { -+ scoreboard->has_rf0_flops_conflict = true; -+ } -+} -+ -+static void -+update_scoreboard_for_rf0_flops(struct choose_scoreboard *scoreboard, -+ const struct v3d_qpu_instr *inst, -+ const struct v3d_device_info *devinfo) -+{ -+ if (devinfo->ver < 71) -+ return; -+ -+ /* Thread switch restrictions: -+ * -+ * At the point of a thread switch or thread end (when the actual -+ * thread switch or thread end happens, not when the signalling -+ * instruction is processed): -+ * -+ * - If the most recent write to rf0 was from a ldunif, ldunifa, or -+ * ldvary instruction in which another signal also wrote to the -+ * register file, and the final instruction of the thread section -+ * contained a signal which wrote to the register file, then the -+ * value of rf0 is undefined at the start of the new section -+ * -+ * Here we use the scoreboard to track if our last rf0 implicit write -+ * happens at the same time that another signal writes the register -+ * file (has_rf0_flops_conflict). We will use that information when -+ * scheduling thrsw instructions to avoid putting anything in their -+ * last delay slot which has a signal that writes to the register file. -+ */ -+ -+ /* Reset tracking if we have an explicit rf0 write or we are starting -+ * a new thread section. -+ */ -+ if (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) || -+ scoreboard->tick - scoreboard->last_thrsw_tick == 3) { -+ scoreboard->last_implicit_rf0_write_tick = -10; -+ scoreboard->has_rf0_flops_conflict = false; -+ } -+ -+ if (v3d_qpu_writes_rf0_implicitly(devinfo, inst)) { -+ scoreboard->last_implicit_rf0_write_tick = inst->sig.ldvary ? -+ scoreboard->tick + 1 : scoreboard->tick; -+ } -+ -+ set_has_rf0_flops_conflict(scoreboard, inst, devinfo); -+} -+ - static void - update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard, - const struct qinst *qinst, -@@ -1542,6 +1602,8 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard, - if (inst->sig.ldvary) - scoreboard->last_ldvary_tick = scoreboard->tick; - -+ update_scoreboard_for_rf0_flops(scoreboard, inst, devinfo); -+ - update_scoreboard_tmu_tracking(scoreboard, qinst); - } - -@@ -1812,6 +1874,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - */ - static bool - qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c, -+ struct choose_scoreboard *scoreboard, - const struct qinst *qinst, - uint32_t slot) - { -@@ -1842,6 +1905,17 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c, - if (v3d_qpu_writes_unifa(c->devinfo, &qinst->qpu)) - return false; - -+ /* See comment when we set has_rf0_flops_conflict for details */ -+ if (c->devinfo->ver >= 71 && -+ slot == 2 && -+ v3d_qpu_sig_writes_address(c->devinfo, &qinst->qpu.sig) && -+ !qinst->qpu.sig_magic) { -+ if (scoreboard->has_rf0_flops_conflict) -+ return false; -+ if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick) -+ return false; -+ } -+ - return true; - } - -@@ -1874,7 +1948,7 @@ qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c, - * also apply to instructions scheduled after the thrsw that we want - * to place in its delay slots. - */ -- if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot)) -+ if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard, qinst, slot)) - return false; - - /* TLB access is disallowed until scoreboard wait is executed, which -@@ -1947,8 +2021,10 @@ valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard - bool is_thrend) - { - for (int slot = 0; slot < instructions_in_sequence; slot++) { -- if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot)) -+ if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard, -+ qinst, slot)) { - return false; -+ } - - if (is_thrend && - !qpu_inst_valid_in_thrend_slot(c, qinst, slot)) { -@@ -2718,6 +2794,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c) - scoreboard.last_setmsf_tick = -10; - scoreboard.last_stallable_sfu_tick = -10; - scoreboard.first_ldtmu_after_thrsw = true; -+ scoreboard.last_implicit_rf0_write_tick = - 10; - - if (debug) { - fprintf(stderr, "Pre-schedule instructions\n"); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0053-broadcom-compiler-enable-ldvary-pipelining-on-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0053-broadcom-compiler-enable-ldvary-pipelining-on-v71.patch deleted file mode 100644 index ffd2489d53..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0053-broadcom-compiler-enable-ldvary-pipelining-on-v71.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 0c6910721eb50b38b3388c2d2344b6ecfe0fee58 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 27 Oct 2021 11:35:12 +0200 -Subject: [PATCH 053/142] broadcom/compiler: enable ldvary pipelining on v71 - ---- - src/broadcom/compiler/qpu_schedule.c | 121 ++++++++++++++++++--------- - 1 file changed, 80 insertions(+), 41 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 7048d9257b6..334ffdc6d58 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -2312,46 +2312,72 @@ emit_branch(struct v3d_compile *c, - } - - static bool --alu_reads_register(struct v3d_qpu_instr *inst, -+alu_reads_register(const struct v3d_device_info *devinfo, -+ struct v3d_qpu_instr *inst, - bool add, bool magic, uint32_t index) - { - uint32_t num_src; -- enum v3d_qpu_mux mux_a, mux_b; -- -- if (add) { -+ if (add) - num_src = v3d_qpu_add_op_num_src(inst->alu.add.op); -- mux_a = inst->alu.add.a.mux; -- mux_b = inst->alu.add.b.mux; -- } else { -+ else - num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op); -- mux_a = inst->alu.mul.a.mux; -- mux_b = inst->alu.mul.b.mux; -- } - -- for (int i = 0; i < num_src; i++) { -- if (magic) { -- if (i == 0 && mux_a == index) -- return true; -- if (i == 1 && mux_b == index) -- return true; -+ if (devinfo->ver <= 42) { -+ enum v3d_qpu_mux mux_a, mux_b; -+ if (add) { -+ mux_a = inst->alu.add.a.mux; -+ mux_b = inst->alu.add.b.mux; - } else { -- if (i == 0 && mux_a == V3D_QPU_MUX_A && -- inst->raddr_a == index) { -- return true; -- } -- if (i == 0 && mux_a == V3D_QPU_MUX_B && -- inst->raddr_b == index) { -- return true; -- } -- if (i == 1 && mux_b == V3D_QPU_MUX_A && -- inst->raddr_a == index) { -- return true; -- } -- if (i == 1 && mux_b == V3D_QPU_MUX_B && -- inst->raddr_b == index) { -- return true; -+ mux_a = inst->alu.mul.a.mux; -+ mux_b = inst->alu.mul.b.mux; -+ } -+ -+ for (int i = 0; i < num_src; i++) { -+ if (magic) { -+ if (i == 0 && mux_a == index) -+ return true; -+ if (i == 1 && mux_b == index) -+ return true; -+ } else { -+ if (i == 0 && mux_a == V3D_QPU_MUX_A && -+ inst->raddr_a == index) { -+ return true; -+ } -+ if (i == 0 && mux_a == V3D_QPU_MUX_B && -+ inst->raddr_b == index) { -+ return true; -+ } -+ if (i == 1 && mux_b == V3D_QPU_MUX_A && -+ inst->raddr_a == index) { -+ return true; -+ } -+ if (i == 1 && mux_b == V3D_QPU_MUX_B && -+ inst->raddr_b == index) { -+ return true; -+ } - } - } -+ -+ return false; -+ } -+ -+ assert(devinfo->ver >= 71); -+ assert(!magic); -+ -+ uint32_t raddr_a, raddr_b; -+ if (add) { -+ raddr_a = inst->alu.add.a.raddr; -+ raddr_b = inst->alu.add.b.raddr; -+ } else { -+ raddr_a = inst->alu.mul.a.raddr; -+ raddr_b = inst->alu.mul.b.raddr; -+ } -+ -+ for (int i = 0; i < num_src; i++) { -+ if (i == 0 && raddr_a == index) -+ return true; -+ if (i == 1 && raddr_b == index) -+ return true; - } - - return false; -@@ -2386,6 +2412,8 @@ fixup_pipelined_ldvary(struct v3d_compile *c, - struct qblock *block, - struct v3d_qpu_instr *inst) - { -+ const struct v3d_device_info *devinfo = c->devinfo; -+ - /* We only call this if we have successfully merged an ldvary into a - * previous instruction. - */ -@@ -2398,9 +2426,9 @@ fixup_pipelined_ldvary(struct v3d_compile *c, - * the ldvary destination, if it does, then moving the ldvary before - * it would overwrite it. - */ -- if (alu_reads_register(inst, true, ldvary_magic, ldvary_index)) -+ if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index)) - return false; -- if (alu_reads_register(inst, false, ldvary_magic, ldvary_index)) -+ if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index)) - return false; - - /* The implicit ldvary destination may not be written to by a signal -@@ -2436,13 +2464,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c, - } - - /* The previous instruction cannot have a conflicting signal */ -- if (v3d_qpu_sig_writes_address(c->devinfo, &prev->qpu.sig)) -+ if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig)) - return false; - - uint32_t sig; - struct v3d_qpu_sig new_sig = prev->qpu.sig; - new_sig.ldvary = true; -- if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig)) -+ if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig)) - return false; - - /* The previous instruction cannot use flags since ldvary uses the -@@ -2471,14 +2499,25 @@ fixup_pipelined_ldvary(struct v3d_compile *c, - inst->sig_magic = false; - inst->sig_addr = 0; - -- /* By moving ldvary to the previous instruction we make it update -- * r5 in the current one, so nothing else in it should write r5. -- * This should've been prevented by our dependency tracking, which -+ /* Update rf0 flops tracking for new ldvary delayed rf0 write tick */ -+ if (devinfo->ver >= 71) { -+ scoreboard->last_implicit_rf0_write_tick = scoreboard->tick; -+ set_has_rf0_flops_conflict(scoreboard, inst, devinfo); -+ } -+ -+ /* By moving ldvary to the previous instruction we make it update r5 -+ * (rf0 for ver >= 71) in the current one, so nothing else in it -+ * should write this register. -+ * -+ * This should've been prevented by our depedency tracking, which - * would not allow ldvary to be paired up with an instruction that -- * writes r5 (since our dependency tracking doesn't know that the -- * ldvary write r5 happens in the next instruction). -+ * writes r5/rf0 (since our dependency tracking doesn't know that the -+ * ldvary write to r5/rf0 happens in the next instruction). - */ -- assert(!v3d_qpu_writes_r5(c->devinfo, inst)); -+ assert(!v3d_qpu_writes_r5(devinfo, inst)); -+ assert(devinfo->ver <= 42 || -+ (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) && -+ !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0))); - - return true; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0054-broadcom-compiler-try-to-use-ldunif-a-instead-of-ldu.patch b/projects/RPi/devices/RPi5/patches/mesa/0054-broadcom-compiler-try-to-use-ldunif-a-instead-of-ldu.patch deleted file mode 100644 index 5e4dc3adce..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0054-broadcom-compiler-try-to-use-ldunif-a-instead-of-ldu.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 0670d642bb91fc68ce73f2d9fb88c482295a446d Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 28 Oct 2021 14:13:29 +0200 -Subject: [PATCH 054/142] broadcom/compiler: try to use ldunif(a) instead of - ldunif(a)rf in v71 - -The rf variants need to encode the destination in the cond bits, which -prevents these to be merged with any other instruction that need them. - -In 4.x, ldunif(a) write to r5 which is a special register that only -ldunif(a) and ldvary can write so we have a special register class for -it and only allow it for them. Then when we need to choose a register -for a node, if this register is available we always use it. - -In 7.x these instructions write to rf0, which can be used by any -instruction, so instead of restricting rf0, we track the temps that -are used as ldunif(a) destinations and use that information to favor -rf0 for them. ---- - src/broadcom/compiler/v3d_compiler.h | 3 ++ - src/broadcom/compiler/vir_register_allocate.c | 34 ++++++++++++++++--- - src/broadcom/compiler/vir_to_qpu.c | 11 ++++-- - 3 files changed, 41 insertions(+), 7 deletions(-) - -diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h -index 7e8f3bfc1a7..36adf8830b5 100644 ---- a/src/broadcom/compiler/v3d_compiler.h -+++ b/src/broadcom/compiler/v3d_compiler.h -@@ -613,6 +613,9 @@ struct v3d_ra_node_info { - struct { - uint32_t priority; - uint8_t class_bits; -+ -+ /* V3D 7.x */ -+ bool is_ldunif_dst; - } *info; - uint32_t alloc_count; - }; -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index e0adc1de7a4..1be091f8518 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -384,6 +384,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits) - /* We fill the node priority after we are done inserting spills */ - c->nodes.info[node].class_bits = class_bits; - c->nodes.info[node].priority = 0; -+ c->nodes.info[node].is_ldunif_dst = false; - } - - /* The spill offset for this thread takes a bit of setup, so do it once at -@@ -899,9 +900,22 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra, - - static bool - v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, -+ unsigned int node, - BITSET_WORD *regs, - unsigned int *out) - { -+ /* In V3D 7.x, try to assign rf0 to temps used as ldunif's dst -+ * so we can avoid turning them into ldunifrf (which uses the -+ * cond field to encode the dst and would prevent merge with -+ * instructions that use cond flags). -+ */ -+ if (v3d_ra->nodes->info[node].is_ldunif_dst && -+ BITSET_TEST(regs, v3d_ra->phys_index)) { -+ assert(v3d_ra->devinfo->ver >= 71); -+ *out = v3d_ra->phys_index; -+ return true; -+ } -+ - for (int i = 0; i < PHYS_COUNT; i++) { - int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT; - int phys = v3d_ra->phys_index + phys_off; -@@ -927,7 +941,7 @@ v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data) - return reg; - } - -- if (v3d_ra_select_rf(v3d_ra, regs, ®)) -+ if (v3d_ra_select_rf(v3d_ra, n, regs, ®)) - return reg; - - /* If we ran out of physical registers try to assign an accumulator -@@ -1139,15 +1153,24 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, - } - } - } else { -- /* If the instruction has an implicit write -- * we can't allocate its dest to the same -- * register. -+ /* Make sure we don't allocate the ldvary's -+ * destination to rf0, since it would clash -+ * with its implicit write to that register. - */ -- if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) { -+ if (inst->qpu.sig.ldvary) { - ra_add_node_interference(c->g, - temp_to_node(c, inst->dst.index), - implicit_rf_nodes[0]); - } -+ /* Flag dst temps from ldunif(a) instructions -+ * so we can try to assign rf0 to them and avoid -+ * converting these to ldunif(a)rf. -+ */ -+ if (inst->qpu.sig.ldunif || inst->qpu.sig.ldunifa) { -+ const uint32_t dst_n = -+ temp_to_node(c, inst->dst.index); -+ c->nodes.info[dst_n].is_ldunif_dst = true; -+ } - } - } - -@@ -1222,6 +1245,7 @@ v3d_register_allocate(struct v3d_compile *c) - * without accumulators that can have implicit writes to phys regs. - */ - for (uint32_t i = 0; i < num_ra_nodes; i++) { -+ c->nodes.info[i].is_ldunif_dst = false; - if (c->devinfo->has_accumulators && i < ACC_COUNT) { - acc_nodes[i] = i; - ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i); -diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c -index afc4941fdb1..cbbb495592b 100644 ---- a/src/broadcom/compiler/vir_to_qpu.c -+++ b/src/broadcom/compiler/vir_to_qpu.c -@@ -345,8 +345,15 @@ v3d_generate_code_block(struct v3d_compile *c, - assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); - assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); - -- if (!dst.magic || -- dst.index != V3D_QPU_WADDR_R5) { -+ bool use_rf; -+ if (c->devinfo->has_accumulators) { -+ use_rf = !dst.magic || -+ dst.index != V3D_QPU_WADDR_R5; -+ } else { -+ use_rf = dst.magic || dst.index != 0; -+ } -+ -+ if (use_rf) { - assert(c->devinfo->ver >= 40); - - if (qinst->qpu.sig.ldunif) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0055-broadcom-compiler-don-t-assign-rf0-to-temps-that-con.patch b/projects/RPi/devices/RPi5/patches/mesa/0055-broadcom-compiler-don-t-assign-rf0-to-temps-that-con.patch deleted file mode 100644 index d03707a3fc..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0055-broadcom-compiler-don-t-assign-rf0-to-temps-that-con.patch +++ /dev/null @@ -1,82 +0,0 @@ -From cbed3b97394da09c9ae644c79e098e3ba8b5c3e8 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 29 Oct 2021 13:00:56 +0200 -Subject: [PATCH 055/142] broadcom/compiler: don't assign rf0 to temps that - conflict with ldvary - -ldvary writes to rf0 implicitly, so we don't want to allocate rf0 to -any temps that are live across ldvary's rf0 live ranges. ---- - src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++++++- - 1 file changed, 38 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 1be091f8518..6f7b1ca0589 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -1019,6 +1019,7 @@ static void - update_graph_and_reg_classes_for_inst(struct v3d_compile *c, - int *acc_nodes, - int *implicit_rf_nodes, -+ int last_ldvary_ip, - struct qinst *inst) - { - int32_t ip = inst->ip; -@@ -1125,6 +1126,25 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, - } - } - -+ /* Don't allocate rf0 to temps that cross ranges where we have -+ * live implicit rf0 writes from ldvary. We can identify these -+ * by tracking the last ldvary instruction and explicit reads -+ * of rf0. -+ */ -+ if (c->devinfo->ver >= 71 && -+ ((inst->src[0].file == QFILE_REG && inst->src[0].index == 0) || -+ (vir_get_nsrc(inst) > 1 && -+ inst->src[1].file == QFILE_REG && inst->src[1].index == 0))) { -+ for (int i = 0; i < c->num_temps; i++) { -+ if (c->temp_start[i] < ip && -+ c->temp_end[i] > last_ldvary_ip) { -+ ra_add_node_interference(c->g, -+ temp_to_node(c, i), -+ implicit_rf_nodes[0]); -+ } -+ } -+ } -+ - if (inst->dst.file == QFILE_TEMP) { - /* Only a ldunif gets to write to R5, which only has a - * single 32-bit channel of storage. -@@ -1270,10 +1290,27 @@ v3d_register_allocate(struct v3d_compile *c) - * interferences. - */ - int ip = 0; -+ int last_ldvary_ip = -1; - vir_for_each_inst_inorder(inst, c) { - inst->ip = ip++; -+ -+ /* ldunif(a) always write to a temporary, so we have -+ * liveness info available to decide if rf0 is -+ * available for them, however, ldvary is different: -+ * it always writes to rf0 directly so we don't have -+ * liveness information for its implicit rf0 write. -+ * -+ * That means the allocator may assign rf0 to a temp -+ * that is defined while an implicit rf0 write from -+ * ldvary is still live. We fix that by manually -+ * tracking rf0 live ranges from ldvary instructions. -+ */ -+ if (inst->qpu.sig.ldvary) -+ last_ldvary_ip = ip; -+ - update_graph_and_reg_classes_for_inst(c, acc_nodes, -- implicit_rf_nodes, inst); -+ implicit_rf_nodes, -+ last_ldvary_ip, inst); - } - - /* Set the register classes for all our temporaries in the graph */ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0056-broadcom-compiler-convert-mul-to-add-when-needed-to-.patch b/projects/RPi/devices/RPi5/patches/mesa/0056-broadcom-compiler-convert-mul-to-add-when-needed-to-.patch deleted file mode 100644 index dac7b03bfc..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0056-broadcom-compiler-convert-mul-to-add-when-needed-to-.patch +++ /dev/null @@ -1,139 +0,0 @@ -From cbaa469c09974c1574b16f559173694904fe1bb0 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 25 Oct 2021 09:38:57 +0200 -Subject: [PATCH 056/142] broadcom/compiler: convert mul to add when needed to - allow merge - -V3D 7.x added 'mov' opcodes to the ADD alu, so now it is possible to -move these to the ADD alu to facilitate merging them with other MUL -instructions. ---- - src/broadcom/compiler/qpu_schedule.c | 102 ++++++++++++++++++++++++--- - 1 file changed, 94 insertions(+), 8 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 334ffdc6d58..caa84254998 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1086,6 +1086,57 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst) - inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; - } - -+static bool -+can_do_mul_as_add(const struct v3d_device_info *devinfo, enum v3d_qpu_mul_op op) -+{ -+ switch (op) { -+ case V3D_QPU_M_MOV: -+ case V3D_QPU_M_FMOV: -+ return devinfo->ver >= 71; -+ default: -+ return false; -+ } -+} -+ -+static enum v3d_qpu_mul_op -+mul_op_as_add_op(enum v3d_qpu_mul_op op) -+{ -+ switch (op) { -+ case V3D_QPU_M_MOV: -+ return V3D_QPU_A_MOV; -+ case V3D_QPU_M_FMOV: -+ return V3D_QPU_A_FMOV; -+ default: -+ unreachable("unexpected mov opcode"); -+ } -+} -+ -+static void -+qpu_convert_mul_to_add(struct v3d_qpu_instr *inst) -+{ -+ STATIC_ASSERT(sizeof(inst->alu.add) == sizeof(inst->alu.mul)); -+ assert(inst->alu.mul.op != V3D_QPU_M_NOP); -+ assert(inst->alu.add.op == V3D_QPU_A_NOP); -+ -+ memcpy(&inst->alu.add, &inst->alu.mul, sizeof(inst->alu.add)); -+ inst->alu.add.op = mul_op_as_add_op(inst->alu.mul.op); -+ inst->alu.mul.op = V3D_QPU_M_NOP; -+ -+ inst->flags.ac = inst->flags.mc; -+ inst->flags.apf = inst->flags.mpf; -+ inst->flags.auf = inst->flags.muf; -+ inst->flags.mc = V3D_QPU_COND_NONE; -+ inst->flags.mpf = V3D_QPU_PF_NONE; -+ inst->flags.muf = V3D_QPU_UF_NONE; -+ -+ inst->alu.add.output_pack = inst->alu.mul.output_pack; -+ inst->alu.add.a.unpack = inst->alu.mul.a.unpack; -+ inst->alu.add.b.unpack = inst->alu.mul.b.unpack; -+ inst->alu.mul.output_pack = V3D_QPU_PACK_NONE; -+ inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE; -+ inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE; -+} -+ - static bool - qpu_merge_inst(const struct v3d_device_info *devinfo, - struct v3d_qpu_instr *result, -@@ -1151,17 +1202,52 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - } - } - -+ struct v3d_qpu_instr add_inst; - if (b->alu.mul.op != V3D_QPU_M_NOP) { -- if (a->alu.mul.op != V3D_QPU_M_NOP) -- return false; -- merge.alu.mul = b->alu.mul; -+ if (a->alu.mul.op == V3D_QPU_M_NOP) { -+ merge.alu.mul = b->alu.mul; -+ -+ merge.flags.mc = b->flags.mc; -+ merge.flags.mpf = b->flags.mpf; -+ merge.flags.muf = b->flags.muf; -+ -+ mul_instr = b; -+ add_instr = a; -+ } -+ /* If a's mul op is used but its add op is not, then see if we -+ * can convert either a's mul op or b's mul op to an add op -+ * so we can merge. -+ */ -+ else if (a->alu.add.op == V3D_QPU_A_NOP && -+ can_do_mul_as_add(devinfo, b->alu.mul.op)) { -+ add_inst = *b; -+ qpu_convert_mul_to_add(&add_inst); - -- merge.flags.mc = b->flags.mc; -- merge.flags.mpf = b->flags.mpf; -- merge.flags.muf = b->flags.muf; -+ merge.alu.add = add_inst.alu.add; - -- mul_instr = b; -- add_instr = a; -+ merge.flags.ac = b->flags.mc; -+ merge.flags.apf = b->flags.mpf; -+ merge.flags.auf = b->flags.muf; -+ -+ mul_instr = a; -+ add_instr = &add_inst; -+ } else if (a->alu.add.op == V3D_QPU_A_NOP && -+ can_do_mul_as_add(devinfo, a->alu.mul.op)) { -+ add_inst = *a; -+ qpu_convert_mul_to_add(&add_inst); -+ -+ merge = add_inst; -+ merge.alu.mul = b->alu.mul; -+ -+ merge.flags.mc = b->flags.mc; -+ merge.flags.mpf = b->flags.mpf; -+ merge.flags.muf = b->flags.muf; -+ -+ mul_instr = b; -+ add_instr = &add_inst; -+ } else { -+ return false; -+ } - } - - /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0057-broadcom-compiler-implement-small-immediates-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0057-broadcom-compiler-implement-small-immediates-for-v71.patch deleted file mode 100644 index 02310764ef..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0057-broadcom-compiler-implement-small-immediates-for-v71.patch +++ /dev/null @@ -1,418 +0,0 @@ -From b59b3725fb16f4ab1ac0db86a5452a4ed6176074 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 3 Nov 2021 10:34:19 +0100 -Subject: [PATCH 057/142] broadcom/compiler: implement small immediates for v71 - ---- - src/broadcom/compiler/qpu_schedule.c | 90 +++++++++++++------ - src/broadcom/compiler/qpu_validate.c | 20 ++++- - .../compiler/vir_opt_small_immediates.c | 26 +++++- - src/broadcom/compiler/vir_to_qpu.c | 11 ++- - src/broadcom/qpu/qpu_disasm.c | 1 - - src/broadcom/qpu/qpu_instr.c | 8 +- - src/broadcom/qpu/qpu_instr.h | 2 +- - src/broadcom/qpu/qpu_pack.c | 36 ++++---- - 8 files changed, 139 insertions(+), 55 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index caa84254998..bd1c920848a 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -714,7 +714,6 @@ qpu_instruction_uses_rf(const struct v3d_device_info *devinfo, - !inst->sig.small_imm_b && (inst->raddr_b == waddr)) - return true; - } else { -- /* FIXME: skip if small immediate */ - if (v3d71_qpu_reads_raddr(inst, waddr)) - return true; - } -@@ -948,10 +947,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a, - return raddrs_used; - } - --/* Take two instructions and attempt to merge their raddr fields -- * into one merged instruction. Returns false if the two instructions -- * access more than two different rf registers between them, or more -- * than one rf register and one small immediate. -+/* Takes two instructions and attempts to merge their raddr fields (including -+ * small immediates) into one merged instruction. For V3D 4.x, returns false -+ * if the two instructions access more than two different rf registers between -+ * them, or more than one rf register and one small immediate. For 7.x returns -+ * false if both instructions use small immediates. - */ - static bool - qpu_merge_raddrs(struct v3d_qpu_instr *result, -@@ -959,6 +959,27 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, - const struct v3d_qpu_instr *mul_instr, - const struct v3d_device_info *devinfo) - { -+ if (devinfo->ver >= 71) { -+ assert(add_instr->sig.small_imm_a + -+ add_instr->sig.small_imm_b <= 1); -+ assert(add_instr->sig.small_imm_c + -+ add_instr->sig.small_imm_d == 0); -+ assert(mul_instr->sig.small_imm_a + -+ mul_instr->sig.small_imm_b == 0); -+ assert(mul_instr->sig.small_imm_c + -+ mul_instr->sig.small_imm_d <= 1); -+ -+ result->sig.small_imm_a = add_instr->sig.small_imm_a; -+ result->sig.small_imm_b = add_instr->sig.small_imm_b; -+ result->sig.small_imm_c = mul_instr->sig.small_imm_c; -+ result->sig.small_imm_d = mul_instr->sig.small_imm_d; -+ -+ return (result->sig.small_imm_a + -+ result->sig.small_imm_b + -+ result->sig.small_imm_c + -+ result->sig.small_imm_d) <= 1; -+ } -+ - assert(devinfo->ver <= 42); - - uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr); -@@ -1060,7 +1081,8 @@ add_op_as_mul_op(enum v3d_qpu_add_op op) - } - - static void --qpu_convert_add_to_mul(struct v3d_qpu_instr *inst) -+qpu_convert_add_to_mul(const struct v3d_device_info *devinfo, -+ struct v3d_qpu_instr *inst) - { - STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add)); - assert(inst->alu.add.op != V3D_QPU_A_NOP); -@@ -1084,6 +1106,18 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst) - inst->alu.add.output_pack = V3D_QPU_PACK_NONE; - inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE; - inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE; -+ -+ if (devinfo->ver >= 71) { -+ assert(!inst->sig.small_imm_c && !inst->sig.small_imm_d); -+ assert(inst->sig.small_imm_a + inst->sig.small_imm_b <= 1); -+ if (inst->sig.small_imm_a) { -+ inst->sig.small_imm_c = true; -+ inst->sig.small_imm_a = false; -+ } else if (inst->sig.small_imm_b) { -+ inst->sig.small_imm_d = true; -+ inst->sig.small_imm_b = false; -+ } -+ } - } - - static bool -@@ -1135,6 +1169,16 @@ qpu_convert_mul_to_add(struct v3d_qpu_instr *inst) - inst->alu.mul.output_pack = V3D_QPU_PACK_NONE; - inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE; - inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE; -+ -+ assert(!inst->sig.small_imm_a && !inst->sig.small_imm_b); -+ assert(inst->sig.small_imm_c + inst->sig.small_imm_d <= 1); -+ if (inst->sig.small_imm_c) { -+ inst->sig.small_imm_a = true; -+ inst->sig.small_imm_c = false; -+ } else if (inst->sig.small_imm_d) { -+ inst->sig.small_imm_b = true; -+ inst->sig.small_imm_d = false; -+ } - } - - static bool -@@ -1173,20 +1217,20 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - else if (a->alu.mul.op == V3D_QPU_M_NOP && - can_do_add_as_mul(b->alu.add.op)) { - mul_inst = *b; -- qpu_convert_add_to_mul(&mul_inst); -+ qpu_convert_add_to_mul(devinfo, &mul_inst); - - merge.alu.mul = mul_inst.alu.mul; - -- merge.flags.mc = b->flags.ac; -- merge.flags.mpf = b->flags.apf; -- merge.flags.muf = b->flags.auf; -+ merge.flags.mc = mul_inst.flags.mc; -+ merge.flags.mpf = mul_inst.flags.mpf; -+ merge.flags.muf = mul_inst.flags.muf; - - add_instr = a; - mul_instr = &mul_inst; - } else if (a->alu.mul.op == V3D_QPU_M_NOP && - can_do_add_as_mul(a->alu.add.op)) { - mul_inst = *a; -- qpu_convert_add_to_mul(&mul_inst); -+ qpu_convert_add_to_mul(devinfo, &mul_inst); - - merge = mul_inst; - merge.alu.add = b->alu.add; -@@ -1225,9 +1269,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - - merge.alu.add = add_inst.alu.add; - -- merge.flags.ac = b->flags.mc; -- merge.flags.apf = b->flags.mpf; -- merge.flags.auf = b->flags.muf; -+ merge.flags.ac = add_inst.flags.ac; -+ merge.flags.apf = add_inst.flags.apf; -+ merge.flags.auf = add_inst.flags.auf; - - mul_instr = a; - add_instr = &add_inst; -@@ -1252,17 +1296,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - - /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and - * they have restrictions on the number of raddrs that can be adressed -- * in a single instruction. -- * -- * FIXME: for V3D 7.x we can't merge instructions if they address more -- * than one small immediate. For now, we don't support small immediates, -- * so it is not a problem. -+ * in a single instruction. In V3D 7.x, we don't have that restriction, -+ * but we are still limited to a single small immediate per instruction. - */ -- if (devinfo->ver <= 42) { -- if (add_instr && mul_instr && -- !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) { -- return false; -- } -+ if (add_instr && mul_instr && -+ !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) { -+ return false; - } - - merge.sig.thrsw |= b->sig.thrsw; -@@ -1273,7 +1312,6 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, - merge.sig.ldtmu |= b->sig.ldtmu; - merge.sig.ldvary |= b->sig.ldvary; - merge.sig.ldvpm |= b->sig.ldvpm; -- merge.sig.small_imm_b |= b->sig.small_imm_b; - merge.sig.ldtlb |= b->sig.ldtlb; - merge.sig.ldtlbu |= b->sig.ldtlbu; - merge.sig.ucb |= b->sig.ucb; -@@ -1933,8 +1971,6 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - if (c->devinfo->ver >= 71) { - /* RF2-3 might be overwritten during the delay slots by - * fragment shader setup. -- * -- * FIXME: handle small immediate cases - */ - if (v3d71_qpu_reads_raddr(inst, 2) || - v3d71_qpu_reads_raddr(inst, 3)) { -diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c -index fde6695d59b..41070484286 100644 ---- a/src/broadcom/compiler/qpu_validate.c -+++ b/src/broadcom/compiler/qpu_validate.c -@@ -116,8 +116,24 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) - return; - - if (devinfo->ver < 71) { -- if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d) -- fail_instr(state, "small imm a/c/d added after V3D 7.1"); -+ if (inst->sig.small_imm_a || inst->sig.small_imm_c || -+ inst->sig.small_imm_d) { -+ fail_instr(state, "small imm a/c/d added after V3D 7.1"); -+ } -+ } else { -+ if ((inst->sig.small_imm_a || inst->sig.small_imm_b) && -+ !vir_is_add(qinst)) { -+ fail_instr(state, "small imm a/b used but no ADD inst"); -+ } -+ if ((inst->sig.small_imm_c || inst->sig.small_imm_d) && -+ !vir_is_mul(qinst)) { -+ fail_instr(state, "small imm c/d used but no MUL inst"); -+ } -+ if (inst->sig.small_imm_a + inst->sig.small_imm_b + -+ inst->sig.small_imm_c + inst->sig.small_imm_d > 1) { -+ fail_instr(state, "only one small immediate can be " -+ "enabled per instruction"); -+ } - } - - /* LDVARY writes r5 two instructions later and LDUNIF writes -diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c -index df0d6c36c9b..ed5bc011964 100644 ---- a/src/broadcom/compiler/vir_opt_small_immediates.c -+++ b/src/broadcom/compiler/vir_opt_small_immediates.c -@@ -44,7 +44,9 @@ vir_opt_small_immediates(struct v3d_compile *c) - /* The small immediate value sits in the raddr B field, so we - * can't have 2 small immediates in one instruction (unless - * they're the same value, but that should be optimized away -- * elsewhere). -+ * elsewhere). Since 7.x we can encode small immediates in -+ * any raddr field, but each instruction can still only use -+ * one. - */ - bool uses_small_imm = false; - for (int i = 0; i < vir_get_nsrc(inst); i++) { -@@ -80,7 +82,22 @@ vir_opt_small_immediates(struct v3d_compile *c) - */ - struct v3d_qpu_sig new_sig = inst->qpu.sig; - uint32_t sig_packed; -- new_sig.small_imm_b = true; -+ if (c->devinfo->ver <= 42) { -+ new_sig.small_imm_b = true; -+ } else { -+ if (vir_is_add(inst)) { -+ if (i == 0) -+ new_sig.small_imm_a = true; -+ else -+ new_sig.small_imm_b = true; -+ } else { -+ if (i == 0) -+ new_sig.small_imm_c = true; -+ else -+ new_sig.small_imm_d = true; -+ } -+ } -+ - if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed)) - continue; - -@@ -89,7 +106,10 @@ vir_opt_small_immediates(struct v3d_compile *c) - vir_dump_inst(c, inst); - fprintf(stderr, "\n"); - } -- inst->qpu.sig.small_imm_b = true; -+ inst->qpu.sig.small_imm_a = new_sig.small_imm_a; -+ inst->qpu.sig.small_imm_b = new_sig.small_imm_b; -+ inst->qpu.sig.small_imm_c = new_sig.small_imm_c; -+ inst->qpu.sig.small_imm_d = new_sig.small_imm_d; - inst->qpu.raddr_b = packed; - - inst->src[i].file = QFILE_SMALL_IMM; -diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c -index cbbb495592b..4ed184cbbcb 100644 ---- a/src/broadcom/compiler/vir_to_qpu.c -+++ b/src/broadcom/compiler/vir_to_qpu.c -@@ -89,8 +89,15 @@ new_qpu_nop_before(struct qinst *inst) - static void - v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src) - { -- if (src.smimm) -- unreachable("v3d71_set_src: pending handling small immediates"); -+ /* If we have a small immediate move it from inst->raddr_b to the -+ * corresponding raddr. -+ */ -+ if (src.smimm) { -+ assert(instr->sig.small_imm_a || instr->sig.small_imm_b || -+ instr->sig.small_imm_c || instr->sig.small_imm_d); -+ *raddr = instr->raddr_b; -+ return; -+ } - - assert(!src.magic); - *raddr = src.index; -diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c -index b613de781dc..c1590a760de 100644 ---- a/src/broadcom/qpu/qpu_disasm.c -+++ b/src/broadcom/qpu/qpu_disasm.c -@@ -113,7 +113,6 @@ v3d71_qpu_disasm_raddr(struct disasm_state *disasm, - } - - if (is_small_imm) { -- unreachable("Pending handling small immediates"); - uint32_t val; - ASSERTED bool ok = - v3d_qpu_small_imm_unpack(disasm->devinfo, -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index f54ce7210fb..c30f4bbbccf 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -975,10 +975,10 @@ v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr) - int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); - int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); - -- return (add_nsrc > 0 && inst->alu.add.a.raddr == raddr) || -- (add_nsrc > 1 && inst->alu.add.b.raddr == raddr) || -- (mul_nsrc > 0 && inst->alu.mul.a.raddr == raddr) || -- (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr); -+ return (add_nsrc > 0 && !inst->sig.small_imm_a && inst->alu.add.a.raddr == raddr) || -+ (add_nsrc > 1 && !inst->sig.small_imm_b && inst->alu.add.b.raddr == raddr) || -+ (mul_nsrc > 0 && !inst->sig.small_imm_c && inst->alu.mul.a.raddr == raddr) || -+ (mul_nsrc > 1 && !inst->sig.small_imm_d && inst->alu.mul.b.raddr == raddr); - } - - bool -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index dece45c5c54..d408fb426fa 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -402,7 +402,7 @@ struct v3d_qpu_instr { - uint8_t sig_addr; - bool sig_magic; /* If the signal writes to a magic address */ - uint8_t raddr_a; /* V3D 4.x */ -- uint8_t raddr_b; /* V3D 4.x*/ -+ uint8_t raddr_b; /* V3D 4.x (holds packed small immediate in 7.x too) */ - struct v3d_qpu_flags flags; - - union { -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index ed5a8bc667d..7984712d527 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -1218,16 +1218,11 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst - - instr->alu.add.op = desc->op; - -- /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the -+ /* FADD/FADDNF and FMIN/FMAX are determined by the order of the - * operands. - */ -- /* FIXME: for now hardcoded values, until we got the small_imm support -- * in place -- */ -- uint32_t small_imm_a = 0; -- uint32_t small_imm_b = 0; -- if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a > -- small_imm_b *256 + (op & 3) * 64 + raddr_b) { -+ if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a > -+ instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) { - if (instr->alu.add.op == V3D_QPU_A_FMIN) - instr->alu.add.op = V3D_QPU_A_FMAX; - if (instr->alu.add.op == V3D_QPU_A_FADD) -@@ -1858,11 +1853,6 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - uint32_t output_pack; - uint32_t a_unpack; - uint32_t b_unpack; -- /* FIXME: for now hardcoded values, until we got the small_imm -- * support in place -- */ -- uint32_t small_imm_a = 0; -- uint32_t small_imm_b = 0; - - if (instr->alu.add.op != V3D_QPU_A_FCMP) { - if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, -@@ -1886,8 +1876,8 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - * distinguished by which order their operands come in. - */ - bool ordering = -- small_imm_a * 256 + a_unpack * 64 + raddr_a > -- small_imm_b * 256 + b_unpack * 64 + raddr_b; -+ instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a > -+ instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b; - if (((instr->alu.add.op == V3D_QPU_A_FMIN || - instr->alu.add.op == V3D_QPU_A_FADD) && ordering) || - ((instr->alu.add.op == V3D_QPU_A_FMAX || -@@ -1901,6 +1891,22 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, - temp = raddr_a; - raddr_a = raddr_b; - raddr_b = temp; -+ -+ /* If we are swapping raddr_a/b we also need to swap -+ * small_imm_a/b. -+ */ -+ if (instr->sig.small_imm_a || instr->sig.small_imm_b) { -+ assert(instr->sig.small_imm_a != -+ instr->sig.small_imm_b); -+ struct v3d_qpu_sig new_sig = instr->sig; -+ new_sig.small_imm_a = !instr->sig.small_imm_a; -+ new_sig.small_imm_b = !instr->sig.small_imm_b; -+ uint32_t sig; -+ if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig)) -+ return false; -+ *packed_instr &= ~V3D_QPU_SIG_MASK; -+ *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG); -+ } - } - - opcode |= a_unpack << 2; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0058-broadcom-compiler-update-thread-end-restrictions-for.patch b/projects/RPi/devices/RPi5/patches/mesa/0058-broadcom-compiler-update-thread-end-restrictions-for.patch deleted file mode 100644 index cd5c07f5eb..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0058-broadcom-compiler-update-thread-end-restrictions-for.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 3af87d2672da7c928ecf8a0a1cd1bef8a6729364 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 22 Nov 2021 12:56:03 +0100 -Subject: [PATCH 058/142] broadcom/compiler: update thread end restrictions for - v7.x - -In 4.x it is not allowed to write to the register file in the last -3 instructions, but in 7.x we only have this restriction in the -thread end instruction itself, and only if the write comes from -the ALU ports. ---- - src/broadcom/compiler/qpu_schedule.c | 31 ++++++++++++++++++++-------- - 1 file changed, 22 insertions(+), 9 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index bd1c920848a..cba16c77d67 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1938,17 +1938,30 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - return false; - } - -- /* No writing physical registers at the end. */ -- bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP; -- bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP; -- if ((!add_is_nop && !inst->alu.add.magic_write) || -- (!mul_is_nop && !inst->alu.mul.magic_write)) { -- return false; -+ if (c->devinfo->ver <= 42) { -+ /* No writing physical registers at the end. */ -+ bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP; -+ bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP; -+ if ((!add_is_nop && !inst->alu.add.magic_write) || -+ (!mul_is_nop && !inst->alu.mul.magic_write)) { -+ return false; -+ } -+ -+ if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) && -+ !inst->sig_magic) { -+ return false; -+ } - } - -- if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) && -- !inst->sig_magic) { -- return false; -+ if (c->devinfo->ver >= 71) { -+ /* The thread end instruction must not write to the -+ * register file via the add/mul ALUs. -+ */ -+ if (slot == 0 && -+ (!inst->alu.add.magic_write || -+ !inst->alu.mul.magic_write)) { -+ return false; -+ } - } - - if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF) --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0059-broadcom-compiler-update-ldvary-thread-switch-delay-.patch b/projects/RPi/devices/RPi5/patches/mesa/0059-broadcom-compiler-update-ldvary-thread-switch-delay-.patch deleted file mode 100644 index 515f12d5d5..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0059-broadcom-compiler-update-ldvary-thread-switch-delay-.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 7cfd5b808bb2f1cb17f57435cb5d411c4ac3aa6c Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 23 Nov 2021 10:04:49 +0100 -Subject: [PATCH 059/142] broadcom/compiler: update ldvary thread switch delay - slot restriction for v7.x - -In V3D 7.x we don't have accumulators which would not survive a thread -switch, so the only restriction is that ldvary can't be placed in the second -delay slot of a thread switch. - -shader-db results for UnrealEngine4 shaders: - -total instructions in shared programs: 446458 -> 446401 (-0.01%) -instructions in affected programs: 13492 -> 13435 (-0.42%) -helped: 58 -HURT: 3 -Instructions are helped. - -total nops in shared programs: 19571 -> 19541 (-0.15%) -nops in affected programs: 161 -> 131 (-18.63%) -helped: 30 -HURT: 0 -Nops are helped. ---- - src/broadcom/compiler/qpu_schedule.c | 33 +++++++++++++++++++++------- - src/broadcom/compiler/qpu_validate.c | 10 +++++++-- - 2 files changed, 33 insertions(+), 10 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index cba16c77d67..32f651851cf 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1491,11 +1491,20 @@ retry: - * ldvary now if the follow-up fixup would place - * it in the delay slots of a thrsw, which is not - * allowed and would prevent the fixup from being -- * successful. -+ * successful. In V3D 7.x we can allow this to happen -+ * as long as it is not the last delay slot. - */ -- if (inst->sig.ldvary && -- scoreboard->last_thrsw_tick + 2 >= scoreboard->tick - 1) { -- continue; -+ if (inst->sig.ldvary) { -+ if (c->devinfo->ver <= 42 && -+ scoreboard->last_thrsw_tick + 2 >= -+ scoreboard->tick - 1) { -+ continue; -+ } -+ if (c->devinfo->ver >= 71 && -+ scoreboard->last_thrsw_tick + 2 == -+ scoreboard->tick - 1) { -+ continue; -+ } - } - - /* We can emit a new tmu lookup with a previous ldtmu -@@ -2020,8 +2029,12 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c, - if (slot > 0 && v3d_qpu_instr_is_legacy_sfu(&qinst->qpu)) - return false; - -- if (slot > 0 && qinst->qpu.sig.ldvary) -- return false; -+ if (qinst->qpu.sig.ldvary) { -+ if (c->devinfo->ver <= 42 && slot > 0) -+ return false; -+ if (c->devinfo->ver >= 71 && slot == 2) -+ return false; -+ } - - /* unifa and the following 3 instructions can't overlap a - * thread switch/end. The docs further clarify that this means -@@ -2618,9 +2631,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c, - - /* We can't put an ldvary in the delay slots of a thrsw. We should've - * prevented this when pairing up the ldvary with another instruction -- * and flagging it for a fixup. -+ * and flagging it for a fixup. In V3D 7.x this is limited only to the -+ * second delay slot. - */ -- assert(scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1); -+ assert((devinfo->ver <= 42 && -+ scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) || -+ (devinfo->ver >= 71 && -+ scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1)); - - /* Move the ldvary to the previous instruction and remove it from the - * current one. -diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c -index 41070484286..4f09aa8aef4 100644 ---- a/src/broadcom/compiler/qpu_validate.c -+++ b/src/broadcom/compiler/qpu_validate.c -@@ -215,8 +215,14 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) - "SFU write started during THRSW delay slots "); - } - -- if (inst->sig.ldvary) -- fail_instr(state, "LDVARY during THRSW delay slots"); -+ if (inst->sig.ldvary) { -+ if (devinfo->ver <= 42) -+ fail_instr(state, "LDVARY during THRSW delay slots"); -+ if (devinfo->ver >= 71 && -+ state->ip - state->last_thrsw_ip == 2) { -+ fail_instr(state, "LDVARY in 2nd THRSW delay slot"); -+ } -+ } - } - - (void)qpu_magic_waddr_matches; /* XXX */ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0060-broadcom-compiler-lift-restriction-for-branch-msfign.patch b/projects/RPi/devices/RPi5/patches/mesa/0060-broadcom-compiler-lift-restriction-for-branch-msfign.patch deleted file mode 100644 index 7c78c6938b..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0060-broadcom-compiler-lift-restriction-for-branch-msfign.patch +++ /dev/null @@ -1,30 +0,0 @@ -From ca4063d627cd31c589a8e8688f2876dd8211d1bc Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 25 Nov 2021 08:31:02 +0100 -Subject: [PATCH 060/142] broadcom/compiler: lift restriction for branch + - msfign after setmsf for v7.x - ---- - src/broadcom/compiler/qpu_schedule.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 32f651851cf..476eae691ab 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -2373,10 +2373,11 @@ emit_branch(struct v3d_compile *c, - assert(scoreboard->last_branch_tick + 3 < branch_tick); - assert(scoreboard->last_unifa_write_tick + 3 < branch_tick); - -- /* Can't place a branch with msfign != 0 and cond != 0,2,3 after -+ /* V3D 4.x can't place a branch with msfign != 0 and cond != 0,2,3 after - * setmsf. - */ - bool is_safe_msf_branch = -+ c->devinfo->ver >= 71 || - inst->qpu.branch.msfign == V3D_QPU_MSFIGN_NONE || - inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS || - inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_A0 || --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0061-broadcom-compiler-start-allocating-from-RF-4-in-V7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0061-broadcom-compiler-start-allocating-from-RF-4-in-V7.x.patch deleted file mode 100644 index 8bff29c318..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0061-broadcom-compiler-start-allocating-from-RF-4-in-V7.x.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 167510aa43bbcf06e57a64495cee40e8cdaf5f8b Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 26 Nov 2021 10:37:05 +0100 -Subject: [PATCH 061/142] broadcom/compiler: start allocating from RF 4 in V7.x - -In V3D 4.x we start at RF3 so that we allocate RF0-2 only if there -aren't any other RFs available. This is useful with small shaders -to ensure that our TLB writes don't use these registers because -these are the last instructions we emit in fragment shaders and -the last instructions in a program can't write to these registers, -so if we do, we need to emit NOPs. - -In V3D 7.x the registers affected by this restriction are RF2-3, -so we choose to start at RF4. ---- - src/broadcom/compiler/vir_register_allocate.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 6f7b1ca0589..440b093a636 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -1234,9 +1234,10 @@ v3d_register_allocate(struct v3d_compile *c) - .phys_index = phys_index, - .next_acc = 0, - /* Start at RF3, to try to keep the TLB writes from using -- * RF0-2. -+ * RF0-2. Start at RF4 in 7.x to prevent TLB writes from -+ * using RF2-3. - */ -- .next_phys = 3, -+ .next_phys = c->devinfo->ver <= 42 ? 3 : 4, - .nodes = &c->nodes, - .devinfo = c->devinfo, - }; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0062-broadcom-compiler-validate-restrictions-after-TLB-Z-.patch b/projects/RPi/devices/RPi5/patches/mesa/0062-broadcom-compiler-validate-restrictions-after-TLB-Z-.patch deleted file mode 100644 index f1f210e47a..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0062-broadcom-compiler-validate-restrictions-after-TLB-Z-.patch +++ /dev/null @@ -1,71 +0,0 @@ -From d47ea903b96e43b07bdef21f8026da818e30fcd1 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 25 Nov 2021 13:00:34 +0100 -Subject: [PATCH 062/142] broadcom/compiler: validate restrictions after TLB Z - write - ---- - src/broadcom/compiler/qpu_validate.c | 28 ++++++++++++++++++++++++++++ - 1 file changed, 28 insertions(+) - -diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c -index 4f09aa8aef4..1082fb7d50a 100644 ---- a/src/broadcom/compiler/qpu_validate.c -+++ b/src/broadcom/compiler/qpu_validate.c -@@ -41,6 +41,7 @@ struct v3d_qpu_validate_state { - int last_sfu_write; - int last_branch_ip; - int last_thrsw_ip; -+ int first_tlb_z_write; - - /* Set when we've found the last-THRSW signal, or if we were started - * in single-segment mode. -@@ -110,11 +111,37 @@ static void - qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) - { - const struct v3d_device_info *devinfo = state->c->devinfo; -+ -+ if (qinst->is_tlb_z_write && state->ip < state->first_tlb_z_write) -+ state->first_tlb_z_write = state->ip; -+ - const struct v3d_qpu_instr *inst = &qinst->qpu; - -+ if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH && -+ state->first_tlb_z_write >= 0 && -+ state->ip > state->first_tlb_z_write && -+ inst->branch.msfign != V3D_QPU_MSFIGN_NONE && -+ inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS && -+ inst->branch.cond != V3D_QPU_BRANCH_COND_A0 && -+ inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) { -+ fail_instr(state, "Implicit branch MSF read after TLB Z write"); -+ } -+ - if (inst->type != V3D_QPU_INSTR_TYPE_ALU) - return; - -+ if (inst->alu.add.op == V3D_QPU_A_SETMSF && -+ state->first_tlb_z_write >= 0 && -+ state->ip > state->first_tlb_z_write) { -+ fail_instr(state, "SETMSF after TLB Z write"); -+ } -+ -+ if (state->first_tlb_z_write >= 0 && -+ state->ip > state->first_tlb_z_write && -+ inst->alu.add.op == V3D_QPU_A_MSF) { -+ fail_instr(state, "MSF read after TLB Z write"); -+ } -+ - if (devinfo->ver < 71) { - if (inst->sig.small_imm_a || inst->sig.small_imm_c || - inst->sig.small_imm_d) { -@@ -348,6 +375,7 @@ qpu_validate(struct v3d_compile *c) - .last_sfu_write = -10, - .last_thrsw_ip = -10, - .last_branch_ip = -10, -+ .first_tlb_z_write = INT_MAX, - .ip = 0, - - .last_thrsw_found = !c->last_thrsw, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0063-broadcom-compiler-lift-restriction-on-vpmwt-in-last-.patch b/projects/RPi/devices/RPi5/patches/mesa/0063-broadcom-compiler-lift-restriction-on-vpmwt-in-last-.patch deleted file mode 100644 index 7cfdab4c05..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0063-broadcom-compiler-lift-restriction-on-vpmwt-in-last-.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 6cdf01fad49489b5fc66d231b527de5245d5de32 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 29 Nov 2021 13:23:11 +0100 -Subject: [PATCH 063/142] broadcom/compiler: lift restriction on vpmwt in last - instruction for V3D 7.x - ---- - src/broadcom/compiler/qpu_schedule.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 476eae691ab..77fb6a794e6 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -1934,7 +1934,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, - if (slot > 0 && qinst->uniform != ~0) - return false; - -- if (v3d_qpu_waits_vpm(inst)) -+ if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst)) - return false; - - if (inst->sig.ldvary) --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0064-broadcom-compiler-fix-up-copy-propagation-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0064-broadcom-compiler-fix-up-copy-propagation-for-v71.patch deleted file mode 100644 index 080764c6d0..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0064-broadcom-compiler-fix-up-copy-propagation-for-v71.patch +++ /dev/null @@ -1,134 +0,0 @@ -From acc54637f0787ba4dc887130c25c628ccdaf4e38 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 9 Nov 2021 11:34:59 +0100 -Subject: [PATCH 064/142] broadcom/compiler: fix up copy propagation for v71 - -Update rules for unsafe copy propagations to match v7.x. ---- - .../compiler/vir_opt_copy_propagate.c | 83 +++++++++++++------ - 1 file changed, 56 insertions(+), 27 deletions(-) - -diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c -index c4aa7255a17..1260838ca05 100644 ---- a/src/broadcom/compiler/vir_opt_copy_propagate.c -+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c -@@ -35,7 +35,7 @@ - #include "v3d_compiler.h" - - static bool --is_copy_mov(struct qinst *inst) -+is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst) - { - if (!inst) - return false; -@@ -62,36 +62,65 @@ is_copy_mov(struct qinst *inst) - return false; - } - -- switch (inst->src[0].file) { -- case QFILE_MAGIC: -- /* No copy propagating from R3/R4/R5 -- the MOVs from those -- * are there to register allocate values produced into R3/4/5 -- * to other regs (though hopefully r3/4/5). -- */ -- switch (inst->src[0].index) { -- case V3D_QPU_WADDR_R3: -- case V3D_QPU_WADDR_R4: -- case V3D_QPU_WADDR_R5: -- return false; -+ if (devinfo->ver <= 42) { -+ switch (inst->src[0].file) { -+ case QFILE_MAGIC: -+ /* No copy propagating from R3/R4/R5 -- the MOVs from -+ * those are there to register allocate values produced -+ * into R3/4/5 to other regs (though hopefully r3/4/5). -+ */ -+ switch (inst->src[0].index) { -+ case V3D_QPU_WADDR_R3: -+ case V3D_QPU_WADDR_R4: -+ case V3D_QPU_WADDR_R5: -+ return false; -+ default: -+ break; -+ } -+ break; -+ -+ case QFILE_REG: -+ switch (inst->src[0].index) { -+ case 0: -+ case 1: -+ case 2: -+ /* MOVs from rf0/1/2 are only to track the live -+ * intervals for W/centroid W/Z. -+ */ -+ return false; -+ } -+ break; -+ - default: - break; - } -- break; -- -- case QFILE_REG: -- switch (inst->src[0].index) { -- case 0: -- case 1: -- case 2: -- /* MOVs from rf0/1/2 are only to track the live -+ } else { -+ assert(devinfo->ver >= 71); -+ switch (inst->src[0].file) { -+ case QFILE_REG: -+ switch (inst->src[0].index) { -+ /* MOVs from rf1/2/3 are only to track the live - * intervals for W/centroid W/Z. -+ * -+ * Note: rf0 can be implicitly written by ldvary -+ * (no temp involved), so it is not an SSA value and -+ * could clash with writes to other temps that are -+ * also allocated to rf0. In theory, that would mean -+ * that we can't copy propagate from it, but we handle -+ * this at register allocation time, preventing temps -+ * from being allocated to rf0 while the rf0 value from -+ * ldvary is still live. - */ -- return false; -- } -- break; -+ case 1: -+ case 2: -+ case 3: -+ return false; -+ } -+ break; - -- default: -- break; -+ default: -+ break; -+ } - } - - return true; -@@ -135,7 +164,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs) - */ - struct qinst *mov = movs[inst->src[i].index]; - if (!mov) { -- if (!is_copy_mov(c->defs[inst->src[i].index])) -+ if (!is_copy_mov(c->devinfo, c->defs[inst->src[i].index])) - continue; - mov = c->defs[inst->src[i].index]; - -@@ -245,7 +274,7 @@ vir_opt_copy_propagate(struct v3d_compile *c) - - apply_kills(c, movs, inst); - -- if (is_copy_mov(inst)) -+ if (is_copy_mov(c->devinfo, inst)) - movs[inst->dst.index] = inst; - } - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0065-broadcom-qpu-new-packing-conversion-v71-instructions.patch b/projects/RPi/devices/RPi5/patches/mesa/0065-broadcom-qpu-new-packing-conversion-v71-instructions.patch deleted file mode 100644 index 5bd7e35514..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0065-broadcom-qpu-new-packing-conversion-v71-instructions.patch +++ /dev/null @@ -1,150 +0,0 @@ -From c340f7f1eb4a1e5c0fafe1ea2f801f2ebaf82d8d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 26 Nov 2021 01:24:12 +0100 -Subject: [PATCH 065/142] broadcom/qpu: new packing/conversion v71 instructions - -This commits adds the qpu definitions for several new v71 -instructions. - -Packing: - * vpack does a 2x32 to 2x16 bit integer pack - * v8pack: Pack 2 x 2x16 bit integers into 4x8 bits - * v10pack packs parts of 2 2x16 bit integer into r10g10b10a2. - * v11fpack packs parts of 2 2x16 bit float into r11g11b10 rounding - to nearest - -Conversion to unorm/snorm: - * vftounorm8/vftosnorm8: converts from 2x16-bit floating point - to 2x8 bit unorm/snorm. - * ftounorm16/ftosnorm16: converts floating point to 16-bit - unorm/snorm - * vftounorm10lo: Convert 2x16-bit floating point to 2x10-bit unorm - * vftounorm10hi: Convert 2x16-bit floating point to one 2-bit and one 10-bit unorm ---- - src/broadcom/qpu/qpu_instr.c | 20 ++++++++++++++++++++ - src/broadcom/qpu/qpu_instr.h | 12 ++++++++++++ - src/broadcom/qpu/qpu_pack.c | 12 ++++++++++++ - 3 files changed, 44 insertions(+) - -diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c -index c30f4bbbccf..44f20618a5a 100644 ---- a/src/broadcom/qpu/qpu_instr.c -+++ b/src/broadcom/qpu/qpu_instr.c -@@ -179,6 +179,10 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op) - [V3D_QPU_A_UTOF] = "utof", - [V3D_QPU_A_MOV] = "mov", - [V3D_QPU_A_FMOV] = "fmov", -+ [V3D_QPU_A_VPACK] = "vpack", -+ [V3D_QPU_A_V8PACK] = "v8pack", -+ [V3D_QPU_A_V10PACK] = "v10pack", -+ [V3D_QPU_A_V11FPACK] = "v11fpack", - }; - - if (op >= ARRAY_SIZE(op_names)) -@@ -201,6 +205,12 @@ v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op) - [V3D_QPU_M_MOV] = "mov", - [V3D_QPU_M_NOP] = "nop", - [V3D_QPU_M_FMUL] = "fmul", -+ [V3D_QPU_M_FTOUNORM16] = "ftounorm16", -+ [V3D_QPU_M_FTOSNORM16] = "ftosnorm16", -+ [V3D_QPU_M_VFTOUNORM8] = "vftounorm8", -+ [V3D_QPU_M_VFTOSNORM8] = "vftosnorm8", -+ [V3D_QPU_M_VFTOUNORM10LO] = "vftounorm10lo", -+ [V3D_QPU_M_VFTOUNORM10HI] = "vftounorm10hi", - }; - - if (op >= ARRAY_SIZE(op_names)) -@@ -463,6 +473,10 @@ static const uint8_t add_op_args[] = { - - [V3D_QPU_A_MOV] = D | A, - [V3D_QPU_A_FMOV] = D | A, -+ [V3D_QPU_A_VPACK] = D | A | B, -+ [V3D_QPU_A_V8PACK] = D | A | B, -+ [V3D_QPU_A_V10PACK] = D | A | B, -+ [V3D_QPU_A_V11FPACK] = D | A | B, - }; - - static const uint8_t mul_op_args[] = { -@@ -476,6 +490,12 @@ static const uint8_t mul_op_args[] = { - [V3D_QPU_M_NOP] = 0, - [V3D_QPU_M_MOV] = D | A, - [V3D_QPU_M_FMUL] = D | A | B, -+ [V3D_QPU_M_FTOUNORM16] = D | A, -+ [V3D_QPU_M_FTOSNORM16] = D | A, -+ [V3D_QPU_M_VFTOUNORM8] = D | A, -+ [V3D_QPU_M_VFTOSNORM8] = D | A, -+ [V3D_QPU_M_VFTOUNORM10LO] = D | A, -+ [V3D_QPU_M_VFTOUNORM10HI] = D | A, - }; - - bool -diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h -index d408fb426fa..56eee9f9cac 100644 ---- a/src/broadcom/qpu/qpu_instr.h -+++ b/src/broadcom/qpu/qpu_instr.h -@@ -231,6 +231,10 @@ enum v3d_qpu_add_op { - /* V3D 7.x */ - V3D_QPU_A_FMOV, - V3D_QPU_A_MOV, -+ V3D_QPU_A_VPACK, -+ V3D_QPU_A_V8PACK, -+ V3D_QPU_A_V10PACK, -+ V3D_QPU_A_V11FPACK, - }; - - enum v3d_qpu_mul_op { -@@ -244,6 +248,14 @@ enum v3d_qpu_mul_op { - V3D_QPU_M_MOV, - V3D_QPU_M_NOP, - V3D_QPU_M_FMUL, -+ -+ /* V3D 7.x */ -+ V3D_QPU_M_FTOUNORM16, -+ V3D_QPU_M_FTOSNORM16, -+ V3D_QPU_M_VFTOUNORM8, -+ V3D_QPU_M_VFTOSNORM8, -+ V3D_QPU_M_VFTOUNORM10LO, -+ V3D_QPU_M_VFTOUNORM10HI, - }; - - enum v3d_qpu_output_pack { -diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c -index 7984712d527..6cd75adac6d 100644 ---- a/src/broadcom/qpu/qpu_pack.c -+++ b/src/broadcom/qpu/qpu_pack.c -@@ -783,6 +783,9 @@ static const struct opcode_desc add_ops_v71[] = { - { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 }, - { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 }, - -+ { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 }, -+ { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 }, -+ - { 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 }, - { 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 }, - { 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 }, -@@ -797,6 +800,8 @@ static const struct opcode_desc add_ops_v71[] = { - { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 }, - { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 }, - -+ { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 }, -+ { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 }, - }; - - static const struct opcode_desc mul_ops_v71[] = { -@@ -822,6 +827,13 @@ static const struct opcode_desc mul_ops_v71[] = { - { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 }, - { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 }, - -+ { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 }, -+ { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 }, -+ - { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 }, - - { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL }, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0068-broadcom-compiler-don-t-allocate-spill-base-to-rf0-i.patch b/projects/RPi/devices/RPi5/patches/mesa/0068-broadcom-compiler-don-t-allocate-spill-base-to-rf0-i.patch deleted file mode 100644 index 1fe43abf8f..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0068-broadcom-compiler-don-t-allocate-spill-base-to-rf0-i.patch +++ /dev/null @@ -1,68 +0,0 @@ -From f6082e941a3454c8735df2ff2713ae49b3daa74f Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 18 Apr 2023 08:50:13 +0200 -Subject: [PATCH 068/142] broadcom/compiler: don't allocate spill base to rf0 - in V3D 7.x - -Otherwise it can be stomped by instructions doing implicit rf0 writes. ---- - src/broadcom/compiler/vir_register_allocate.c | 21 +++++++++++++++---- - 1 file changed, 17 insertions(+), 4 deletions(-) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 440b093a636..121c9b2794f 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -582,7 +582,8 @@ interferes(int32_t t0_start, int32_t t0_end, int32_t t1_start, int32_t t1_end) - } - - static void --v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) -+v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int *implicit_rf_nodes, -+ int spill_temp) - { - c->spill_start_num_temps = c->num_temps; - c->spilling = true; -@@ -594,8 +595,20 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp) - spill_offset = c->spill_size; - c->spill_size += V3D_CHANNELS * sizeof(uint32_t); - -- if (spill_offset == 0) -+ if (spill_offset == 0) { - v3d_setup_spill_base(c); -+ -+ /* Don't allocate our spill base to rf0 to avoid -+ * conflicts with instructions doing implicit writes -+ * to that register. -+ */ -+ if (!c->devinfo->has_accumulators) { -+ ra_add_node_interference( -+ c->g, -+ temp_to_node(c, c->spill_base.index), -+ implicit_rf_nodes[0]); -+ } -+ } - } - - struct qinst *last_thrsw = c->last_thrsw; -@@ -1346,7 +1359,7 @@ v3d_register_allocate(struct v3d_compile *c) - int node = v3d_choose_spill_node(c); - uint32_t temp = node_to_temp(c, node); - if (node != -1) { -- v3d_spill_reg(c, acc_nodes, temp); -+ v3d_spill_reg(c, acc_nodes, implicit_rf_nodes, temp); - continue; - } - } -@@ -1363,7 +1376,7 @@ v3d_register_allocate(struct v3d_compile *c) - enum temp_spill_type spill_type = - get_spill_type_for_temp(c, temp); - if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) { -- v3d_spill_reg(c, acc_nodes, temp); -+ v3d_spill_reg(c, acc_nodes, implicit_rf_nodes, temp); - if (c->spills + c->fills > c->max_tmu_spills) - goto spill_fail; - } else { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0069-broadcom-compiler-improve-allocation-for-final-progr.patch b/projects/RPi/devices/RPi5/patches/mesa/0069-broadcom-compiler-improve-allocation-for-final-progr.patch deleted file mode 100644 index fb73352b1a..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0069-broadcom-compiler-improve-allocation-for-final-progr.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 0e9577fbb18a026390f653ca22f5a98a69a5fe59 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 2 May 2023 10:12:37 +0200 -Subject: [PATCH 069/142] broadcom/compiler: improve allocation for final - program instructions - -The last 3 instructions can't use specific registers so flag all the -nodes for temps used in the last program instructions and try to -avoid assigning any of these. This may help us avoid injecting nops -for the last thread switch instruction. - -Because regisster allocation needs to happen before QPU scheduling -and instruction merging we can't tell exactly what the last 3 -instructions will be, so we do this for a few more instructions than -just 3. - -We only do this for fragment shaders because other shader stages -always end with VPM store instructions that take an small immediate -and therefore will never allow us to merge the final thread switch -earlier, so limiting allocation for these shaders will never improve -anything and might instead be detrimental. - -total instructions in shared programs: 11471389 -> 11464335 (-0.06%) -instructions in affected programs: 582908 -> 575854 (-1.21%) -helped: 4669 -HURT: 578 -Instructions are helped. - -total max-temps in shared programs: 2230497 -> 2230150 (-0.02%) -max-temps in affected programs: 5662 -> 5315 (-6.13%) -helped: 344 -HURT: 44 -Max-temps are helped. - -total sfu-stalls in shared programs: 18068 -> 18077 (0.05%) -sfu-stalls in affected programs: 264 -> 273 (3.41%) -helped: 37 -HURT: 48 -Inconclusive result (value mean confidence interval includes 0). - -total inst-and-stalls in shared programs: 11489457 -> 11482412 (-0.06%) -inst-and-stalls in affected programs: 585180 -> 578135 (-1.20%) -helped: 4659 -HURT: 588 -Inst-and-stalls are helped. - -total nops in shared programs: 301738 -> 298140 (-1.19%) -nops in affected programs: 14680 -> 11082 (-24.51%) -helped: 3252 -HURT: 108 -Nops are helped. ---- - src/broadcom/compiler/v3d_compiler.h | 1 + - src/broadcom/compiler/vir_register_allocate.c | 69 +++++++++++++++++-- - 2 files changed, 66 insertions(+), 4 deletions(-) - -diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h -index 425ab0cdf9d..2642d23b629 100644 ---- a/src/broadcom/compiler/v3d_compiler.h -+++ b/src/broadcom/compiler/v3d_compiler.h -@@ -613,6 +613,7 @@ struct v3d_ra_node_info { - struct { - uint32_t priority; - uint8_t class_bits; -+ bool is_program_end; - - /* V3D 7.x */ - bool is_ldunif_dst; -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 121c9b2794f..495644bb557 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -385,6 +385,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits) - c->nodes.info[node].class_bits = class_bits; - c->nodes.info[node].priority = 0; - c->nodes.info[node].is_ldunif_dst = false; -+ c->nodes.info[node].is_program_end = false; - } - - /* The spill offset for this thread takes a bit of setup, so do it once at -@@ -929,6 +930,17 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, - return true; - } - -+ /* The last 3 instructions in a shader can't use some specific registers -+ * (usually early rf registers, depends on v3d version) so try to -+ * avoid allocating these to registers used by the last instructions -+ * in the shader. -+ */ -+ const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4; -+ if (v3d_ra->nodes->info[node].is_program_end && -+ v3d_ra->next_phys < safe_rf_start) { -+ v3d_ra->next_phys = safe_rf_start; -+ } -+ - for (int i = 0; i < PHYS_COUNT; i++) { - int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT; - int phys = v3d_ra->phys_index + phys_off; -@@ -1218,6 +1230,44 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, - } - } - -+static void -+flag_program_end_nodes(struct v3d_compile *c) -+{ -+ /* Only look for registers used in this many instructions */ -+ uint32_t last_set_count = 6; -+ -+ struct qblock *last_block = vir_exit_block(c); -+ list_for_each_entry_rev(struct qinst, inst, &last_block->instructions, link) { -+ if (!inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) -+ continue; -+ -+ int num_src = v3d_qpu_add_op_num_src(inst->qpu.alu.add.op); -+ for (int i = 0; i < num_src; i++) { -+ if (inst->src[i].file == QFILE_TEMP) { -+ int node = temp_to_node(c, inst->src[i].index); -+ c->nodes.info[node].is_program_end = true; -+ } -+ } -+ -+ num_src = v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op); -+ for (int i = 0; i < num_src; i++) { -+ if (inst->src[i].file == QFILE_TEMP) { -+ int node = temp_to_node(c, inst->src[i].index); -+ c->nodes.info[node].is_program_end = true; -+ -+ } -+ } -+ -+ if (inst->dst.file == QFILE_TEMP) { -+ int node = temp_to_node(c, inst->dst.index); -+ c->nodes.info[node].is_program_end = true; -+ } -+ -+ if (--last_set_count == 0) -+ break; -+ } -+} -+ - /** - * Returns a mapping from QFILE_TEMP indices to struct qpu_regs. - * -@@ -1280,17 +1330,16 @@ v3d_register_allocate(struct v3d_compile *c) - */ - for (uint32_t i = 0; i < num_ra_nodes; i++) { - c->nodes.info[i].is_ldunif_dst = false; -+ c->nodes.info[i].is_program_end = false; -+ c->nodes.info[i].priority = 0; -+ c->nodes.info[i].class_bits = 0; - if (c->devinfo->has_accumulators && i < ACC_COUNT) { - acc_nodes[i] = i; - ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i); -- c->nodes.info[i].priority = 0; -- c->nodes.info[i].class_bits = 0; - } else if (!c->devinfo->has_accumulators && - i < ARRAY_SIZE(implicit_rf_nodes)) { - implicit_rf_nodes[i] = i; - ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i); -- c->nodes.info[i].priority = 0; -- c->nodes.info[i].class_bits = 0; - } else { - uint32_t t = node_to_temp(c, i); - c->nodes.info[i].priority = -@@ -1327,6 +1376,18 @@ v3d_register_allocate(struct v3d_compile *c) - last_ldvary_ip, inst); - } - -+ /* Flag the nodes that are used in the last instructions of the program -+ * (there are some registers that cannot be used in the last 3 -+ * instructions). We only do this for fragment shaders, because the idea -+ * is that by avoiding this conflict we may be able to emit the last -+ * thread switch earlier in some cases, however, in non-fragment shaders -+ * this won't happen because the last instructions are always VPM stores -+ * with a small immediate, which conflicts with other signals, -+ * preventing us from ever moving the thrsw earlier. -+ */ -+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) -+ flag_program_end_nodes(c); -+ - /* Set the register classes for all our temporaries in the graph */ - for (uint32_t i = 0; i < c->num_temps; i++) { - ra_set_node_class(c->g, temp_to_node(c, i), --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0070-broadcom-compiler-don-t-assign-registers-to-unused-n.patch b/projects/RPi/devices/RPi5/patches/mesa/0070-broadcom-compiler-don-t-assign-registers-to-unused-n.patch deleted file mode 100644 index 1b29439b82..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0070-broadcom-compiler-don-t-assign-registers-to-unused-n.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 645fe451bcecbe3345a144222306d06fb39f6b9f Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 2 May 2023 10:17:47 +0200 -Subject: [PATCH 070/142] broadcom/compiler: don't assign registers to unused - nodes/temps - -In programs with a lot of unused temps, if we don't do this, we may -end up recycling previously used rfs more often, which can be -detrimental to instruction pairing. - -total instructions in shared programs: 11464335 -> 11444136 (-0.18%) -instructions in affected programs: 8976743 -> 8956544 (-0.23%) -helped: 33196 -HURT: 33778 -Inconclusive result - -total max-temps in shared programs: 2230150 -> 2229445 (-0.03%) -max-temps in affected programs: 86413 -> 85708 (-0.82%) -helped: 2217 -HURT: 1523 -Max-temps are helped. - -total sfu-stalls in shared programs: 18077 -> 17104 (-5.38%) -sfu-stalls in affected programs: 8669 -> 7696 (-11.22%) -helped: 2657 -HURT: 2182 -Sfu-stalls are helped. - -total inst-and-stalls in shared programs: 11482412 -> 11461240 (-0.18%) -inst-and-stalls in affected programs: 8995697 -> 8974525 (-0.24%) -helped: 33319 -HURT: 33708 -Inconclusive result - -total nops in shared programs: 298140 -> 296185 (-0.66%) -nops in affected programs: 52805 -> 50850 (-3.70%) -helped: 3797 -HURT: 2662 -Inconclusive result ---- - src/broadcom/compiler/v3d_compiler.h | 1 + - src/broadcom/compiler/vir_register_allocate.c | 14 ++++++++++++++ - 2 files changed, 15 insertions(+) - -diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h -index 2642d23b629..f1a807e38fd 100644 ---- a/src/broadcom/compiler/v3d_compiler.h -+++ b/src/broadcom/compiler/v3d_compiler.h -@@ -614,6 +614,7 @@ struct v3d_ra_node_info { - uint32_t priority; - uint8_t class_bits; - bool is_program_end; -+ bool unused; - - /* V3D 7.x */ - bool is_ldunif_dst; -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 495644bb557..0ab0474424f 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -386,6 +386,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits) - c->nodes.info[node].priority = 0; - c->nodes.info[node].is_ldunif_dst = false; - c->nodes.info[node].is_program_end = false; -+ c->nodes.info[node].unused = false; - } - - /* The spill offset for this thread takes a bit of setup, so do it once at -@@ -918,6 +919,12 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, - BITSET_WORD *regs, - unsigned int *out) - { -+ /* If this node is for an unused temp, ignore. */ -+ if (v3d_ra->nodes->info[node].unused) { -+ *out = 0; -+ return true; -+ } -+ - /* In V3D 7.x, try to assign rf0 to temps used as ldunif's dst - * so we can avoid turning them into ldunifrf (which uses the - * cond field to encode the dst and would prevent merge with -@@ -1331,6 +1338,7 @@ v3d_register_allocate(struct v3d_compile *c) - for (uint32_t i = 0; i < num_ra_nodes; i++) { - c->nodes.info[i].is_ldunif_dst = false; - c->nodes.info[i].is_program_end = false; -+ c->nodes.info[i].unused = false; - c->nodes.info[i].priority = 0; - c->nodes.info[i].class_bits = 0; - if (c->devinfo->has_accumulators && i < ACC_COUNT) { -@@ -1396,6 +1404,12 @@ v3d_register_allocate(struct v3d_compile *c) - - /* Add register interferences based on liveness data */ - for (uint32_t i = 0; i < c->num_temps; i++) { -+ /* And while we are here, let's also flag nodes for -+ * unused temps. -+ */ -+ if (c->temp_start[i] > c->temp_end[i]) -+ c->nodes.info[temp_to_node(c, i)].unused = true; -+ - for (uint32_t j = i + 1; j < c->num_temps; j++) { - if (interferes(c->temp_start[i], c->temp_end[i], - c->temp_start[j], c->temp_end[j])) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0071-broadcom-compiler-only-assign-rf0-as-last-resort-in-.patch b/projects/RPi/devices/RPi5/patches/mesa/0071-broadcom-compiler-only-assign-rf0-as-last-resort-in-.patch deleted file mode 100644 index 1ff6366faa..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0071-broadcom-compiler-only-assign-rf0-as-last-resort-in-.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 851704169d59e28c5429b06d05e5ef952be893a2 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 15 May 2023 10:02:10 +0200 -Subject: [PATCH 071/142] broadcom/compiler: only assign rf0 as last resort in - V3D 7.x - -So we can use it for ldunif(a) and avoid generating ldunif(a)rf which -can't be paired with conditional instructions. - -shader-db (pi5): - -total instructions in shared programs: 11357802 -> 11338883 (-0.17%) -instructions in affected programs: 7117889 -> 7098970 (-0.27%) -helped: 24264 -HURT: 17574 -Instructions are helped. - -total uniforms in shared programs: 3857808 -> 3857815 (<.01%) -uniforms in affected programs: 92 -> 99 (7.61%) -helped: 0 -HURT: 1 - -total max-temps in shared programs: 2230904 -> 2230199 (-0.03%) -max-temps in affected programs: 52309 -> 51604 (-1.35%) -helped: 1219 -HURT: 725 -Max-temps are helped. - -total sfu-stalls in shared programs: 15021 -> 15236 (1.43%) -sfu-stalls in affected programs: 6848 -> 7063 (3.14%) -helped: 1866 -HURT: 1704 -Inconclusive result - -total inst-and-stalls in shared programs: 11372823 -> 11354119 (-0.16%) -inst-and-stalls in affected programs: 7149177 -> 7130473 (-0.26%) -helped: 24315 -HURT: 17561 -Inst-and-stalls are helped. - -total nops in shared programs: 273624 -> 273711 (0.03%) -nops in affected programs: 31562 -> 31649 (0.28%) -helped: 1619 -HURT: 1854 -Inconclusive result (value mean confidence interval includes 0). ---- - src/broadcom/compiler/vir_register_allocate.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c -index 0ab0474424f..8eac2b75bd7 100644 ---- a/src/broadcom/compiler/vir_register_allocate.c -+++ b/src/broadcom/compiler/vir_register_allocate.c -@@ -950,6 +950,11 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, - - for (int i = 0; i < PHYS_COUNT; i++) { - int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT; -+ -+ /* Try to keep rf0 available for ldunif in 7.x (see above). */ -+ if (v3d_ra->devinfo->ver >= 71 && phys_off == 0) -+ continue; -+ - int phys = v3d_ra->phys_index + phys_off; - - if (BITSET_TEST(regs, phys)) { -@@ -959,6 +964,14 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, - } - } - -+ /* If we couldn't allocate, do try to assign rf0 if it is available. */ -+ if (v3d_ra->devinfo->ver >= 71 && -+ BITSET_TEST(regs, v3d_ra->phys_index)) { -+ v3d_ra->next_phys = 1; -+ *out = v3d_ra->phys_index; -+ return true; -+ } -+ - return false; - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0072-v3dv-recover-non-conformant-warning-for-not-fully-su.patch b/projects/RPi/devices/RPi5/patches/mesa/0072-v3dv-recover-non-conformant-warning-for-not-fully-su.patch deleted file mode 100644 index 2fcd20415f..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0072-v3dv-recover-non-conformant-warning-for-not-fully-su.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 0d3fd30d67ffc0195b0783e30ab6afbbe403310a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 28 Apr 2021 14:31:38 +0200 -Subject: [PATCH 072/142] v3dv: recover non-conformant warning for not fully - supported hw - ---- - src/broadcom/vulkan/v3dv_device.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index d5de3517670..d29ffad3531 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -1212,6 +1212,12 @@ create_physical_device(struct v3dv_instance *instance, - - list_addtail(&device->vk.link, &instance->vk.physical_devices.list); - -+ if (device->devinfo.ver != 42) { -+ fprintf(stderr, "WARNING: v3dv support for hw version %i is neither " -+ "a complete nor a conformant Vulkan implementation. Testing " -+ "use only.\n", device->devinfo.ver); -+ } -+ - return VK_SUCCESS; - - fail: --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0073-v3dv-meson-add-v71-hw-generation.patch b/projects/RPi/devices/RPi5/patches/mesa/0073-v3dv-meson-add-v71-hw-generation.patch deleted file mode 100644 index 8023c45736..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0073-v3dv-meson-add-v71-hw-generation.patch +++ /dev/null @@ -1,504 +0,0 @@ -From 52b5ac62b367ae89574c8031fdcf7c1dae05c942 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 29 Jun 2021 11:59:53 +0200 -Subject: [PATCH 073/142] v3dv/meson: add v71 hw generation - -Starting point for v71 version inclusion. - -This just adds it as one of the versions to be compiled (on meson), -updates the v3dX/v3dv_X macros, and update the code enough to get it -compiling when building using the two versions. For any packet not -available on v71 we just provide a generic asserted placeholder of -generation not supported. - -Any real v71 support will be implemented on following commits. ---- - src/broadcom/vulkan/meson.build | 6 +- - src/broadcom/vulkan/v3dv_private.h | 7 +++ - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 75 +++++++++++++++++++++++-- - src/broadcom/vulkan/v3dvx_image.c | 16 +++++- - src/broadcom/vulkan/v3dvx_meta_common.c | 32 +++++++++++ - src/broadcom/vulkan/v3dvx_pipeline.c | 5 ++ - src/broadcom/vulkan/v3dvx_queue.c | 11 ++++ - 7 files changed, 142 insertions(+), 10 deletions(-) - -diff --git a/src/broadcom/vulkan/meson.build b/src/broadcom/vulkan/meson.build -index ad032d832ad..3da7364686f 100644 ---- a/src/broadcom/vulkan/meson.build -+++ b/src/broadcom/vulkan/meson.build -@@ -27,6 +27,7 @@ v3dv_entrypoints = custom_target( - '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'v3dv', - '--beta', with_vulkan_beta.to_string(), - '--device-prefix', 'ver42', -+ '--device-prefix', 'ver71', - ], - depend_files : vk_entrypoints_gen_depend_files, - ) -@@ -67,10 +68,7 @@ files_per_version = files( - 'v3dvx_queue.c', - ) - --# The vulkan driver only supports version >= 42, which is the version present in --# Rpi4. We need to explicitly set it as we are reusing pieces from the GL v3d --# driver. --v3d_versions = ['42'] -+v3d_versions = ['42', '71'] - - v3dv_flags = [] - -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index c6707211529..6bdf338c67b 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -2608,6 +2608,9 @@ u64_compare(const void *key1, const void *key2) - case 42: \ - v3d_X_thing = &v3d42_##thing; \ - break; \ -+ case 71: \ -+ v3d_X_thing = &v3d71_##thing; \ -+ break; \ - default: \ - unreachable("Unsupported hardware generation"); \ - } \ -@@ -2626,6 +2629,10 @@ u64_compare(const void *key1, const void *key2) - # define v3dX(x) v3d42_##x - # include "v3dvx_private.h" - # undef v3dX -+ -+# define v3dX(x) v3d71_##x -+# include "v3dvx_private.h" -+# undef v3dX - #endif - - #ifdef ANDROID -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index f182b790d36..b958e634c82 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -56,10 +56,15 @@ v3dX(job_emit_enable_double_buffer)(struct v3dv_job *job) - }; - config.width_in_pixels = tiling->width; - config.height_in_pixels = tiling->height; -+#if V3D_VERSION == 42 - config.number_of_render_targets = MAX2(tiling->render_target_count, 1); - config.multisample_mode_4x = tiling->msaa; - config.double_buffer_in_non_ms_mode = tiling->double_buffer; - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - uint8_t *rewrite_addr = (uint8_t *)job->bcl_tile_binning_mode_ptr; - cl_packet_pack(TILE_BINNING_MODE_CFG)(NULL, rewrite_addr, &config); -@@ -82,10 +87,15 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job, - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { - config.width_in_pixels = tiling->width; - config.height_in_pixels = tiling->height; -+#if V3D_VERSION == 42 - config.number_of_render_targets = MAX2(tiling->render_target_count, 1); - config.multisample_mode_4x = tiling->msaa; - config.double_buffer_in_non_ms_mode = tiling->double_buffer; - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } - - /* There's definitely nothing in the VCD cache we want. */ -@@ -649,10 +659,15 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, - * bit and instead we have to emit a single clear of all tile buffers. - */ - if (use_global_zs_clear || use_global_rt_clear) { -+#if V3D_VERSION == 42 - cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = use_global_zs_clear; - clear.clear_all_render_targets = use_global_rt_clear; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - } - } - -@@ -824,7 +839,12 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - config.number_of_render_targets = MAX2(subpass->color_count, 1); - config.multisample_mode_4x = tiling->msaa; - config.double_buffer_in_non_ms_mode = tiling->double_buffer; -+#if V3D_VERSION == 42 - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { - const struct v3dv_image_view *iview = -@@ -920,7 +940,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - const struct v3d_resource_slice *slice = - &image->planes[plane].slices[iview->vk.base_mip_level]; - -- const uint32_t *clear_color = -+ UNUSED const uint32_t *clear_color = - &state->attachments[attachment_idx].clear_value.color[0]; - - uint32_t clear_pad = 0; -@@ -937,13 +957,19 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - } - } - -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { - clear.clear_color_low_32_bits = clear_color[0]; - clear.clear_color_next_24_bits = clear_color[1] & 0xffffff; - clear.render_target_number = i; - }; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) { -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((clear_color[1] >> 24) | (clear_color[2] << 8)); -@@ -951,17 +977,28 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8)); - clear.render_target_number = i; - }; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - } - - if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = clear_color[3] >> 16; - clear.render_target_number = i; - }; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } - } - -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - v3dX(cmd_buffer_render_pass_setup_render_target) - (cmd_buffer, 0, &rt.render_target_0_internal_bpp, -@@ -976,6 +1013,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - (cmd_buffer, 3, &rt.render_target_3_internal_bpp, - &rt.render_target_3_internal_type, &rt.render_target_3_clamp); - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - - /* Ends rendering mode config. */ - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { -@@ -1036,10 +1077,15 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - } - if (cmd_buffer->state.tile_aligned_render_area && - (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) { -+#if V3D_VERSION == 42 - cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = !job->early_zs_clear; - clear.clear_all_render_targets = true; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } - cl_emit(rcl, END_OF_TILE_MARKER, end); - } -@@ -1065,7 +1111,9 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) - * now, would need to change if we allow multiple viewports - */ - float *vptranslate = dynamic->viewport.translate[0]; -+#if V3D_VERSION == 42 - float *vpscale = dynamic->viewport.scale[0]; -+#endif - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); -@@ -1078,10 +1126,15 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) - v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size); - v3dv_return_if_oom(cmd_buffer, NULL); - -+#if V3D_VERSION == 42 - cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { - clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f; - clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - float translate_z, scale_z; - v3dv_cmd_buffer_state_get_viewport_z_xform(&cmd_buffer->state, 0, -@@ -1591,16 +1644,20 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer) - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - assert(pipeline); - -- bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer); -- - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS)); - v3dv_return_if_oom(cmd_buffer, NULL); - -+#if V3D_VERSION == 42 -+ bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer); - cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) { - config.early_z_enable = enable_ez; - config.early_z_updates_enable = config.early_z_enable && - pipeline->z_updates_enable; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } - - void -@@ -2031,10 +2088,12 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer) - pipeline->vpm_cfg.Gv); - } - -+#if V3D_VERSION == 42 - struct v3dv_bo *default_attribute_values = - pipeline->default_attribute_values != NULL ? - pipeline->default_attribute_values : - pipeline->device->default_attribute_float; -+#endif - - cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD, - pipeline->shader_state_record, shader) { -@@ -2060,8 +2119,10 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer) - shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs; - shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs; - -+#if V3D_VERSION == 42 - shader.address_of_default_attribute_values = - v3dv_cl_address(default_attribute_values, 0); -+#endif - - shader.any_shader_reads_hardware_written_primitive_id = - (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid; -@@ -2399,11 +2460,17 @@ v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buf - - assert(iview->plane_count == 1); - *rt_bpp = iview->planes[0].internal_bpp; -- *rt_type = iview->planes[0].internal_type; - if (vk_format_is_int(iview->vk.view_format)) -+#if V3D_VERSION == 42 -+ *rt_type = iview->planes[0].internal_type; -+ if (vk_format_is_int(iview->vk.format)) - *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; - else if (vk_format_is_srgb(iview->vk.view_format)) - *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM; - else - *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } -diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c -index 80a3e5bfde8..dac6ff2741f 100644 ---- a/src/broadcom/vulkan/v3dvx_image.c -+++ b/src/broadcom/vulkan/v3dvx_image.c -@@ -76,8 +76,6 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - tex.swizzle_b = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[2]); - tex.swizzle_a = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[3]); - -- tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse; -- - tex.texture_type = image_view->format->planes[plane].tex_type; - - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { -@@ -110,7 +108,16 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - - tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64; - -+#if V3D_VERSION == 42 -+ tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse; -+#endif -+ -+#if V3D_VERSION == 42 - tex.srgb = vk_format_is_srgb(image_view->vk.view_format); -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - - /* At this point we don't have the job. That's the reason the first - * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to -@@ -166,7 +173,12 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, - - assert(buffer_view->format->plane_count == 1); - tex.texture_type = buffer_view->format->planes[0].tex_type; -+#if V3D_VERSION == 42 - tex.srgb = vk_format_is_srgb(buffer_view->vk_format); -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - - /* At this point we don't have the job. That's the reason the first - * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to -diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c -index 04147b82cbd..2db07ea7427 100644 ---- a/src/broadcom/vulkan/v3dvx_meta_common.c -+++ b/src/broadcom/vulkan/v3dvx_meta_common.c -@@ -58,7 +58,12 @@ emit_rcl_prologue(struct v3dv_job *job, - config.number_of_render_targets = 1; - config.multisample_mode_4x = tiling->msaa; - config.double_buffer_in_non_ms_mode = tiling->double_buffer; -+#if V3D_VERSION == 42 - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - config.internal_depth_type = fb->internal_depth_type; - } - -@@ -88,14 +93,20 @@ emit_rcl_prologue(struct v3dv_job *job, - } - } - -+#if V3D_VERSION == 42 - const uint32_t *color = &clear_info->clear_value->color[0]; - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { - clear.clear_color_low_32_bits = color[0]; - clear.clear_color_next_24_bits = color[1] & 0x00ffffff; - clear.render_target_number = 0; - }; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - - if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((color[1] >> 24) | (color[2] << 8)); -@@ -103,22 +114,37 @@ emit_rcl_prologue(struct v3dv_job *job, - ((color[2] >> 24) | ((color[3] & 0xffff) << 8)); - clear.render_target_number = 0; - }; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif -+ - } - - if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = color[3] >> 16; - clear.render_target_number = 0; - }; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - } - } - -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - rt.render_target_0_internal_bpp = tiling->internal_bpp; - rt.render_target_0_internal_type = fb->internal_type; - rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f; -@@ -179,10 +205,16 @@ emit_frame_setup(struct v3dv_job *job, - */ - if (clear_value && - (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) { -+#if V3D_VERSION == 42 - cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = true; - clear.clear_all_render_targets = true; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif -+ - } - cl_emit(rcl, END_OF_TILE_MARKER, end); - } -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index 5d32d414ed8..922698b08a2 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -447,10 +447,15 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) - /* FIXME: Use combined input/output size flag in the common case (also - * on v3d, see v3dx_draw). - */ -+#if V3D_VERSION == 42 - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = - prog_data_vs_bin->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - prog_data_vs->separate_segments; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - shader.coordinate_shader_input_vpm_segment_size = - prog_data_vs_bin->separate_segments ? -diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c -index efe63de425c..1a26d04aef7 100644 ---- a/src/broadcom/vulkan/v3dvx_queue.c -+++ b/src/broadcom/vulkan/v3dvx_queue.c -@@ -42,14 +42,25 @@ v3dX(job_emit_noop)(struct v3dv_job *job) - config.image_height_pixels = 1; - config.number_of_render_targets = 1; - config.multisample_mode_4x = false; -+#if V3D_VERSION == 42 - config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } - -+#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32; - rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8; - rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("Hardware generation 71 not supported yet."); -+#endif -+ - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = 1.0f; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0074-v3dv-expose-V3D-revision-number-in-device-name.patch b/projects/RPi/devices/RPi5/patches/mesa/0074-v3dv-expose-V3D-revision-number-in-device-name.patch deleted file mode 100644 index 3b3626dda1..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0074-v3dv-expose-V3D-revision-number-in-device-name.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 7aa016bca8bb1bf449ea79505692353c0bd174b8 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 10 Nov 2021 10:06:50 +0100 -Subject: [PATCH 074/142] v3dv: expose V3D revision number in device name - ---- - src/broadcom/vulkan/v3dv_device.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index d29ffad3531..3034b561480 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -1123,8 +1123,10 @@ create_physical_device(struct v3dv_instance *instance, - device->next_program_id = 0; - - ASSERTED int len = -- asprintf(&device->name, "V3D %d.%d", -- device->devinfo.ver / 10, device->devinfo.ver % 10); -+ asprintf(&device->name, "V3D %d.%d.%d", -+ device->devinfo.ver / 10, -+ device->devinfo.ver % 10, -+ device->devinfo.rev); - assert(len != -1); - - v3dv_physical_device_init_disk_cache(device); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0075-v3dv-device-handle-new-rpi5-device-bcm2712.patch b/projects/RPi/devices/RPi5/patches/mesa/0075-v3dv-device-handle-new-rpi5-device-bcm2712.patch deleted file mode 100644 index 249a11c141..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0075-v3dv-device-handle-new-rpi5-device-bcm2712.patch +++ /dev/null @@ -1,54 +0,0 @@ -From fb9e95b7e1d5987fd25e914635c4e09d81ea9561 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 10 Nov 2021 07:54:35 +0100 -Subject: [PATCH 075/142] v3dv/device: handle new rpi5 device (bcm2712) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This includes both master and primary devices. - -Signed-off-by: Iago Toral Quiroga -Signed-off-by: Alejandro Piñeiro ---- - src/broadcom/vulkan/v3dv_device.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index 3034b561480..c8719d33f15 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -1287,7 +1287,8 @@ enumerate_devices(struct vk_instance *vk_instance) - if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) { - char **compat = devices[i]->deviceinfo.platform->compatible; - while (*compat) { -- if (strncmp(*compat, "brcm,2711-v3d", 13) == 0) { -+ if (strncmp(*compat, "brcm,2711-v3d", 13) == 0 || -+ strncmp(*compat, "brcm,2712-v3d", 13) == 0) { - v3d_idx = i; - break; - } -@@ -1296,8 +1297,9 @@ enumerate_devices(struct vk_instance *vk_instance) - } else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) { - char **compat = devices[i]->deviceinfo.platform->compatible; - while (*compat) { -- if (strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 || -- strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0 ) { -+ if (strncmp(*compat, "brcm,bcm2712-vc6", 16) == 0 || -+ strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 || -+ strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0) { - vc4_idx = i; - break; - } -@@ -1334,6 +1336,8 @@ v3dv_physical_device_device_id(struct v3dv_physical_device *dev) - switch (dev->devinfo.ver) { - case 42: - return 0xBE485FD3; /* Broadcom deviceID for 2711 */ -+ case 71: -+ return 0x55701C33; /* Broadcom deviceID for 2712 */ - default: - unreachable("Unsupported V3D version"); - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0076-v3dv-cmd_buffer-emit-TILE_BINNING_MODE_CFG-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0076-v3dv-cmd_buffer-emit-TILE_BINNING_MODE_CFG-for-v71.patch deleted file mode 100644 index 70419bad10..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0076-v3dv-cmd_buffer-emit-TILE_BINNING_MODE_CFG-for-v71.patch +++ /dev/null @@ -1,32 +0,0 @@ -From c4f957af4fb0e10abf0a7ffad4f7a468633b7d99 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 20 Jul 2021 14:00:44 +0200 -Subject: [PATCH 076/142] v3dv/cmd_buffer: emit TILE_BINNING_MODE_CFG for v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index b958e634c82..17b2f46850d 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -94,7 +94,14 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job, - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ config.log2_tile_width = log2_tile_size(tiling->tile_width); -+ config.log2_tile_height = log2_tile_size(tiling->tile_height); -+ /* FIXME: ideally we would like next assert on the packet header (as is -+ * general, so also applies to GL). We would need to expand -+ * gen_pack_header for that. -+ */ -+ assert(config.log2_tile_width == config.log2_tile_height || -+ config.log2_tile_width == config.log2_tile_height + 1); - #endif - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0077-v3dv-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0077-v3dv-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch deleted file mode 100644 index 7a6e9ec2a1..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0077-v3dv-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 1934ac07df73cb685f6550b8b0f5b4f2ead11396 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 20 Jul 2021 14:33:00 +0200 -Subject: [PATCH 077/142] v3dv: emit TILE_RENDERING_MODE_CFG_COMMON for v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 ++++++++- - src/broadcom/vulkan/v3dvx_meta_common.c | 9 ++++++++- - 2 files changed, 16 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 17b2f46850d..7837b460051 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -850,7 +850,14 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ config.log2_tile_width = log2_tile_size(tiling->tile_width); -+ config.log2_tile_height = log2_tile_size(tiling->tile_height); -+ /* FIXME: ideallly we would like next assert on the packet header (as is -+ * general, so also applies to GL). We would need to expand -+ * gen_pack_header for that. -+ */ -+ assert(config.log2_tile_width == config.log2_tile_height || -+ config.log2_tile_width == config.log2_tile_height + 1); - #endif - - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { -diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c -index 2db07ea7427..e4084d851fc 100644 ---- a/src/broadcom/vulkan/v3dvx_meta_common.c -+++ b/src/broadcom/vulkan/v3dvx_meta_common.c -@@ -62,7 +62,14 @@ emit_rcl_prologue(struct v3dv_job *job, - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ config.log2_tile_width = log2_tile_size(tiling->tile_width); -+ config.log2_tile_height = log2_tile_size(tiling->tile_height); -+ /* FIXME: ideallly we would like next assert on the packet header (as is -+ * general, so also applies to GL). We would need to expand -+ * gen_pack_header for that. -+ */ -+ assert(config.log2_tile_width == config.log2_tile_height || -+ config.log2_tile_width == config.log2_tile_height + 1); - #endif - config.internal_depth_type = fb->internal_depth_type; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0078-v3dv-cmd_buffer-emit-TILE_RENDERING_MODE_CFG_RENDER_.patch b/projects/RPi/devices/RPi5/patches/mesa/0078-v3dv-cmd_buffer-emit-TILE_RENDERING_MODE_CFG_RENDER_.patch deleted file mode 100644 index 9c0a0a5ced..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0078-v3dv-cmd_buffer-emit-TILE_RENDERING_MODE_CFG_RENDER_.patch +++ /dev/null @@ -1,315 +0,0 @@ -From f0f9eea3cad83ed8824c6a7686150327407a5286 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 22 Jul 2021 14:26:13 +0200 -Subject: [PATCH 078/142] v3dv/cmd_buffer: emit - TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1 for v71 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Alejandro Piñeiro -Signed-off-by: Iago Toral Quiroga ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 186 +++++++++++++++++------- - src/broadcom/vulkan/v3dvx_meta_common.c | 12 +- - src/broadcom/vulkan/v3dvx_private.h | 11 +- - 3 files changed, 147 insertions(+), 62 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 7837b460051..c6307890da5 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -800,6 +800,103 @@ set_rcl_early_z_config(struct v3dv_job *job, - } - } - -+/* Note that for v71, render target cfg packets has just one field that -+ * combined the internal type and clamp mode. For simplicity we keep just one -+ * helper. -+ * -+ * Note: rt_type is in fact a "enum V3DX(Internal_Type)". -+ * -+ * FIXME: for v71 we are not returning all the possible combinations for -+ * render target internal type and clamp. For example for int types we are -+ * always using clamp int, and for 16f we are using clamp none or pos (that -+ * seem the equivalent for no-clamp on 4.2), but not pq or hlg. In summary -+ * right now we are just porting what we were doing on 4.2 -+ */ -+uint32_t -+v3dX(clamp_for_format_and_type)(uint32_t rt_type, -+ VkFormat vk_format) -+{ -+#if V3D_VERSION == 42 -+ if (vk_format_is_int(vk_format)) -+ return V3D_RENDER_TARGET_CLAMP_INT; -+ else if (vk_format_is_srgb(vk_format)) -+ return V3D_RENDER_TARGET_CLAMP_NORM; -+ else -+ return V3D_RENDER_TARGET_CLAMP_NONE; -+#endif -+#if V3D_VERSION >= 71 -+ switch (rt_type) { -+ case V3D_INTERNAL_TYPE_8I: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED; -+ case V3D_INTERNAL_TYPE_8UI: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED; -+ case V3D_INTERNAL_TYPE_8: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_8; -+ case V3D_INTERNAL_TYPE_16I: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED; -+ case V3D_INTERNAL_TYPE_16UI: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED; -+ case V3D_INTERNAL_TYPE_16F: -+ return vk_format_is_srgb(vk_format) ? -+ V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM : -+ V3D_RENDER_TARGET_TYPE_CLAMP_16F; -+ case V3D_INTERNAL_TYPE_32I: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED; -+ case V3D_INTERNAL_TYPE_32UI: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED; -+ case V3D_INTERNAL_TYPE_32F: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_32F; -+ default: -+ unreachable("Unknown internal render target type"); -+ } -+ -+ return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID; -+#endif -+} -+ -+static void -+cmd_buffer_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer, -+ int rt, -+ uint32_t *rt_bpp, -+#if V3D_VERSION == 42 -+ uint32_t *rt_type, -+ uint32_t *rt_clamp) -+#else -+ uint32_t *rt_type_clamp) -+#endif -+{ -+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; -+ -+ assert(state->subpass_idx < state->pass->subpass_count); -+ const struct v3dv_subpass *subpass = -+ &state->pass->subpasses[state->subpass_idx]; -+ -+ if (rt >= subpass->color_count) -+ return; -+ -+ struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt]; -+ const uint32_t attachment_idx = attachment->attachment; -+ if (attachment_idx == VK_ATTACHMENT_UNUSED) -+ return; -+ -+ assert(attachment_idx < state->framebuffer->attachment_count && -+ attachment_idx < state->attachment_alloc_count); -+ struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view; -+ assert(vk_format_is_color(iview->vk.format)); -+ -+ assert(iview->plane_count == 1); -+ *rt_bpp = iview->planes[0].internal_bpp; -+#if V3D_VERSION == 42 -+ *rt_type = iview->planes[0].internal_type; -+ *rt_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type, -+ iview->vk.format); -+#endif -+#if V3D_VERSION >= 71 -+ *rt_type_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type, -+ iview->vk.format); -+#endif -+} -+ - void - v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - { -@@ -939,10 +1036,20 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - */ - job->early_zs_clear = do_early_zs_clear; - -+#if V3D_VERSION >= 71 -+ uint32_t base_addr = 0; -+#endif - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t attachment_idx = subpass->color_attachments[i].attachment; -- if (attachment_idx == VK_ATTACHMENT_UNUSED) -+ if (attachment_idx == VK_ATTACHMENT_UNUSED) { -+#if V3D_VERSION >= 71 -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.render_target_number = i; -+ rt.stride = 1; /* Unused */ -+ } -+#endif - continue; -+ } - - struct v3dv_image_view *iview = - state->attachments[attachment_idx].image_view; -@@ -978,9 +1085,6 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - clear.render_target_number = i; - }; - #endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif - - if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) { - #if V3D_VERSION == 42 -@@ -1010,27 +1114,44 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - unreachable("HW generation 71 not supported yet."); - #endif - } -+ -+#if V3D_VERSION >= 71 -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.clear_color_low_bits = clear_color[0]; -+ cmd_buffer_render_pass_setup_render_target(cmd_buffer, i, &rt.internal_bpp, -+ &rt.internal_type_and_clamping); -+ rt.stride = -+ v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width, -+ v3d_internal_bpp_words(rt.internal_bpp)); -+ rt.base_address = base_addr; -+ rt.render_target_number = i; -+ -+ /* base_addr in multiples of 512 bits. We divide by 8 because stride -+ * is in 128-bit units, but it is packing 2 rows worth of data, so we -+ * need to divide it by 2 so it is only 1 row, and then again by 4 so -+ * it is in 512-bit units. -+ */ -+ base_addr += (tiling->tile_height * rt.stride) / 8; -+ } -+#endif - } - - #if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { -- v3dX(cmd_buffer_render_pass_setup_render_target) -+ cmd_buffer_render_pass_setup_render_target - (cmd_buffer, 0, &rt.render_target_0_internal_bpp, - &rt.render_target_0_internal_type, &rt.render_target_0_clamp); -- v3dX(cmd_buffer_render_pass_setup_render_target) -+ cmd_buffer_render_pass_setup_render_target - (cmd_buffer, 1, &rt.render_target_1_internal_bpp, - &rt.render_target_1_internal_type, &rt.render_target_1_clamp); -- v3dX(cmd_buffer_render_pass_setup_render_target) -+ cmd_buffer_render_pass_setup_render_target - (cmd_buffer, 2, &rt.render_target_2_internal_bpp, - &rt.render_target_2_internal_type, &rt.render_target_2_clamp); -- v3dX(cmd_buffer_render_pass_setup_render_target) -+ cmd_buffer_render_pass_setup_render_target - (cmd_buffer, 3, &rt.render_target_3_internal_bpp, - &rt.render_target_3_internal_type, &rt.render_target_3_clamp); - } - #endif --#if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); --#endif - - /* Ends rendering mode config. */ - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { -@@ -2445,46 +2566,3 @@ v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer, - buffer->mem_offset + offset); - } - } -- --void --v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer, -- int rt, -- uint32_t *rt_bpp, -- uint32_t *rt_type, -- uint32_t *rt_clamp) --{ -- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; -- -- assert(state->subpass_idx < state->pass->subpass_count); -- const struct v3dv_subpass *subpass = -- &state->pass->subpasses[state->subpass_idx]; -- -- if (rt >= subpass->color_count) -- return; -- -- struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt]; -- const uint32_t attachment_idx = attachment->attachment; -- if (attachment_idx == VK_ATTACHMENT_UNUSED) -- return; -- -- assert(attachment_idx < state->framebuffer->attachment_count && -- attachment_idx < state->attachment_alloc_count); -- struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view; -- assert(vk_format_is_color(iview->vk.format)); -- -- assert(iview->plane_count == 1); -- *rt_bpp = iview->planes[0].internal_bpp; -- if (vk_format_is_int(iview->vk.view_format)) --#if V3D_VERSION == 42 -- *rt_type = iview->planes[0].internal_type; -- if (vk_format_is_int(iview->vk.format)) -- *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; -- else if (vk_format_is_srgb(iview->vk.view_format)) -- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM; -- else -- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif --} -diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c -index e4084d851fc..c6391bc6d83 100644 ---- a/src/broadcom/vulkan/v3dvx_meta_common.c -+++ b/src/broadcom/vulkan/v3dvx_meta_common.c -@@ -26,6 +26,7 @@ - - #include "broadcom/common/v3d_macros.h" - #include "broadcom/common/v3d_tfu.h" -+#include "broadcom/common/v3d_util.h" - #include "broadcom/cle/v3dx_pack.h" - #include "broadcom/compiler/v3d_compiler.h" - -@@ -150,7 +151,16 @@ emit_rcl_prologue(struct v3dv_job *job, - } - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.internal_bpp = tiling->internal_bpp; -+ rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type, -+ fb->vk_format); -+ rt.stride = -+ v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width, -+ v3d_internal_bpp_words(rt.internal_bpp)); -+ rt.base_address = 0; -+ rt.render_target_number = 0; -+ } - #endif - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index ad8ddfa5731..a4157d11c7c 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -125,13 +125,6 @@ v3dX(get_hw_clear_color)(const VkClearColorValue *color, - uint32_t internal_size, - uint32_t *hw_color); - --void --v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer, -- int rt, -- uint32_t *rt_bpp, -- uint32_t *rt_type, -- uint32_t *rt_clamp); -- - /* Used at v3dv_device */ - - void -@@ -325,3 +318,7 @@ uint32_t v3dX(max_descriptor_bo_size)(void); - uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane); - - uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane); -+ -+uint32_t -+v3dX(clamp_for_format_and_type)(uint32_t rt_type, -+ VkFormat vk_format); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0079-v3dvx-cmd_buffer-emit-CLEAR_RENDER_TARGETS-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0079-v3dvx-cmd_buffer-emit-CLEAR_RENDER_TARGETS-for-v71.patch deleted file mode 100644 index ee9e9d2074..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0079-v3dvx-cmd_buffer-emit-CLEAR_RENDER_TARGETS-for-v71.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 7c89d8026fd550282d54933f37ffc2773869326f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Mon, 26 Jul 2021 15:08:11 +0200 -Subject: [PATCH 079/142] v3dvx/cmd_buffer: emit CLEAR_RENDER_TARGETS for v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index c6307890da5..ae1c21ae00b 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1219,7 +1219,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - } - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ cl_emit(rcl, CLEAR_RENDER_TARGETS, clear_rt); - #endif - } - cl_emit(rcl, END_OF_TILE_MARKER, end); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0080-v3dv-cmd_buffer-emit-CLIPPER_XY_SCALING-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0080-v3dv-cmd_buffer-emit-CLIPPER_XY_SCALING-for-v71.patch deleted file mode 100644 index a6507e3a17..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0080-v3dv-cmd_buffer-emit-CLIPPER_XY_SCALING-for-v71.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 2eb29b57fde2acda76e12953b3a1050f3056b39d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Sun, 19 Sep 2021 23:37:32 +0200 -Subject: [PATCH 080/142] v3dv/cmd_buffer: emit CLIPPER_XY_SCALING for v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index ae1c21ae00b..2e525a11619 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1246,9 +1246,7 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) - * now, would need to change if we allow multiple viewports - */ - float *vptranslate = dynamic->viewport.translate[0]; --#if V3D_VERSION == 42 - float *vpscale = dynamic->viewport.scale[0]; --#endif - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); -@@ -1268,7 +1266,10 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) - } - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { -+ clip.viewport_half_width_in_1_64th_of_pixel = vpscale[0] * 64.0f; -+ clip.viewport_half_height_in_1_64th_of_pixel = vpscale[1] * 64.0f; -+ } - #endif - - float translate_z, scale_z; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0081-v3dv-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for.patch b/projects/RPi/devices/RPi5/patches/mesa/0081-v3dv-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for.patch deleted file mode 100644 index cb0d7512d3..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0081-v3dv-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 611bf6a7445837c7e20416ff9f11a6dad9c543d7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 14 Sep 2021 10:08:19 +0200 -Subject: [PATCH 081/142] v3dv/uniforms: update VIEWPORT_X/Y_SCALE uniforms for - v71 - -As the packet CLIPPER_XY scaling, this needs to be computed on 1/64ths -of pixel, instead of 1/256ths of pixels. - -As this is the usual values that we get from macros, we add manually a -v42 and v71 macro, and define a new helper (V3DV_X) to get the value -for the current hw version. ---- - src/broadcom/vulkan/v3dv_private.h | 17 +++++++++++++++++ - src/broadcom/vulkan/v3dv_uniforms.c | 7 ++++--- - src/broadcom/vulkan/v3dvx_private.h | 9 +++++++++ - 3 files changed, 30 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index 6bdf338c67b..cd6811b19c2 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -2617,6 +2617,23 @@ u64_compare(const void *key1, const void *key2) - v3d_X_thing; \ - }) - -+/* Helper to get hw-specific macro values */ -+#define V3DV_X(device, thing) ({ \ -+ __typeof(V3D42_##thing) V3D_X_THING; \ -+ switch (device->devinfo.ver) { \ -+ case 42: \ -+ V3D_X_THING = V3D42_##thing; \ -+ break; \ -+ case 71: \ -+ V3D_X_THING = V3D71_##thing; \ -+ break; \ -+ default: \ -+ unreachable("Unsupported hardware generation"); \ -+ } \ -+ V3D_X_THING; \ -+}) -+ -+ - - /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to - * define v3dX for each version supported, because when we compile code that -diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c -index 72fa9a1b39c..0e681cc4ee2 100644 ---- a/src/broadcom/vulkan/v3dv_uniforms.c -+++ b/src/broadcom/vulkan/v3dv_uniforms.c -@@ -497,7 +497,8 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect); - - struct v3dv_cl_out *uniforms = cl_start(&job->indirect); -- -+ float clipper_xy_granularity = -+ V3DV_X(cmd_buffer->device, CLIPPER_XY_GRANULARITY); - for (int i = 0; i < uinfo->count; i++) { - uint32_t data = uinfo->data[i]; - -@@ -520,11 +521,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, - break; - - case QUNIFORM_VIEWPORT_X_SCALE: -- cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f); -+ cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * clipper_xy_granularity); - break; - - case QUNIFORM_VIEWPORT_Y_SCALE: -- cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f); -+ cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * clipper_xy_granularity); - break; - - case QUNIFORM_VIEWPORT_Z_OFFSET: { -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index a4157d11c7c..ff9ba75cf93 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -319,6 +319,15 @@ uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane); - - uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane); - -+/* General utils */ -+ -+uint32_t -+v3dX(clamp_for_format_and_type)(uint32_t rt_type, -+ VkFormat vk_format); -+ -+#define V3D42_CLIPPER_XY_GRANULARITY 256.0f -+#define V3D71_CLIPPER_XY_GRANULARITY 64.0f -+ - uint32_t - v3dX(clamp_for_format_and_type)(uint32_t rt_type, - VkFormat vk_format); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0082-v3dv-cmd_buffer-just-don-t-fill-up-early-z-fields-fo.patch b/projects/RPi/devices/RPi5/patches/mesa/0082-v3dv-cmd_buffer-just-don-t-fill-up-early-z-fields-fo.patch deleted file mode 100644 index 8a77ae3708..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0082-v3dv-cmd_buffer-just-don-t-fill-up-early-z-fields-fo.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 3819efaf2bb6fd8bd9cd45d54fb7254377b2296a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 27 Jul 2021 14:02:30 +0200 -Subject: [PATCH 082/142] v3dv/cmd_buffer: just don't fill up early-z fields - for CFG_BITS for v71 - -For v71 early_z_enable/early_z_updates_enable is configured with -packet 121. ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 2e525a11619..fe9f7e43596 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1783,17 +1783,14 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer) - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS)); - v3dv_return_if_oom(cmd_buffer, NULL); - --#if V3D_VERSION == 42 -- bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer); - cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) { -+#if V3D_VERSION == 42 -+ bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer); - config.early_z_enable = enable_ez; - config.early_z_updates_enable = config.early_z_enable && - pipeline->z_updates_enable; -- } --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); - #endif -+ } - } - - void --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0083-v3dv-default-vertex-attribute-values-are-gen-dependa.patch b/projects/RPi/devices/RPi5/patches/mesa/0083-v3dv-default-vertex-attribute-values-are-gen-dependa.patch deleted file mode 100644 index b37e2be950..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0083-v3dv-default-vertex-attribute-values-are-gen-dependa.patch +++ /dev/null @@ -1,219 +0,0 @@ -From e3b1a578f45ea830d790970115b6de978d56edb8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 28 Jul 2021 12:01:38 +0200 -Subject: [PATCH 083/142] v3dv: default vertex attribute values are gen - dependant - -Content, structure and size would depend on the generation. Even if it -is needed at all. - -So let's move it to the v3dvx files. ---- - src/broadcom/vulkan/v3dv_device.c | 2 +- - src/broadcom/vulkan/v3dv_pipeline.c | 61 ++------------------------- - src/broadcom/vulkan/v3dv_private.h | 4 -- - src/broadcom/vulkan/v3dvx_pipeline.c | 63 ++++++++++++++++++++++++++++ - src/broadcom/vulkan/v3dvx_private.h | 8 ++++ - 5 files changed, 75 insertions(+), 63 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index c8719d33f15..01e2dd7ac2d 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -2043,7 +2043,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, - v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0, - device->instance->default_pipeline_cache_enabled); - device->default_attribute_float = -- v3dv_pipeline_create_default_attribute_values(device, NULL); -+ v3dv_X(device, create_default_attribute_values)(device, NULL); - - device->device_address_mem_ctx = ralloc_context(NULL); - util_dynarray_init(&device->device_address_bo_list, -diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c -index 22f01bdf64b..d012ff8f948 100644 ---- a/src/broadcom/vulkan/v3dv_pipeline.c -+++ b/src/broadcom/vulkan/v3dv_pipeline.c -@@ -2802,62 +2802,6 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline, - } - } - --static bool --pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) --{ -- for (uint8_t i = 0; i < pipeline->va_count; i++) { -- if (vk_format_is_int(pipeline->va[i].vk_format)) -- return true; -- } -- return false; --} -- --/* @pipeline can be NULL. We assume in that case that all the attributes have -- * a float format (we only create an all-float BO once and we reuse it with -- * all float pipelines), otherwise we look at the actual type of each -- * attribute used with the specific pipeline passed in. -- */ --struct v3dv_bo * --v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device, -- struct v3dv_pipeline *pipeline) --{ -- uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; -- struct v3dv_bo *bo; -- -- bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true); -- -- if (!bo) { -- fprintf(stderr, "failed to allocate memory for the default " -- "attribute values\n"); -- return NULL; -- } -- -- bool ok = v3dv_bo_map(device, bo, size); -- if (!ok) { -- fprintf(stderr, "failed to map default attribute values buffer\n"); -- return false; -- } -- -- uint32_t *attrs = bo->map; -- uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0; -- for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { -- attrs[i * 4 + 0] = 0; -- attrs[i * 4 + 1] = 0; -- attrs[i * 4 + 2] = 0; -- VkFormat attr_format = -- pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED; -- if (i < va_count && vk_format_is_int(attr_format)) { -- attrs[i * 4 + 3] = 1; -- } else { -- attrs[i * 4 + 3] = fui(1.0); -- } -- } -- -- v3dv_bo_unmap(device, bo); -- -- return bo; --} -- - static void - pipeline_set_sample_mask(struct v3dv_pipeline *pipeline, - const VkPipelineMultisampleStateCreateInfo *ms_info) -@@ -2992,9 +2936,10 @@ pipeline_init(struct v3dv_pipeline *pipeline, - - v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info); - -- if (pipeline_has_integer_vertex_attrib(pipeline)) { -+ if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) { - pipeline->default_attribute_values = -- v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline); -+ v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline); -+ - if (!pipeline->default_attribute_values) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } else { -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index cd6811b19c2..a9fab24d19e 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -2500,10 +2500,6 @@ void - v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline, - struct v3dv_pipeline_cache *cache); - --struct v3dv_bo * --v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device, -- struct v3dv_pipeline *pipeline); -- - VkResult - v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device, - nir_shader *nir, -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index 922698b08a2..e235220cb14 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -664,3 +664,66 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline, - } - } - } -+ -+static bool -+pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) -+{ -+ for (uint8_t i = 0; i < pipeline->va_count; i++) { -+ if (vk_format_is_int(pipeline->va[i].vk_format)) -+ return true; -+ } -+ return false; -+} -+ -+bool -+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline) -+{ -+ return pipeline_has_integer_vertex_attrib(pipeline); -+} -+ -+/* @pipeline can be NULL. In that case we assume the most common case. For -+ * example, for v42 we assume in that case that all the attributes have a -+ * float format (we only create an all-float BO once and we reuse it with all -+ * float pipelines), otherwise we look at the actual type of each attribute -+ * used with the specific pipeline passed in. -+ */ -+struct v3dv_bo * -+v3dX(create_default_attribute_values)(struct v3dv_device *device, -+ struct v3dv_pipeline *pipeline) -+{ -+ uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; -+ struct v3dv_bo *bo; -+ -+ bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true); -+ -+ if (!bo) { -+ fprintf(stderr, "failed to allocate memory for the default " -+ "attribute values\n"); -+ return NULL; -+ } -+ -+ bool ok = v3dv_bo_map(device, bo, size); -+ if (!ok) { -+ fprintf(stderr, "failed to map default attribute values buffer\n"); -+ return NULL; -+ } -+ -+ uint32_t *attrs = bo->map; -+ uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0; -+ for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { -+ attrs[i * 4 + 0] = 0; -+ attrs[i * 4 + 1] = 0; -+ attrs[i * 4 + 2] = 0; -+ VkFormat attr_format = -+ pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED; -+ if (i < va_count && vk_format_is_int(attr_format)) { -+ attrs[i * 4 + 3] = 1; -+ } else { -+ attrs[i * 4 + 3] = fui(1.0); -+ } -+ } -+ -+ v3dv_bo_unmap(device, bo); -+ -+ return bo; -+} -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index ff9ba75cf93..036ce11b455 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -306,6 +306,14 @@ void - v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *vi_info, - const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info); -+ -+bool -+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline); -+ -+struct v3dv_bo * -+v3dX(create_default_attribute_values)(struct v3dv_device *device, -+ struct v3dv_pipeline *pipeline); -+ - /* Used at v3dv_queue */ - void - v3dX(job_emit_noop)(struct v3dv_job *job); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0084-v3dv-pipeline-default-vertex-attributes-values-are-n.patch b/projects/RPi/devices/RPi5/patches/mesa/0084-v3dv-pipeline-default-vertex-attributes-values-are-n.patch deleted file mode 100644 index f33f20827d..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0084-v3dv-pipeline-default-vertex-attributes-values-are-n.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 8464dc8869f3d2eccfecac7b4358cc0ffe05f081 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 28 Jul 2021 12:05:26 +0200 -Subject: [PATCH 084/142] v3dv/pipeline: default vertex attributes values are - not needed for v71 - -There are not part of the shader state record. ---- - src/broadcom/vulkan/v3dv_private.h | 10 +++++++++- - src/broadcom/vulkan/v3dvx_pipeline.c | 10 ++++++++++ - 2 files changed, 19 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index a9fab24d19e..300a1ec8ae1 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -581,6 +581,10 @@ struct v3dv_device { - * being float being float, allowing us to reuse the same BO for all - * pipelines matching this requirement. Pipelines that need integer - * attributes will create their own BO. -+ * -+ * Note that since v71 the default attribute values are not needed, so this -+ * can be NULL. -+ * - */ - struct v3dv_bo *default_attribute_float; - -@@ -2289,11 +2293,15 @@ struct v3dv_pipeline { - unsigned char sha1[20]; - - /* In general we can reuse v3dv_device->default_attribute_float, so note -- * that the following can be NULL. -+ * that the following can be NULL. In 7.x this is not used, so it will be -+ * NULL. - * - * FIXME: the content of this BO will be small, so it could be improved to - * be uploaded to a common BO. But as in most cases it will be NULL, it is - * not a priority. -+ * -+ * Note that since v71 the default attribute values are not needed, so this -+ * can be NULL. - */ - struct v3dv_bo *default_attribute_values; - -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index e235220cb14..4dc6d70efe1 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -665,6 +665,7 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline, - } - } - -+#if V3D_VERSION == 42 - static bool - pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) - { -@@ -674,11 +675,16 @@ pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) - } - return false; - } -+#endif - - bool - v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline) - { -+#if V3D_VERSION == 42 - return pipeline_has_integer_vertex_attrib(pipeline); -+#endif -+ -+ return false; - } - - /* @pipeline can be NULL. In that case we assume the most common case. For -@@ -691,6 +697,10 @@ struct v3dv_bo * - v3dX(create_default_attribute_values)(struct v3dv_device *device, - struct v3dv_pipeline *pipeline) - { -+#if V3D_VERSION >= 71 -+ return NULL; -+#endif -+ - uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; - struct v3dv_bo *bo; - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0085-v3dv-pipeline-handle-GL_SHADER_STATE_RECORD-changed-.patch b/projects/RPi/devices/RPi5/patches/mesa/0085-v3dv-pipeline-handle-GL_SHADER_STATE_RECORD-changed-.patch deleted file mode 100644 index 0d8acd9826..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0085-v3dv-pipeline-handle-GL_SHADER_STATE_RECORD-changed-.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 339096598660ec34be8087007dd4d66581de1c4e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 28 Jul 2021 13:45:52 +0200 -Subject: [PATCH 085/142] v3dv/pipeline: handle GL_SHADER_STATE_RECORD changed - size on v71 - -It is likely that we would need more changes, as this packet changed, -but this is enough to get basic tests running. Any additional support -will be handled with new commits. ---- - src/broadcom/vulkan/v3dvx_pipeline.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index 4dc6d70efe1..a640c1d084a 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -360,7 +360,7 @@ v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline, - static void - pack_shader_state_record(struct v3dv_pipeline *pipeline) - { -- assert(sizeof(pipeline->shader_state_record) == -+ assert(sizeof(pipeline->shader_state_record) >= - cl_packet_length(GL_SHADER_STATE_RECORD)); - - struct v3d_fs_prog_data *prog_data_fs = -@@ -453,9 +453,6 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - prog_data_vs->separate_segments; - #endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif - - shader.coordinate_shader_input_vpm_segment_size = - prog_data_vs_bin->separate_segments ? --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0086-v3dv-setup-render-pass-color-clears-for-any-format-b.patch b/projects/RPi/devices/RPi5/patches/mesa/0086-v3dv-setup-render-pass-color-clears-for-any-format-b.patch deleted file mode 100644 index b1d310f166..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0086-v3dv-setup-render-pass-color-clears-for-any-format-b.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 5b1342eb1e255d17619b1a7b33eaf7b31f5e50a5 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 22 Sep 2021 12:03:58 +0200 -Subject: [PATCH 086/142] v3dv: setup render pass color clears for any format - bpp in v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 33 ++++++++++++++++---------- - 1 file changed, 20 insertions(+), 13 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index fe9f7e43596..1b39e230580 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1064,7 +1064,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - UNUSED const uint32_t *clear_color = - &state->attachments[attachment_idx].clear_value.color[0]; - -- uint32_t clear_pad = 0; -+ UNUSED uint32_t clear_pad = 0; - if (slice->tiling == V3D_TILING_UIF_NO_XOR || - slice->tiling == V3D_TILING_UIF_XOR) { - int uif_block_height = v3d_utile_height(image->planes[plane].cpp) * 2; -@@ -1084,10 +1084,8 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - clear.clear_color_next_24_bits = clear_color[1] & 0xffffff; - clear.render_target_number = i; - }; --#endif - - if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) { --#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((clear_color[1] >> 24) | (clear_color[2] << 8)); -@@ -1095,25 +1093,16 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8)); - clear.render_target_number = i; - }; --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif -- - } - - if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { --#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = clear_color[3] >> 16; - clear.render_target_number = i; - }; --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif - } -+#endif - - #if V3D_VERSION >= 71 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -@@ -1133,6 +1122,24 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - */ - base_addr += (tiling->tile_height * rt.stride) / 8; - } -+ -+ if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) { -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) { -+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */ -+ ((uint64_t) clear_color[1]) | -+ (((uint64_t) (clear_color[2] & 0xff)) << 32); -+ rt.render_target_number = i; -+ } -+ } -+ -+ if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128) { -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) { -+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */ -+ (((uint64_t) (clear_color[2] & 0xffffff00)) >> 8) | -+ (((uint64_t) (clear_color[3])) << 24); -+ rt.render_target_number = i; -+ } -+ } - #endif - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0087-v3dv-setup-TLB-clear-color-for-meta-operations-in-v7.patch b/projects/RPi/devices/RPi5/patches/mesa/0087-v3dv-setup-TLB-clear-color-for-meta-operations-in-v7.patch deleted file mode 100644 index 26e8475540..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0087-v3dv-setup-TLB-clear-color-for-meta-operations-in-v7.patch +++ /dev/null @@ -1,126 +0,0 @@ -From ff5b5d4405b1d5600d7f1c4355202fd303f56700 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 22 Sep 2021 12:04:21 +0200 -Subject: [PATCH 087/142] v3dv: setup TLB clear color for meta operations in - v71 - ---- - src/broadcom/vulkan/v3dvx_meta_common.c | 46 +++++++++++++++---------- - 1 file changed, 27 insertions(+), 19 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c -index c6391bc6d83..09ebcfa97c1 100644 ---- a/src/broadcom/vulkan/v3dvx_meta_common.c -+++ b/src/broadcom/vulkan/v3dvx_meta_common.c -@@ -75,8 +75,9 @@ emit_rcl_prologue(struct v3dv_job *job, - config.internal_depth_type = fb->internal_depth_type; - } - -+ const uint32_t *color = NULL; - if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) { -- uint32_t clear_pad = 0; -+ UNUSED uint32_t clear_pad = 0; - if (clear_info->image) { - const struct v3dv_image *image = clear_info->image; - -@@ -101,20 +102,16 @@ emit_rcl_prologue(struct v3dv_job *job, - } - } - -+ color = &clear_info->clear_value->color[0]; -+ - #if V3D_VERSION == 42 -- const uint32_t *color = &clear_info->clear_value->color[0]; - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { - clear.clear_color_low_32_bits = color[0]; - clear.clear_color_next_24_bits = color[1] & 0x00ffffff; - clear.render_target_number = 0; - }; --#endif --#if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); --#endif - - if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { --#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((color[1] >> 24) | (color[2] << 8)); -@@ -122,25 +119,16 @@ emit_rcl_prologue(struct v3dv_job *job, - ((color[2] >> 24) | ((color[3] & 0xffff) << 8)); - clear.render_target_number = 0; - }; --#endif --#if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); --#endif -- - } - - if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { --#if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = color[3] >> 16; - clear.render_target_number = 0; - }; --#endif --#if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); --#endif - } -+#endif - } - - #if V3D_VERSION == 42 -@@ -150,8 +138,11 @@ emit_rcl_prologue(struct v3dv_job *job, - rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; - } - #endif -+ - #if V3D_VERSION >= 71 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ if (color) -+ rt.clear_color_low_bits = color[0]; - rt.internal_bpp = tiling->internal_bpp; - rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type, - fb->vk_format); -@@ -161,6 +152,24 @@ emit_rcl_prologue(struct v3dv_job *job, - rt.base_address = 0; - rt.render_target_number = 0; - } -+ -+ if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) { -+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */ -+ ((uint64_t) color[1]) | -+ (((uint64_t) (color[2] & 0xff)) << 32); -+ rt.render_target_number = 0; -+ } -+ } -+ -+ if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_128) { -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) { -+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */ -+ (((uint64_t) (color[2] & 0xffffff00)) >> 8) | -+ (((uint64_t) (color[3])) << 24); -+ rt.render_target_number = 0; -+ } -+ } - #endif - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { -@@ -229,9 +238,8 @@ emit_frame_setup(struct v3dv_job *job, - } - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ cl_emit(rcl, CLEAR_RENDER_TARGETS, clear); - #endif -- - } - cl_emit(rcl, END_OF_TILE_MARKER, end); - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0088-v3dv-fix-up-texture-shader-state-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0088-v3dv-fix-up-texture-shader-state-for-v71.patch deleted file mode 100644 index 2bf2de50b7..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0088-v3dv-fix-up-texture-shader-state-for-v71.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 1e9d7d69849fa646b331f7661c74ee138badc4bb Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 25 Oct 2021 01:37:12 +0200 -Subject: [PATCH 088/142] v3dv: fix up texture shader state for v71 - -There are some new fields for YCbCr with pointers for the various -planes in multi-planar formats. These need to match the base address -pointer in the texture state, or the hardware will assume this is a -multi-planar texture. ---- - src/broadcom/vulkan/v3dvx_image.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c -index dac6ff2741f..848290c2a47 100644 ---- a/src/broadcom/vulkan/v3dvx_image.c -+++ b/src/broadcom/vulkan/v3dvx_image.c -@@ -129,6 +129,14 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - v3dv_layer_offset(image, 0, image_view->vk.base_array_layer, - iplane); - tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); -+ -+#if V3D_VERSION >= 71 -+ tex.chroma_offset_x = 1; -+ tex.chroma_offset_y = 1; -+ /* See comment in XML field definition for rationale of the shifts */ -+ tex.texture_base_pointer_cb = base_offset >> 6; -+ tex.texture_base_pointer_cr = base_offset >> 6; -+#endif - } - } - } -@@ -191,5 +199,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, - buffer_view->offset; - - tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); -+ -+#if V3D_VERSION >= 71 -+ tex.chroma_offset_x = 1; -+ tex.chroma_offset_y = 1; -+ /* See comment in XML field definition for rationale of the shifts */ -+ tex.texture_base_pointer_cb = base_offset >> 6; -+ tex.texture_base_pointer_cr = base_offset >> 6; -+#endif - } - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0089-v3dv-handle-new-texture-state-transfer-functions-in-.patch b/projects/RPi/devices/RPi5/patches/mesa/0089-v3dv-handle-new-texture-state-transfer-functions-in-.patch deleted file mode 100644 index 7647e30707..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0089-v3dv-handle-new-texture-state-transfer-functions-in-.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 1f150a3a92741f7654a13626bd5b27b5575f2b76 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Mon, 25 Oct 2021 01:38:31 +0200 -Subject: [PATCH 089/142] v3dv: handle new texture state transfer functions in - v71 - ---- - src/broadcom/vulkan/v3dvx_image.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c -index 848290c2a47..437d4588c7e 100644 ---- a/src/broadcom/vulkan/v3dvx_image.c -+++ b/src/broadcom/vulkan/v3dvx_image.c -@@ -108,15 +108,16 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - - tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64; - -+ bool is_srgb = vk_format_is_srgb(image_view->vk.format); - #if V3D_VERSION == 42 - tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse; - #endif - - #if V3D_VERSION == 42 -- tex.srgb = vk_format_is_srgb(image_view->vk.view_format); -+ tex.srgb = is_srgb; - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE; - #endif - - /* At this point we don't have the job. That's the reason the first -@@ -181,11 +182,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, - - assert(buffer_view->format->plane_count == 1); - tex.texture_type = buffer_view->format->planes[0].tex_type; -+ -+ bool is_srgb = vk_format_is_srgb(buffer_view->vk_format); - #if V3D_VERSION == 42 -- tex.srgb = vk_format_is_srgb(buffer_view->vk_format); -+ tex.srgb = is_srgb; - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE; - #endif - - /* At this point we don't have the job. That's the reason the first --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0090-v3dv-implement-noop-job-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0090-v3dv-implement-noop-job-for-v71.patch deleted file mode 100644 index 69401c2100..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0090-v3dv-implement-noop-job-for-v71.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 45de9f019ee92635de9a505db58439f0f4561281 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 28 Sep 2021 08:14:11 +0200 -Subject: [PATCH 090/142] v3dv: implement noop job for v71 - ---- - src/broadcom/vulkan/v3dvx_queue.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c -index 1a26d04aef7..f8cee36e3bf 100644 ---- a/src/broadcom/vulkan/v3dvx_queue.c -+++ b/src/broadcom/vulkan/v3dvx_queue.c -@@ -46,7 +46,8 @@ v3dX(job_emit_noop)(struct v3dv_job *job) - config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32; - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ config.log2_tile_width = 3; /* Tile size 64 */ -+ config.log2_tile_height = 3; /* Tile size 64 */ - #endif - } - -@@ -58,10 +59,13 @@ v3dX(job_emit_noop)(struct v3dv_job *job) - } - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.internal_bpp = V3D_INTERNAL_BPP_32; -+ rt.internal_type_and_clamping = V3D_RENDER_TARGET_TYPE_CLAMP_8; -+ rt.stride = 1; /* Unused RT */ -+ } - #endif - -- - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = 1.0f; - clear.stencil_clear_value = 0; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0091-v3dv-handle-render-pass-global-clear-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0091-v3dv-handle-render-pass-global-clear-for-v71.patch deleted file mode 100644 index 066e45d424..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0091-v3dv-handle-render-pass-global-clear-for-v71.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 3e607bb28056bb52242be6878281efae84026813 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 28 Sep 2021 08:23:48 +0200 -Subject: [PATCH 091/142] v3dv: handle render pass global clear for v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 66 ++++++++++++++++---------- - 1 file changed, 41 insertions(+), 25 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 1b39e230580..48b2e319e51 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -362,6 +362,11 @@ cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, - iview->vk.base_array_layer + layer, - image_plane); - -+ /* The Clear Buffer bit is not supported for Z/Stencil stores in 7.x and it -+ * is broken in earlier V3D versions. -+ */ -+ assert((buffer != Z && buffer != STENCIL && buffer != ZSTENCIL) || !clear); -+ - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = buffer; - store.address = v3dv_cl_address(image->planes[image_plane].mem->bo, layer_offset); -@@ -484,6 +489,30 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, - const VkImageAspectFlags aspects = - vk_format_aspects(ds_attachment->desc.format); - -+#if V3D_VERSION <= 42 -+ /* GFXH-1689: The per-buffer store command's clear buffer bit is broken -+ * for depth/stencil. -+ * -+ * There used to be some confusion regarding the Clear Tile Buffers -+ * Z/S bit also being broken, but we confirmed with Broadcom that this -+ * is not the case, it was just that some other hardware bugs (that we -+ * need to work around, such as GFXH-1461) could cause this bit to behave -+ * incorrectly. -+ * -+ * There used to be another issue where the RTs bit in the Clear Tile -+ * Buffers packet also cleared Z/S, but Broadcom confirmed this is -+ * fixed since V3D 4.1. -+ * -+ * So if we have to emit a clear of depth or stencil we don't use -+ * the per-buffer store clear bit, even if we need to store the buffers, -+ * instead we always have to use the Clear Tile Buffers Z/S bit. -+ * If we have configured the job to do early Z/S clearing, then we -+ * don't want to emit any Clear Tile Buffers command at all here. -+ * -+ * Note that GFXH-1689 is not reproduced in the simulator, where -+ * using the clear buffer bit in depth/stencil stores works fine. -+ */ -+ - /* Only clear once on the first subpass that uses the attachment */ - uint32_t ds_first_subpass = !state->pass->multiview_enabled ? - ds_attachment->first_subpass : -@@ -503,6 +532,17 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, - ds_attachment->desc.stencilLoadOp, - subpass->do_stencil_clear_with_draw); - -+ use_global_zs_clear = !state->job->early_zs_clear && -+ (needs_depth_clear || needs_stencil_clear); -+#endif -+#if V3D_VERSION >= 71 -+ /* The store command's clear buffer bit cannot be used for Z/S stencil: -+ * since V3D 4.5.6 Z/S buffers are automatically cleared between tiles, -+ * so we don't want to emit redundant clears here. -+ */ -+ use_global_zs_clear = false; -+#endif -+ - /* Skip the last store if it is not required */ - uint32_t ds_last_subpass = !pass->multiview_enabled ? - ds_attachment->last_subpass : -@@ -545,30 +585,6 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, - needs_stencil_store = subpass->resolve_stencil; - } - -- /* GFXH-1689: The per-buffer store command's clear buffer bit is broken -- * for depth/stencil. -- * -- * There used to be some confusion regarding the Clear Tile Buffers -- * Z/S bit also being broken, but we confirmed with Broadcom that this -- * is not the case, it was just that some other hardware bugs (that we -- * need to work around, such as GFXH-1461) could cause this bit to behave -- * incorrectly. -- * -- * There used to be another issue where the RTs bit in the Clear Tile -- * Buffers packet also cleared Z/S, but Broadcom confirmed this is -- * fixed since V3D 4.1. -- * -- * So if we have to emit a clear of depth or stencil we don't use -- * the per-buffer store clear bit, even if we need to store the buffers, -- * instead we always have to use the Clear Tile Buffers Z/S bit. -- * If we have configured the job to do early Z/S clearing, then we -- * don't want to emit any Clear Tile Buffers command at all here. -- * -- * Note that GFXH-1689 is not reproduced in the simulator, where -- * using the clear buffer bit in depth/stencil stores works fine. -- */ -- use_global_zs_clear = !state->job->early_zs_clear && -- (needs_depth_clear || needs_stencil_clear); - if (needs_depth_store || needs_stencil_store) { - const uint32_t zs_buffer = - v3dv_zs_buffer(needs_depth_store, needs_stencil_store); -@@ -673,7 +689,7 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, - } - #endif - #if V3D_VERSION >= 71 -- unreachable("Hardware generation 71 not supported yet."); -+ cl_emit(cl, CLEAR_RENDER_TARGETS, clear); - #endif - } - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0092-v3dv-GFX-1461-does-not-affect-V3D-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0092-v3dv-GFX-1461-does-not-affect-V3D-7.x.patch deleted file mode 100644 index 0251f31b56..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0092-v3dv-GFX-1461-does-not-affect-V3D-7.x.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 3794f6f08c559c4e442b57e992d501fb7d515b9b Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 28 Sep 2021 08:31:04 +0200 -Subject: [PATCH 092/142] v3dv: GFX-1461 does not affect V3D 7.x - ---- - src/broadcom/vulkan/v3dv_pass.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c -index 20f5014268d..3e82c15df88 100644 ---- a/src/broadcom/vulkan/v3dv_pass.c -+++ b/src/broadcom/vulkan/v3dv_pass.c -@@ -236,11 +236,13 @@ v3dv_CreateRenderPass2(VkDevice _device, - - /* GFXH-1461: if depth is cleared but stencil is loaded (or vice versa), - * the clear might get lost. If a subpass has this then we can't emit -- * the clear using the TLB and we have to do it as a draw call. -+ * the clear using the TLB and we have to do it as a draw call. This -+ * issue is fixed since V3D 4.3.18. - * - * FIXME: separate stencil. - */ -- if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) { -+ if (device->devinfo.ver == 42 && -+ subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) { - struct v3dv_render_pass_attachment *att = - &pass->attachments[subpass->ds_attachment.attachment]; - if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0093-v3dv-update-thread-end-restrictions-validation-for-v.patch b/projects/RPi/devices/RPi5/patches/mesa/0093-v3dv-update-thread-end-restrictions-validation-for-v.patch deleted file mode 100644 index 2b9aa1538c..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0093-v3dv-update-thread-end-restrictions-validation-for-v.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 5be7f484210103e40b77fa3135042da4a8406659 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 28 Sep 2021 08:59:08 +0200 -Subject: [PATCH 093/142] v3dv: update thread end restrictions validation for - v71 - ---- - src/broadcom/compiler/qpu_validate.c | 37 +++++++++++++++++++++++++--- - 1 file changed, 34 insertions(+), 3 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c -index 1082fb7d50a..0466ee5d0b6 100644 ---- a/src/broadcom/compiler/qpu_validate.c -+++ b/src/broadcom/compiler/qpu_validate.c -@@ -316,17 +316,48 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) - inst->type == V3D_QPU_INSTR_TYPE_ALU) { - if ((inst->alu.add.op != V3D_QPU_A_NOP && - !inst->alu.add.magic_write)) { -- fail_instr(state, "RF write after THREND"); -+ if (devinfo->ver <= 42) { -+ fail_instr(state, "RF write after THREND"); -+ } else if (devinfo->ver >= 71) { -+ if (state->last_thrsw_ip - state->ip == 0) { -+ fail_instr(state, -+ "ADD RF write at THREND"); -+ } -+ if (inst->alu.add.waddr == 2 || -+ inst->alu.add.waddr == 3) { -+ fail_instr(state, -+ "RF2-3 write after THREND"); -+ } -+ } - } - - if ((inst->alu.mul.op != V3D_QPU_M_NOP && - !inst->alu.mul.magic_write)) { -- fail_instr(state, "RF write after THREND"); -+ if (devinfo->ver <= 42) { -+ fail_instr(state, "RF write after THREND"); -+ } else if (devinfo->ver >= 71) { -+ if (state->last_thrsw_ip - state->ip == 0) { -+ fail_instr(state, -+ "MUL RF write at THREND"); -+ } -+ -+ if (inst->alu.mul.waddr == 2 || -+ inst->alu.mul.waddr == 3) { -+ fail_instr(state, -+ "RF2-3 write after THREND"); -+ } -+ } - } - - if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && - !inst->sig_magic) { -- fail_instr(state, "RF write after THREND"); -+ if (devinfo->ver <= 42) { -+ fail_instr(state, "RF write after THREND"); -+ } else if (devinfo->ver >= 71 && -+ (inst->sig_addr == 2 || -+ inst->sig_addr == 3)) { -+ fail_instr(state, "RF2-3 write after THREND"); -+ } - } - - /* GFXH-1625: No TMUWT in the last instruction */ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0094-v3dv-handle-early-Z-S-clears-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0094-v3dv-handle-early-Z-S-clears-for-v71.patch deleted file mode 100644 index 50989e8ea6..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0094-v3dv-handle-early-Z-S-clears-for-v71.patch +++ /dev/null @@ -1,68 +0,0 @@ -From a751dff57b6d769f5b031054cc65415cc3b44c08 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 29 Sep 2021 08:22:59 +0200 -Subject: [PATCH 094/142] v3dv: handle early Z/S clears for v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 30 ++++++++++++++++++++------ - 1 file changed, 23 insertions(+), 7 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 48b2e319e51..4580e2a4650 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -998,6 +998,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - * Early-Z/S clearing is independent of Early Z/S testing, so it is - * possible to enable one but not the other so long as their - * respective requirements are met. -+ * -+ * From V3D 4.5.6, Z/S buffers are always cleared automatically -+ * between tiles, but we still want to enable early ZS clears -+ * when Z/S are not loaded or stored. - */ - struct v3dv_render_pass_attachment *ds_attachment = - &pass->attachments[ds_attachment_idx]; -@@ -1005,21 +1009,33 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - const VkImageAspectFlags ds_aspects = - vk_format_aspects(ds_attachment->desc.format); - -- bool needs_depth_clear = -- check_needs_clear(state, -- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, -- ds_attachment->first_subpass, -- ds_attachment->desc.loadOp, -- subpass->do_depth_clear_with_draw); -- - bool needs_depth_store = - v3dv_cmd_buffer_check_needs_store(state, - ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->last_subpass, - ds_attachment->desc.storeOp) || - subpass->resolve_depth; -+#if V3D_VERSION <= 42 -+ bool needs_depth_clear = -+ check_needs_clear(state, -+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, -+ ds_attachment->first_subpass, -+ ds_attachment->desc.loadOp, -+ subpass->do_depth_clear_with_draw); - - do_early_zs_clear = needs_depth_clear && !needs_depth_store; -+#endif -+#if V3D_VERSION >= 71 -+ bool needs_depth_load = -+ v3dv_cmd_buffer_check_needs_load(state, -+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, -+ ds_attachment->first_subpass, -+ ds_attachment->desc.loadOp, -+ ds_attachment->last_subpass, -+ ds_attachment->desc.storeOp); -+ do_early_zs_clear = !needs_depth_load && !needs_depth_store; -+#endif -+ - if (do_early_zs_clear && - vk_format_has_stencil(ds_attachment->desc.format)) { - bool needs_stencil_load = --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0095-v3dv-handle-RTs-with-no-color-targets-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0095-v3dv-handle-RTs-with-no-color-targets-in-v71.patch deleted file mode 100644 index 11ab68bfb4..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0095-v3dv-handle-RTs-with-no-color-targets-in-v71.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 2add46ebce4760bf8349606201324ee0e6b1f9da Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 29 Sep 2021 09:07:28 +0200 -Subject: [PATCH 095/142] v3dv: handle RTs with no color targets in v71 - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 4580e2a4650..750486a6ccf 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1175,6 +1175,17 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - #endif - } - -+#if V3D_VERSION >= 71 -+ /* If we don't have any color RTs, we still need to emit one and flag -+ * it as not used using stride = 1. -+ */ -+ if (subpass->color_count == 0) { -+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.stride = 1; -+ } -+ } -+#endif -+ - #if V3D_VERSION == 42 - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - cmd_buffer_render_pass_setup_render_target --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0096-v3dv-no-specific-separate_segments-flag-for-V3D-7.1.patch b/projects/RPi/devices/RPi5/patches/mesa/0096-v3dv-no-specific-separate_segments-flag-for-V3D-7.1.patch deleted file mode 100644 index 10f1c52764..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0096-v3dv-no-specific-separate_segments-flag-for-V3D-7.1.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 019abbd34d2d904d6bb33f9fa4433cb53ca7899c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 1 Oct 2021 15:18:38 +0200 -Subject: [PATCH 096/142] v3dv: no specific separate_segments flag for V3D 7.1 - -On V3D 7.1 there is not a flag on the Shader State Record to specify -if we are using shared or separate segments. This is done by setting -the vpm input size to 0 (so we need to ensure that the output would be -the max needed for input/output). - -We were already doing the latter on the prog_data_vs, so we just need -to use those values, instead of assigning default values. - -As we are here, we also add some comments on the compiler part. ---- - src/broadcom/compiler/qpu_schedule.c | 4 ++++ - src/broadcom/compiler/vir.c | 4 ++++ - src/broadcom/vulkan/v3dvx_pipeline.c | 15 +++++++++++++-- - 3 files changed, 21 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c -index 77fb6a794e6..4f767296860 100644 ---- a/src/broadcom/compiler/qpu_schedule.c -+++ b/src/broadcom/compiler/qpu_schedule.c -@@ -297,6 +297,10 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) - /* If the input and output segments are shared, then all VPM reads to - * a location need to happen before all writes. We handle this by - * serializing all VPM operations for now. -+ * -+ * FIXME: we are assuming that the segments are shared. That is -+ * correct right now as we are only using shared, but technically you -+ * can choose. - */ - bool separate_vpm_segment = false; - -diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c -index 7612eed7130..dd0aa761c43 100644 ---- a/src/broadcom/compiler/vir.c -+++ b/src/broadcom/compiler/vir.c -@@ -745,6 +745,10 @@ v3d_vs_set_prog_data(struct v3d_compile *c, - - /* Set us up for shared input/output segments. This is apparently - * necessary for our VCM setup to avoid varying corruption. -+ * -+ * FIXME: initially testing on V3D 7.1 seems to work fine when using -+ * separate segments. So we could try to reevaluate in the future, if -+ * there is any advantage of using separate segments. - */ - prog_data->separate_segments = false; - prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size, -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index a640c1d084a..a72ca3c241b 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -452,14 +452,25 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) - prog_data_vs_bin->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - prog_data_vs->separate_segments; --#endif -- - shader.coordinate_shader_input_vpm_segment_size = - prog_data_vs_bin->separate_segments ? - prog_data_vs_bin->vpm_input_size : 1; - shader.vertex_shader_input_vpm_segment_size = - prog_data_vs->separate_segments ? - prog_data_vs->vpm_input_size : 1; -+#endif -+ -+ /* On V3D 7.1 there isn't a specific flag to set if we are using -+ * shared/separate segments or not. We just set the value of -+ * vpm_input_size to 0, and set output to the max needed. That should be -+ * already properly set on prog_data_vs_bin -+ */ -+#if V3D_VERSION == 71 -+ shader.coordinate_shader_input_vpm_segment_size = -+ prog_data_vs_bin->vpm_input_size; -+ shader.vertex_shader_input_vpm_segment_size = -+ prog_data_vs->vpm_input_size; -+#endif - - shader.coordinate_shader_output_vpm_segment_size = - prog_data_vs_bin->vpm_output_size; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0097-v3dv-don-t-convert-floating-point-border-colors-in-v.patch b/projects/RPi/devices/RPi5/patches/mesa/0097-v3dv-don-t-convert-floating-point-border-colors-in-v.patch deleted file mode 100644 index d0018b9f0e..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0097-v3dv-don-t-convert-floating-point-border-colors-in-v.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 4f6b4f91577ec04aab907d59d836d0c17731a9d0 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 7 Oct 2021 12:43:49 +0200 -Subject: [PATCH 097/142] v3dv: don't convert floating point border colors in - v71 - -The TMU does this for us now. ---- - src/broadcom/vulkan/v3dvx_device.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c -index e235983864c..72daefadb08 100644 ---- a/src/broadcom/vulkan/v3dvx_device.c -+++ b/src/broadcom/vulkan/v3dvx_device.c -@@ -118,7 +118,11 @@ static union pipe_color_union encode_border_color( - (1 << (desc->channel[i].size - 1)) - 1); - } - -- /* convert from float to expected format */ -+#if V3D_VERSION <= 42 -+ /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions -+ * for us. In V3D 4.x we need to manually convert floating point color -+ * values to the expected format. -+ */ - if (vk_format_is_srgb(bc_info->format) || - vk_format_is_compressed(bc_info->format)) { - for (int i = 0; i < 4; i++) -@@ -170,6 +174,7 @@ static union pipe_color_union encode_border_color( - } - } - } -+#endif - - return border; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0098-v3dv-handle-Z-clipping-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0098-v3dv-handle-Z-clipping-in-v71.patch deleted file mode 100644 index aec7084bd4..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0098-v3dv-handle-Z-clipping-in-v71.patch +++ /dev/null @@ -1,60 +0,0 @@ -From d8083cb8f104e0f035f5b812e000a500fa52d66f Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 15 Oct 2021 13:06:31 +0200 -Subject: [PATCH 098/142] v3dv: handle Z clipping in v71 - -Fixes the following tests: - -dEQP-VK.clipping.clip_volume.* -dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_* (except deltazero) ---- - src/broadcom/vulkan/v3dvx_pipeline.c | 33 ++++++++++++++++++++++++++++ - 1 file changed, 33 insertions(+) - -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index a72ca3c241b..7b1133f8173 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -227,6 +227,39 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline, - ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false; - - pipeline->z_updates_enable = config.z_updates_enable; -+ -+#if V3D_VERSION >= 71 -+ /* From the Vulkan spec: -+ * -+ * "depthClampEnable controls whether to clamp the fragment’s depth -+ * values as described in Depth Test. If the pipeline is not created -+ * with VkPipelineRasterizationDepthClipStateCreateInfoEXT present -+ * then enabling depth clamp will also disable clipping primitives to -+ * the z planes of the frustrum as described in Primitive Clipping. -+ * Otherwise depth clipping is controlled by the state set in -+ * VkPipelineRasterizationDepthClipStateCreateInfoEXT." -+ * -+ * Note: neither depth clamping nor VK_EXT_depth_clip_enable are actually -+ * supported in the driver yet, so in practice we are always enabling Z -+ * clipping for now. -+ */ -+ bool z_clip_enable = false; -+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info = -+ ds_info ? vk_find_struct_const(ds_info->pNext, -+ PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) : -+ NULL; -+ if (clip_info) -+ z_clip_enable = clip_info->depthClipEnable; -+ else if (!(rs_info && rs_info->depthClampEnable)) -+ z_clip_enable = true; -+ -+ if (z_clip_enable) { -+ config.z_clipping_mode = pipeline->negative_one_to_one ? -+ V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE; -+ } else { -+ config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE; -+ } -+#endif - }; - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0099-broadcom-common-add-TFU-register-definitions-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0099-broadcom-common-add-TFU-register-definitions-for-v71.patch deleted file mode 100644 index d69b668ccf..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0099-broadcom-common-add-TFU-register-definitions-for-v71.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 2925fa6dc936d9268a59d8d7d4a775e89fd3fbdb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 17 Nov 2021 11:33:59 +0100 -Subject: [PATCH 099/142] broadcom/common: add TFU register definitions for v71 - ---- - src/broadcom/common/v3d_tfu.h | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/src/broadcom/common/v3d_tfu.h b/src/broadcom/common/v3d_tfu.h -index 80da224ca2d..572d0074794 100644 ---- a/src/broadcom/common/v3d_tfu.h -+++ b/src/broadcom/common/v3d_tfu.h -@@ -48,4 +48,27 @@ - #define V3D33_TFU_ICFG_FORMAT_UIF_NO_XOR 14 - #define V3D33_TFU_ICFG_FORMAT_UIF_XOR 15 - -+/* Disable level 0 write, just write following mipmaps */ -+#define V3D71_TFU_IOC_DIMTW (1 << 0) -+#define V3D71_TFU_IOC_FORMAT_SHIFT 12 -+#define V3D71_TFU_IOC_FORMAT_LINEARTILE 3 -+#define V3D71_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 -+#define V3D71_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 -+#define V3D71_TFU_IOA_FORMAT_UIF_NO_XOR 6 -+#define V3D71_TFU_IOA_FORMAT_UIF_XOR 7 -+ -+#define V3D71_TFU_IOC_STRIDE_SHIFT 16 -+#define V3D71_TFU_IOC_NUMMM_SHIFT 4 -+ -+#define V3D71_TFU_ICFG_OTYPE_SHIFT 16 -+#define V3D71_TFU_ICFG_IFORMAT_SHIFT 23 -+#define V3D71_TFU_ICFG_FORMAT_RASTER 0 -+#define V3D71_TFU_ICFG_FORMAT_SAND_128 1 -+#define V3D71_TFU_ICFG_FORMAT_SAND_256 2 -+#define V3D71_TFU_ICFG_FORMAT_LINEARTILE 11 -+#define V3D71_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 -+#define V3D71_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 -+#define V3D71_TFU_ICFG_FORMAT_UIF_NO_XOR 14 -+#define V3D71_TFU_ICFG_FORMAT_UIF_XOR 15 -+ - #endif --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0100-broadcom-simulator-TFU-register-names-changed-for-v7.patch b/projects/RPi/devices/RPi5/patches/mesa/0100-broadcom-simulator-TFU-register-names-changed-for-v7.patch deleted file mode 100644 index 8f275d0f02..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0100-broadcom-simulator-TFU-register-names-changed-for-v7.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 6d10aa8a64e009d4d1f4f05885621bd2d9a72465 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 23 Sep 2021 13:09:41 +0200 -Subject: [PATCH 100/142] broadcom/simulator: TFU register names changed for - v71 - ---- - src/broadcom/simulator/v3dx_simulator.c | 39 +++++++++++++++---------- - 1 file changed, 23 insertions(+), 16 deletions(-) - -diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c -index f23b0538de3..494f44a6b5d 100644 ---- a/src/broadcom/simulator/v3dx_simulator.c -+++ b/src/broadcom/simulator/v3dx_simulator.c -@@ -182,26 +182,33 @@ v3d_flush_caches(struct v3d_hw *v3d) - v3d_flush_l2t(v3d); - } - -+#if V3D_VERSION < 71 -+#define TFU_REG(NAME) V3D_TFU_ ## NAME -+#else -+#define TFU_REG(NAME) V3D_IFC_ ## NAME -+#endif -+ -+ - int - v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, - struct drm_v3d_submit_tfu *args) - { -- int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET; -- -- V3D_WRITE(V3D_TFU_IIA, args->iia); -- V3D_WRITE(V3D_TFU_IIS, args->iis); -- V3D_WRITE(V3D_TFU_ICA, args->ica); -- V3D_WRITE(V3D_TFU_IUA, args->iua); -- V3D_WRITE(V3D_TFU_IOA, args->ioa); -- V3D_WRITE(V3D_TFU_IOS, args->ios); -- V3D_WRITE(V3D_TFU_COEF0, args->coef[0]); -- V3D_WRITE(V3D_TFU_COEF1, args->coef[1]); -- V3D_WRITE(V3D_TFU_COEF2, args->coef[2]); -- V3D_WRITE(V3D_TFU_COEF3, args->coef[3]); -- -- V3D_WRITE(V3D_TFU_ICFG, args->icfg); -- -- while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) { -+ int last_vtct = V3D_READ(TFU_REG(CS)) & V3D_TFU_CS_CVTCT_SET; -+ -+ V3D_WRITE(TFU_REG(IIA), args->iia); -+ V3D_WRITE(TFU_REG(IIS), args->iis); -+ V3D_WRITE(TFU_REG(ICA), args->ica); -+ V3D_WRITE(TFU_REG(IUA), args->iua); -+ V3D_WRITE(TFU_REG(IOA), args->ioa); -+ V3D_WRITE(TFU_REG(IOS), args->ios); -+ V3D_WRITE(TFU_REG(COEF0), args->coef[0]); -+ V3D_WRITE(TFU_REG(COEF1), args->coef[1]); -+ V3D_WRITE(TFU_REG(COEF2), args->coef[2]); -+ V3D_WRITE(TFU_REG(COEF3), args->coef[3]); -+ -+ V3D_WRITE(TFU_REG(ICFG), args->icfg); -+ -+ while ((V3D_READ(TFU_REG(CS)) & V3D_TFU_CS_CVTCT_SET) == last_vtct) { - v3d_hw_tick(v3d); - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0101-v3dv-add-support-for-TFU-jobs-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0101-v3dv-add-support-for-TFU-jobs-in-v71.patch deleted file mode 100644 index bf9e2ccdcd..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0101-v3dv-add-support-for-TFU-jobs-in-v71.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 780f012747f2cc6e816b1955081dbeca9a0abe5c Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 23 Sep 2021 12:12:18 +0200 -Subject: [PATCH 101/142] v3dv: add support for TFU jobs in v71 - ---- - include/drm-uapi/v3d_drm.h | 5 ++++ - src/broadcom/simulator/v3dx_simulator.c | 3 ++ - src/broadcom/vulkan/v3dvx_meta_common.c | 37 +++++++++++++++++++++++++ - 3 files changed, 45 insertions(+) - -diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h -index 3dfc0af8756..1a7d7a689de 100644 ---- a/include/drm-uapi/v3d_drm.h -+++ b/include/drm-uapi/v3d_drm.h -@@ -319,6 +319,11 @@ struct drm_v3d_submit_tfu { - - /* Pointer to an array of ioctl extensions*/ - __u64 extensions; -+ -+ struct { -+ __u32 ioc; -+ __u32 pad; -+ } v71; - }; - - /* Submits a compute shader for dispatch. This job will block on any -diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c -index 494f44a6b5d..4ea177c9bb7 100644 ---- a/src/broadcom/simulator/v3dx_simulator.c -+++ b/src/broadcom/simulator/v3dx_simulator.c -@@ -200,6 +200,9 @@ v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, - V3D_WRITE(TFU_REG(ICA), args->ica); - V3D_WRITE(TFU_REG(IUA), args->iua); - V3D_WRITE(TFU_REG(IOA), args->ioa); -+#if V3D_VERSION >= 71 -+ V3D_WRITE(TFU_REG(IOC), args->v71.ioc); -+#endif - V3D_WRITE(TFU_REG(IOS), args->ios); - V3D_WRITE(TFU_REG(COEF0), args->coef[0]); - V3D_WRITE(TFU_REG(COEF1), args->coef[1]); -diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c -index 09ebcfa97c1..b8f3297bc94 100644 ---- a/src/broadcom/vulkan/v3dvx_meta_common.c -+++ b/src/broadcom/vulkan/v3dvx_meta_common.c -@@ -950,6 +950,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, - - tfu.iia |= src_offset; - -+#if V3D_VERSION <= 42 - if (src_tiling == V3D_TILING_RASTER) { - tfu.icfg = V3D33_TFU_ICFG_FORMAT_RASTER << V3D33_TFU_ICFG_FORMAT_SHIFT; - } else { -@@ -958,12 +959,46 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, - V3D33_TFU_ICFG_FORMAT_SHIFT; - } - tfu.icfg |= format_plane->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT; -+#endif -+#if V3D_VERSION >= 71 -+ if (src_tiling == V3D_TILING_RASTER) { -+ tfu.icfg = V3D71_TFU_ICFG_FORMAT_RASTER << V3D71_TFU_ICFG_IFORMAT_SHIFT; -+ } else { -+ tfu.icfg = (V3D71_TFU_ICFG_FORMAT_LINEARTILE + -+ (src_tiling - V3D_TILING_LINEARTILE)) << -+ V3D71_TFU_ICFG_IFORMAT_SHIFT; -+ } -+ tfu.icfg |= format_plane->tex_type << V3D71_TFU_ICFG_OTYPE_SHIFT; -+#endif - - tfu.ioa = dst_offset; - -+#if V3D_VERSION <= 42 - tfu.ioa |= (V3D33_TFU_IOA_FORMAT_LINEARTILE + - (dst_tiling - V3D_TILING_LINEARTILE)) << - V3D33_TFU_IOA_FORMAT_SHIFT; -+#endif -+ -+#if V3D_VERSION >= 71 -+ tfu.v71.ioc = (V3D71_TFU_IOC_FORMAT_LINEARTILE + -+ (dst_tiling - V3D_TILING_LINEARTILE)) << -+ V3D71_TFU_IOC_FORMAT_SHIFT; -+ -+ switch (dst_tiling) { -+ case V3D_TILING_UIF_NO_XOR: -+ case V3D_TILING_UIF_XOR: -+ tfu.v71.ioc |= -+ (dst_padded_height_or_stride / (2 * v3d_utile_height(dst_cpp))) << -+ V3D71_TFU_IOC_STRIDE_SHIFT; -+ break; -+ case V3D_TILING_RASTER: -+ tfu.v71.ioc |= (dst_padded_height_or_stride / dst_cpp) << -+ V3D71_TFU_IOC_STRIDE_SHIFT; -+ break; -+ default: -+ break; -+ } -+#endif - - switch (src_tiling) { - case V3D_TILING_UIF_NO_XOR: -@@ -980,6 +1015,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, - /* The TFU can handle raster sources but always produces UIF results */ - assert(dst_tiling != V3D_TILING_RASTER); - -+#if V3D_VERSION <= 42 - /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the - * OPAD field for the destination (how many extra UIF blocks beyond - * those necessary to cover the height). -@@ -991,6 +1027,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, - uif_block_h; - tfu.icfg |= icfg << V3D33_TFU_ICFG_OPAD_SHIFT; - } -+#endif - - v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0102-v3dv-make-v3dv_viewport_compute_xform-depend-on-the-.patch b/projects/RPi/devices/RPi5/patches/mesa/0102-v3dv-make-v3dv_viewport_compute_xform-depend-on-the-.patch deleted file mode 100644 index 946565c402..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0102-v3dv-make-v3dv_viewport_compute_xform-depend-on-the-.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 07cba940af2fe0c40641816bee280b57a40973fb Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 20 Oct 2021 11:22:11 +0200 -Subject: [PATCH 102/142] v3dv: make v3dv_viewport_compute_xform depend on the - V3D version - -For 4.x we have a workaround for too small Z scale values that is -not required for V3D 7.x. ---- - src/broadcom/vulkan/v3dv_cmd_buffer.c | 40 +++----------------------- - src/broadcom/vulkan/v3dv_pipeline.c | 7 +++-- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 37 ++++++++++++++++++++++++ - src/broadcom/vulkan/v3dvx_private.h | 5 ++++ - 4 files changed, 50 insertions(+), 39 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c -index 96360a96b44..bda0a614523 100644 ---- a/src/broadcom/vulkan/v3dv_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c -@@ -2131,39 +2131,6 @@ v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer, - } - } - --/* FIXME: C&P from radv. tu has similar code. Perhaps common place? */ --void --v3dv_viewport_compute_xform(const VkViewport *viewport, -- float scale[3], -- float translate[3]) --{ -- float x = viewport->x; -- float y = viewport->y; -- float half_width = 0.5f * viewport->width; -- float half_height = 0.5f * viewport->height; -- double n = viewport->minDepth; -- double f = viewport->maxDepth; -- -- scale[0] = half_width; -- translate[0] = half_width + x; -- scale[1] = half_height; -- translate[1] = half_height + y; -- -- scale[2] = (f - n); -- translate[2] = n; -- -- /* It seems that if the scale is small enough the hardware won't clip -- * correctly so we work around this my choosing the smallest scale that -- * seems to work. -- * -- * This case is exercised by CTS: -- * dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero -- */ -- const float min_abs_scale = 0.000009f; -- if (fabs(scale[2]) < min_abs_scale) -- scale[2] = scale[2] < 0 ? -min_abs_scale : min_abs_scale; --} -- - /* Considers the pipeline's negative_one_to_one state and applies it to the - * current viewport transform if needed to produce the resulting Z translate - * and scale parameters. -@@ -2216,9 +2183,10 @@ v3dv_CmdSetViewport(VkCommandBuffer commandBuffer, - viewportCount * sizeof(*pViewports)); - - for (uint32_t i = firstViewport; i < total_count; i++) { -- v3dv_viewport_compute_xform(&state->dynamic.viewport.viewports[i], -- state->dynamic.viewport.scale[i], -- state->dynamic.viewport.translate[i]); -+ v3dv_X(cmd_buffer->device, viewport_compute_xform) -+ (&state->dynamic.viewport.viewports[i], -+ state->dynamic.viewport.scale[i], -+ state->dynamic.viewport.translate[i]); - } - - cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT; -diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c -index d012ff8f948..2156176d4cc 100644 ---- a/src/broadcom/vulkan/v3dv_pipeline.c -+++ b/src/broadcom/vulkan/v3dv_pipeline.c -@@ -2661,9 +2661,10 @@ pipeline_init_dynamic_state( - pViewportState->viewportCount); - - for (uint32_t i = 0; i < dynamic->viewport.count; i++) { -- v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i], -- dynamic->viewport.scale[i], -- dynamic->viewport.translate[i]); -+ v3dv_X(pipeline->device, viewport_compute_xform) -+ (&dynamic->viewport.viewports[i], -+ dynamic->viewport.scale[i], -+ dynamic->viewport.translate[i]); - } - } - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 750486a6ccf..f7c13a22423 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1285,6 +1285,43 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) - cl_emit(rcl, END_OF_RENDERING, end); - } - -+void -+v3dX(viewport_compute_xform)(const VkViewport *viewport, -+ float scale[3], -+ float translate[3]) -+{ -+ float x = viewport->x; -+ float y = viewport->y; -+ float half_width = 0.5f * viewport->width; -+ float half_height = 0.5f * viewport->height; -+ double n = viewport->minDepth; -+ double f = viewport->maxDepth; -+ -+ scale[0] = half_width; -+ translate[0] = half_width + x; -+ scale[1] = half_height; -+ translate[1] = half_height + y; -+ -+ scale[2] = (f - n); -+ translate[2] = n; -+ -+ /* It seems that if the scale is small enough the hardware won't clip -+ * correctly so we work around this my choosing the smallest scale that -+ * seems to work. -+ * -+ * This case is exercised by CTS: -+ * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero -+ * -+ * V3D 7.x fixes this by using the new -+ * CLIPPER_Z_SCALE_AND_OFFSET_NO_GUARDBAND. -+ */ -+#if V3D_VERSION <= 42 -+ const float min_abs_scale = 0.0005f; -+ if (fabs(scale[2]) < min_abs_scale) -+ scale[2] = scale[2] < 0 ? -min_abs_scale : min_abs_scale; -+#endif -+} -+ - void - v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) - { -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index 036ce11b455..81715520913 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -339,3 +339,8 @@ v3dX(clamp_for_format_and_type)(uint32_t rt_type, - uint32_t - v3dX(clamp_for_format_and_type)(uint32_t rt_type, - VkFormat vk_format); -+ -+void -+v3dX(viewport_compute_xform)(const VkViewport *viewport, -+ float scale[3], -+ float translate[3]); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0103-v3dv-fix-depth-clipping-then-Z-scale-is-too-small-in.patch b/projects/RPi/devices/RPi5/patches/mesa/0103-v3dv-fix-depth-clipping-then-Z-scale-is-too-small-in.patch deleted file mode 100644 index 82f934720c..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0103-v3dv-fix-depth-clipping-then-Z-scale-is-too-small-in.patch +++ /dev/null @@ -1,51 +0,0 @@ -From c6b60ee47c50474030f8a0a92bd4c6a071f926dc Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 14 Feb 2023 10:09:53 +0100 -Subject: [PATCH 103/142] v3dv: fix depth clipping then Z scale is too small in - V3D 7.x - -When the Z scale is too small guardband clipping may not clip -correctly, so disable it, which is a new option in V3D 7.x. - -This fixes this test in V3D 7.x without needing any workarounds: -dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index f7c13a22423..3566649aafd 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1363,10 +1363,28 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) - v3dv_cmd_buffer_state_get_viewport_z_xform(&cmd_buffer->state, 0, - &translate_z, &scale_z); - -+#if V3D_VERSION == 42 - cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { - clip.viewport_z_offset_zc_to_zs = translate_z; - clip.viewport_z_scale_zc_to_zs = scale_z; - } -+#endif -+ -+#if V3D_VERSION >= 71 -+ /* If the Z scale is too small guardband clipping may not clip correctly */ -+ if (fabsf(scale_z) < 0.01f) { -+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET_NO_GUARDBAND, clip) { -+ clip.viewport_z_offset_zc_to_zs = translate_z; -+ clip.viewport_z_scale_zc_to_zs = scale_z; -+ } -+ } else { -+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { -+ clip.viewport_z_offset_zc_to_zs = translate_z; -+ clip.viewport_z_scale_zc_to_zs = scale_z; -+ } -+ } -+#endif -+ - cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { - /* Vulkan's default Z NDC is [0..1]. If 'negative_one_to_one' is enabled, - * we are using OpenGL's [-1, 1] instead. --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0104-v3d-add-a-non-conformant-warning-for-not-fully-suppo.patch b/projects/RPi/devices/RPi5/patches/mesa/0104-v3d-add-a-non-conformant-warning-for-not-fully-suppo.patch deleted file mode 100644 index 83c6351641..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0104-v3d-add-a-non-conformant-warning-for-not-fully-suppo.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 46e2b22f43290e6fe92f5435af174c4b18bb6ef5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 22:52:47 +0200 -Subject: [PATCH 104/142] v3d: add a non-conformant warning for not fully - supported hw - ---- - src/gallium/drivers/v3d/v3d_screen.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c -index 98ca9bb69e6..efdb7d615ae 100644 ---- a/src/gallium/drivers/v3d/v3d_screen.c -+++ b/src/gallium/drivers/v3d/v3d_screen.c -@@ -922,6 +922,12 @@ v3d_screen_create(int fd, const struct pipe_screen_config *config, - if (!v3d_get_device_info(screen->fd, &screen->devinfo, &v3d_ioctl)) - goto fail; - -+ if (screen->devinfo.ver >= 71) { -+ fprintf(stderr, "WARNING: v3d support for hw version %i is neither " -+ "a complete nor a conformant OpenGL implementation. Testing " -+ "use only.\n", screen->devinfo.ver); -+ } -+ - driParseConfigFiles(config->options, config->options_info, 0, "v3d", - NULL, NULL, NULL, 0, NULL, 0); - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0105-v3d-add-v71-hw-generation.patch b/projects/RPi/devices/RPi5/patches/mesa/0105-v3d-add-v71-hw-generation.patch deleted file mode 100644 index 07bed87a0c..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0105-v3d-add-v71-hw-generation.patch +++ /dev/null @@ -1,336 +0,0 @@ -From 46ffdc57ac7fbe71e92b22e1fe93185f3d33a3ac Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 23 May 2023 23:32:37 +0200 -Subject: [PATCH 105/142] v3d: add v71 hw generation - -Starting point for v71 version inclusion: - * Adds as one of the versions to be compiled on meson - * Updated the v3d_X and v3dX macros to include version 71 - * Update the code enough to get it building when using v71. - -Any real v71 support will be implemented on following commits. ---- - src/gallium/drivers/v3d/meson.build | 2 +- - src/gallium/drivers/v3d/v3d_context.h | 22 +++++++++++++---- - src/gallium/drivers/v3d/v3dx_draw.c | 21 +++++++++++++--- - src/gallium/drivers/v3d/v3dx_emit.c | 11 +++++++++ - src/gallium/drivers/v3d/v3dx_rcl.c | 35 ++++++++++++++++++++++----- - src/gallium/drivers/v3d/v3dx_state.c | 12 +++++++++ - 6 files changed, 88 insertions(+), 15 deletions(-) - -diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build -index dfa1e88097b..526a131ae9b 100644 ---- a/src/gallium/drivers/v3d/meson.build -+++ b/src/gallium/drivers/v3d/meson.build -@@ -58,7 +58,7 @@ if dep_v3dv3.found() - v3d_args += '-DUSE_V3D_SIMULATOR' - endif - --v3d_versions = ['33', '42'] -+v3d_versions = ['33', '42', '71'] - - per_version_libs = [] - foreach ver : v3d_versions -diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h -index 97850b0363e..ad267d5033c 100644 ---- a/src/gallium/drivers/v3d/v3d_context.h -+++ b/src/gallium/drivers/v3d/v3d_context.h -@@ -818,13 +818,21 @@ void v3d_disk_cache_store(struct v3d_context *v3d, - - /* Helper to call hw ver specific functions */ - #define v3d_X(devinfo, thing) ({ \ -- __typeof(&v3d42_##thing) v3d_X_thing; \ -- if ((devinfo)->ver >= 42) \ -- v3d_X_thing = &v3d42_##thing; \ -- else if ((devinfo)->ver >= 33) \ -+ __typeof(&v3d33_##thing) v3d_X_thing; \ -+ switch (devinfo->ver) { \ -+ case 33: \ -+ case 40: \ - v3d_X_thing = &v3d33_##thing; \ -- else \ -+ break; \ -+ case 42: \ -+ v3d_X_thing = &v3d42_##thing; \ -+ break; \ -+ case 71: \ -+ v3d_X_thing = &v3d71_##thing; \ -+ break; \ -+ default: \ - unreachable("Unsupported hardware generation"); \ -+ } \ - v3d_X_thing; \ - }) - -@@ -838,6 +846,10 @@ void v3d_disk_cache_store(struct v3d_context *v3d, - # define v3dX(x) v3d42_##x - # include "v3dx_context.h" - # undef v3dX -+ -+# define v3dX(x) v3d71_##x -+# include "v3dx_context.h" -+# undef v3dX - #endif - - #endif /* V3D_CONTEXT_H */ -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index 17442500ea9..2c74c5973c9 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -95,7 +95,11 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) - #endif - - assert(!job->msaa || !job->double_buffer); --#if V3D_VERSION >= 40 -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ -+#if V3D_VERSION >= 40 && V3D_VERSION <= 42 - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { - config.width_in_pixels = job->draw_width; - config.height_in_pixels = job->draw_height; -@@ -107,7 +111,8 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) - - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - } --#else /* V3D_VERSION < 40 */ -+#endif -+#if V3D_VERSION < 40 - /* "Binning mode lists start with a Tile Binning Mode Configuration - * item (120)" - * -@@ -134,7 +139,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) - - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - } --#endif /* V3D_VERSION < 40 */ -+#endif - - /* There's definitely nothing in the VCD cache we want. */ - cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); -@@ -655,10 +660,15 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, - /* XXX: Use combined input/output size flag in the common - * case. - */ -+#if V3D_VERSION <= 42 - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = - v3d->prog.cs->prog_data.vs->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - v3d->prog.vs->prog_data.vs->separate_segments; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - shader.coordinate_shader_input_vpm_segment_size = - v3d->prog.cs->prog_data.vs->separate_segments ? -@@ -724,9 +734,14 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, - shader.instance_id_read_by_vertex_shader = - v3d->prog.vs->prog_data.vs->uses_iid; - -+#if V3D_VERSION <= 42 - shader.address_of_default_attribute_values = - cl_address(v3d_resource(vtx->defaults)->bo, - vtx->defaults_offset); -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - } - - bool cs_loaded_any = false; -diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c -index 0ad3fb68b1e..5af3d03b337 100644 ---- a/src/gallium/drivers/v3d/v3dx_emit.c -+++ b/src/gallium/drivers/v3d/v3dx_emit.c -@@ -512,6 +512,7 @@ v3dX(emit_state)(struct pipe_context *pctx) - /* Note: EZ state may update based on the compiled FS, - * along with ZSA - */ -+#if V3D_VERSION <= 42 - config.early_z_updates_enable = - (job->ez_state != V3D_EZ_DISABLED); - if (v3d->zsa->base.depth_enabled) { -@@ -524,6 +525,10 @@ v3dX(emit_state)(struct pipe_context *pctx) - } else { - config.depth_test_function = PIPE_FUNC_ALWAYS; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - config.stencil_enable = - v3d->zsa->base.stencil[0].enabled; -@@ -564,12 +569,18 @@ v3dX(emit_state)(struct pipe_context *pctx) - } - - if (v3d->dirty & V3D_DIRTY_VIEWPORT) { -+#if V3D_VERSION <= 42 - cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { - clip.viewport_half_width_in_1_256th_of_pixel = - v3d->viewport.scale[0] * 256.0f; - clip.viewport_half_height_in_1_256th_of_pixel = - v3d->viewport.scale[1] * 256.0f; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - - cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { - clip.viewport_z_offset_zc_to_zs = -diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c -index 82547437c25..166cc34e4ee 100644 ---- a/src/gallium/drivers/v3d/v3dx_rcl.c -+++ b/src/gallium/drivers/v3d/v3dx_rcl.c -@@ -419,10 +419,16 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer) - * clearing Z/S. - */ - if (job->clear) { -+#if V3D_VERSION <= 42 - cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = !job->early_zs_clear; - clear.clear_all_render_targets = true; - } -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - } - #endif /* V3D_VERSION >= 40 */ - } -@@ -483,7 +489,7 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer) - } - } - --#if V3D_VERSION >= 40 -+#if V3D_VERSION >= 40 && V3D_VERSION <= 42 - static void - v3d_setup_render_target(struct v3d_job *job, int cbuf, - uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) -@@ -507,9 +513,9 @@ v3d_setup_render_target(struct v3d_job *job, int cbuf, - else - *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; - } -+#endif - --#else /* V3D_VERSION < 40 */ -- -+#if V3D_VERSION < 40 - static void - v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf, - struct v3d_resource *rsc, bool is_separate_stencil) -@@ -656,7 +662,8 @@ emit_render_layer(struct v3d_job *job, uint32_t layer) - cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } --#else -+#endif -+#if V3D_VERSION >= 40 && V3D_VERSION <= 42 - for (int i = 0; i < 2; i++) { - if (i > 0) - cl_emit(&job->rcl, TILE_COORDINATES, coords); -@@ -673,6 +680,10 @@ emit_render_layer(struct v3d_job *job, uint32_t layer) - cl_emit(&job->rcl, END_OF_TILE_MARKER, end); - } - #endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - - cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); - -@@ -775,7 +786,13 @@ v3dX(emit_rcl)(struct v3d_job *job) - config.multisample_mode_4x = job->msaa; - config.double_buffer_in_non_ms_mode = job->double_buffer; - -+#if V3D_VERSION <= 42 - config.maximum_bpp_of_all_render_targets = job->internal_bpp; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - } - - for (int i = 0; i < job->nr_cbufs; i++) { -@@ -786,7 +803,7 @@ v3dX(emit_rcl)(struct v3d_job *job) - struct v3d_resource *rsc = v3d_resource(psurf->texture); - - UNUSED uint32_t config_pad = 0; -- uint32_t clear_pad = 0; -+ UNUSED uint32_t clear_pad = 0; - - /* XXX: Set the pad for raster. */ - if (surf->tiling == V3D_TILING_UIF_NO_XOR || -@@ -819,6 +836,7 @@ v3dX(emit_rcl)(struct v3d_job *job) - } - #endif /* V3D_VERSION < 40 */ - -+#if V3D_VERSION <= 42 - cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, - clear) { - clear.clear_color_low_32_bits = job->clear_color[i][0]; -@@ -847,9 +865,10 @@ v3dX(emit_rcl)(struct v3d_job *job) - clear.render_target_number = i; - }; - } -+#endif - } - --#if V3D_VERSION >= 40 -+#if V3D_VERSION >= 40 && V3D_VERSION <= 42 - cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - v3d_setup_render_target(job, 0, - &rt.render_target_0_internal_bpp, -@@ -870,6 +889,10 @@ v3dX(emit_rcl)(struct v3d_job *job) - } - #endif - -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - #if V3D_VERSION < 40 - /* FIXME: Don't bother emitting if we don't load/clear Z/S. */ - if (job->zsbuf) { -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index 0f1735fee66..a93d5be091e 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -990,7 +990,13 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d, - cso->u.buf.size); - } - -+#if V3D_VERSION <= 42 - tex.srgb = util_format_is_srgb(cso->format); -+#endif -+ -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif - - #if V3D_VERSION >= 40 - tex.swizzle_r = v3d_translate_pipe_swizzle(so->swizzle[0]); -@@ -1040,7 +1046,13 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d, - * shader code if we wanted to read an MSAA sRGB - * texture without sRGB decode. - */ -+#if V3D_VERSION <= 42 - tex.srgb = false; -+#endif -+#if V3D_VERSION >= 71 -+ unreachable("HW generation 71 not supported yet."); -+#endif -+ - } else { - tex.texture_type = v3d_get_tex_format(&screen->devinfo, - cso->format); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0106-v3d-emit-TILE_BINNING_MODE_CFG-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0106-v3d-emit-TILE_BINNING_MODE_CFG-for-v71.patch deleted file mode 100644 index dafba1550e..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0106-v3d-emit-TILE_BINNING_MODE_CFG-for-v71.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 1ef6241854666a00d43401039809f2470d3a2cc0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 20 Oct 2021 14:31:10 +0200 -Subject: [PATCH 106/142] v3d: emit TILE_BINNING_MODE_CFG for v71 - ---- - src/gallium/drivers/v3d/v3dx_draw.c | 16 +++++++++++++++- - 1 file changed, 15 insertions(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index 2c74c5973c9..9f38baa0bbf 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -96,7 +96,21 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) - - assert(!job->msaa || !job->double_buffer); - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { -+ config.width_in_pixels = job->draw_width; -+ config.height_in_pixels = job->draw_height; -+ -+ config.log2_tile_width = log2_tile_size(job->tile_width); -+ config.log2_tile_height = log2_tile_size(job->tile_height); -+ -+ /* FIXME: ideallly we would like next assert on the packet header (as is -+ * general, so also applies to GL). We would need to expand -+ * gen_pack_header for that. -+ */ -+ assert(config.log2_tile_width == config.log2_tile_height || -+ config.log2_tile_width == config.log2_tile_height + 1); -+ } -+ - #endif - - #if V3D_VERSION >= 40 && V3D_VERSION <= 42 --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0107-v3d-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0107-v3d-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch deleted file mode 100644 index f3bfe3eac3..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0107-v3d-emit-TILE_RENDERING_MODE_CFG_COMMON-for-v71.patch +++ /dev/null @@ -1,44 +0,0 @@ -From dfdfcf3853d7178acff288a368dfc169018c186a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 20 Oct 2021 14:42:43 +0200 -Subject: [PATCH 107/142] v3d: emit TILE_RENDERING_MODE_CFG_COMMON for v71 - ---- - src/gallium/drivers/v3d/v3dx_rcl.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c -index 166cc34e4ee..3f5eb293c4e 100644 ---- a/src/gallium/drivers/v3d/v3dx_rcl.c -+++ b/src/gallium/drivers/v3d/v3dx_rcl.c -@@ -23,8 +23,9 @@ - - #include "util/format/u_format.h" - #include "v3d_context.h" --#include "broadcom/common/v3d_tiling.h" - #include "broadcom/common/v3d_macros.h" -+#include "broadcom/common/v3d_tiling.h" -+#include "broadcom/common/v3d_util.h" - #include "broadcom/cle/v3dx_pack.h" - - #define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \ -@@ -790,7 +791,15 @@ v3dX(emit_rcl)(struct v3d_job *job) - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ config.log2_tile_width = log2_tile_size(job->tile_width); -+ config.log2_tile_height = log2_tile_size(job->tile_height); -+ -+ /* FIXME: ideallly we would like next assert on the packet header (as is -+ * general, so also applies to GL). We would need to expand -+ * gen_pack_header for that. -+ */ -+ assert(config.log2_tile_width == config.log2_tile_height || -+ config.log2_tile_width == config.log2_tile_height + 1); - #endif - - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0108-v3d-TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1.patch b/projects/RPi/devices/RPi5/patches/mesa/0108-v3d-TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1.patch deleted file mode 100644 index de56d89812..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0108-v3d-TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 34b32f1ee504449e39529110631c389fa9e9e409 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 20 Oct 2021 15:12:15 +0200 -Subject: [PATCH 108/142] v3d: TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1 - ---- - src/gallium/drivers/v3d/v3dx_rcl.c | 130 +++++++++++++++++++++++++---- - 1 file changed, 115 insertions(+), 15 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c -index 3f5eb293c4e..815e1098c22 100644 ---- a/src/gallium/drivers/v3d/v3dx_rcl.c -+++ b/src/gallium/drivers/v3d/v3dx_rcl.c -@@ -490,10 +490,86 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer) - } - } - -+#if V3D_VERSION > 33 -+/* Note that for v71, render target cfg packets has just one field that -+ * combined the internal type and clamp mode. For simplicity we keep just one -+ * helper. -+ * -+ * Note: rt_type is in fact a "enum V3DX(Internal_Type)". -+ * -+ */ -+static uint32_t -+v3dX(clamp_for_format_and_type)(uint32_t rt_type, -+ enum pipe_format format) -+{ -+#if V3D_VERSION == 42 -+ if (util_format_is_pure_integer(format)) { -+ return V3D_RENDER_TARGET_CLAMP_INT; -+ } else if (util_format_is_srgb(format)) { -+ return V3D_RENDER_TARGET_CLAMP_NORM; -+ } else { -+ return V3D_RENDER_TARGET_CLAMP_NONE; -+ } -+#endif -+#if V3D_VERSION >= 71 -+ switch (rt_type) { -+ case V3D_INTERNAL_TYPE_8I: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED; -+ case V3D_INTERNAL_TYPE_8UI: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED; -+ case V3D_INTERNAL_TYPE_8: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_8; -+ case V3D_INTERNAL_TYPE_16I: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED; -+ case V3D_INTERNAL_TYPE_16UI: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED; -+ case V3D_INTERNAL_TYPE_16F: -+ return util_format_is_srgb(format) ? -+ V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM : -+ V3D_RENDER_TARGET_TYPE_CLAMP_16F; -+ case V3D_INTERNAL_TYPE_32I: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED; -+ case V3D_INTERNAL_TYPE_32UI: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED; -+ case V3D_INTERNAL_TYPE_32F: -+ return V3D_RENDER_TARGET_TYPE_CLAMP_32F; -+ default: -+ unreachable("Unknown internal render target type"); -+ } -+ return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID; -+#endif -+ return 0; -+} -+#endif -+ -+#if V3D_VERSION >= 71 -+static void -+v3d_setup_render_target(struct v3d_job *job, -+ int cbuf, -+ uint32_t *rt_bpp, -+ uint32_t *rt_type_clamp) -+{ -+ if (!job->cbufs[cbuf]) -+ return; -+ -+ struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]); -+ *rt_bpp = surf->internal_bpp; -+ if (job->bbuf) { -+ struct v3d_surface *bsurf = v3d_surface(job->bbuf); -+ *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp); -+ } -+ *rt_type_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type, -+ surf->base.format); -+} -+#endif -+ - #if V3D_VERSION >= 40 && V3D_VERSION <= 42 - static void --v3d_setup_render_target(struct v3d_job *job, int cbuf, -- uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) -+v3d_setup_render_target(struct v3d_job *job, -+ int cbuf, -+ uint32_t *rt_bpp, -+ uint32_t *rt_type, -+ uint32_t *rt_clamp) - { - if (!job->cbufs[cbuf]) - return; -@@ -505,14 +581,8 @@ v3d_setup_render_target(struct v3d_job *job, int cbuf, - *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp); - } - *rt_type = surf->internal_type; -- if (util_format_is_srgb(surf->base.format)) -- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM; --#if V3D_VERSION >= 42 -- else if (util_format_is_pure_integer(surf->base.format)) -- *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; --#endif -- else -- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; -+ *rt_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type, -+ surf->base.format); - } - #endif - -@@ -804,10 +874,30 @@ v3dX(emit_rcl)(struct v3d_job *job) - - } - -+#if V3D_VERSION >= 71 -+ uint32_t base_addr = 0; -+ -+ /* If we don't have any color RTs, we sill need to emit one and flat -+ * it as not used using stride = 1 -+ */ -+ if (job->nr_cbufs == 0) { -+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.stride = 1; /* Unused */ -+ } -+ } -+#endif - for (int i = 0; i < job->nr_cbufs; i++) { - struct pipe_surface *psurf = job->cbufs[i]; -- if (!psurf) -+ if (!psurf) { -+#if V3D_VERSION >= 71 -+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.render_target_number = i; -+ rt.stride = 1; /* Unused */ -+ } -+#endif - continue; -+ } -+ - struct v3d_surface *surf = v3d_surface(psurf); - struct v3d_resource *rsc = v3d_resource(psurf->texture); - -@@ -874,6 +964,20 @@ v3dX(emit_rcl)(struct v3d_job *job) - clear.render_target_number = i; - }; - } -+#endif -+#if V3D_VERSION >= 71 -+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) { -+ rt.clear_color_low_bits = job->clear_color[i][0]; -+ v3d_setup_render_target(job, i, &rt.internal_bpp, -+ &rt.internal_type_and_clamping); -+ rt.stride = -+ v3d_compute_rt_row_row_stride_128_bits(job->tile_width, -+ v3d_internal_bpp_words(rt.internal_bpp)); -+ rt.base_address = base_addr; -+ rt.render_target_number = i; -+ -+ base_addr += (job->tile_height * rt.stride) / 8; -+ } - #endif - } - -@@ -898,10 +1002,6 @@ v3dX(emit_rcl)(struct v3d_job *job) - } - #endif - --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif -- - #if V3D_VERSION < 40 - /* FIXME: Don't bother emitting if we don't load/clear Z/S. */ - if (job->zsbuf) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0109-v3d-emit-CLEAR_RENDER_TARGETS-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0109-v3d-emit-CLEAR_RENDER_TARGETS-for-v71.patch deleted file mode 100644 index fbb87ab660..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0109-v3d-emit-CLEAR_RENDER_TARGETS-for-v71.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 8496282476420e7e5d9d31f6cfd87f3f3b136446 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 01:47:29 +0200 -Subject: [PATCH 109/142] v3d: emit CLEAR_RENDER_TARGETS for v71 - ---- - src/gallium/drivers/v3d/v3dx_rcl.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c -index 815e1098c22..4274be042bd 100644 ---- a/src/gallium/drivers/v3d/v3dx_rcl.c -+++ b/src/gallium/drivers/v3d/v3dx_rcl.c -@@ -427,7 +427,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer) - } - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ cl_emit(cl, CLEAR_RENDER_TARGETS, clear); - #endif - - } -@@ -734,7 +734,7 @@ emit_render_layer(struct v3d_job *job, uint32_t layer) - store.buffer_to_store = NONE; - } - #endif --#if V3D_VERSION >= 40 && V3D_VERSION <= 42 -+#if V3D_VERSION >= 40 - for (int i = 0; i < 2; i++) { - if (i > 0) - cl_emit(&job->rcl, TILE_COORDINATES, coords); -@@ -742,20 +742,20 @@ emit_render_layer(struct v3d_job *job, uint32_t layer) - cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } -+ - if (i == 0 || do_double_initial_tile_clear(job)) { -+#if V3D_VERSION < 71 - cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = !job->early_zs_clear; - clear.clear_all_render_targets = true; - } -+#else -+ cl_emit(&job->rcl, CLEAR_RENDER_TARGETS, clear); -+#endif - } - cl_emit(&job->rcl, END_OF_TILE_MARKER, end); - } - #endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif -- -- - cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); - - v3d_rcl_emit_generic_per_tile_list(job, layer); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0110-v3d-just-don-t-fill-up-early-z-fields-for-CFG_BITS-f.patch b/projects/RPi/devices/RPi5/patches/mesa/0110-v3d-just-don-t-fill-up-early-z-fields-for-CFG_BITS-f.patch deleted file mode 100644 index e3dbb971af..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0110-v3d-just-don-t-fill-up-early-z-fields-for-CFG_BITS-f.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 4de1ace1c7b3b6436a5de8e4c6a2f52d6308ff5c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 13:09:03 +0200 -Subject: [PATCH 110/142] v3d: just don't fill up early-z fields for CFG_BITS - for v71 - -v71 doesn't include early_z_enable/early_z_updates_enable. They are -configured with packet 121. ---- - src/gallium/drivers/v3d/v3dx_emit.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c -index 5af3d03b337..de05ae29d04 100644 ---- a/src/gallium/drivers/v3d/v3dx_emit.c -+++ b/src/gallium/drivers/v3d/v3dx_emit.c -@@ -515,20 +515,19 @@ v3dX(emit_state)(struct pipe_context *pctx) - #if V3D_VERSION <= 42 - config.early_z_updates_enable = - (job->ez_state != V3D_EZ_DISABLED); -+#endif - if (v3d->zsa->base.depth_enabled) { - config.z_updates_enable = - v3d->zsa->base.depth_writemask; -+#if V3D_VERSION <= 42 - config.early_z_enable = - config.early_z_updates_enable; -+#endif - config.depth_test_function = - v3d->zsa->base.depth_func; - } else { - config.depth_test_function = PIPE_FUNC_ALWAYS; - } --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif - - config.stencil_enable = - v3d->zsa->base.stencil[0].enabled; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0111-v3d-emit-CLIPPER_XY_SCALING-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0111-v3d-emit-CLIPPER_XY_SCALING-for-v71.patch deleted file mode 100644 index 78e45af498..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0111-v3d-emit-CLIPPER_XY_SCALING-for-v71.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 0683f6db1cd50659829fe53f49427bfdacb707b6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 13:14:32 +0200 -Subject: [PATCH 111/142] v3d: emit CLIPPER_XY_SCALING for v71 - ---- - src/gallium/drivers/v3d/v3dx_emit.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c -index de05ae29d04..58c886bb29e 100644 ---- a/src/gallium/drivers/v3d/v3dx_emit.c -+++ b/src/gallium/drivers/v3d/v3dx_emit.c -@@ -577,7 +577,12 @@ v3dX(emit_state)(struct pipe_context *pctx) - } - #endif - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { -+ clip.viewport_half_width_in_1_64th_of_pixel = -+ v3d->viewport.scale[0] * 64.0f; -+ clip.viewport_half_height_in_1_64th_of_pixel = -+ v3d->viewport.scale[1] * 64.0f; -+ } - #endif - - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0112-v3d-no-specific-separate_segments-flag-for-V3D-7.1.patch b/projects/RPi/devices/RPi5/patches/mesa/0112-v3d-no-specific-separate_segments-flag-for-V3D-7.1.patch deleted file mode 100644 index cf420be0f5..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0112-v3d-no-specific-separate_segments-flag-for-V3D-7.1.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 1d1aa5ce739644c72b44ffe547b7233ad19e26b5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 13:19:49 +0200 -Subject: [PATCH 112/142] v3d: no specific separate_segments flag for V3D 7.1 - -On V3D 7.1 there is not a flag on the Shader State Record to specify -if we are using shared or separate segments. This is done by setting -the vpm input size to 0 (so we need to ensure that the output would be -the max needed for input/output). - -We were already doing the latter on the prog_data_vs, so we just need -to use those values, instead of assigning default values. ---- - src/gallium/drivers/v3d/v3dx_draw.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index 9f38baa0bbf..dd13e5177fe 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -679,17 +679,24 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, - v3d->prog.cs->prog_data.vs->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - v3d->prog.vs->prog_data.vs->separate_segments; --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif -- - shader.coordinate_shader_input_vpm_segment_size = - v3d->prog.cs->prog_data.vs->separate_segments ? - v3d->prog.cs->prog_data.vs->vpm_input_size : 1; - shader.vertex_shader_input_vpm_segment_size = - v3d->prog.vs->prog_data.vs->separate_segments ? - v3d->prog.vs->prog_data.vs->vpm_input_size : 1; -+#endif -+ /* On V3D 7.1 there isn't a specific flag to set if we are using -+ * shared/separate segments or not. We just set the value of -+ * vpm_input_size to 0, and set output to the max needed. That should be -+ * already properly set on prog_data_vs_bin -+ */ -+#if V3D_VERSION == 71 -+ shader.coordinate_shader_input_vpm_segment_size = -+ v3d->prog.cs->prog_data.vs->vpm_input_size; -+ shader.vertex_shader_input_vpm_segment_size = -+ v3d->prog.vs->prog_data.vs->vpm_input_size; -+#endif - - shader.coordinate_shader_output_vpm_segment_size = - v3d->prog.cs->prog_data.vs->vpm_output_size; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0113-v3d-default-vertex-attributes-values-are-not-needed-.patch b/projects/RPi/devices/RPi5/patches/mesa/0113-v3d-default-vertex-attributes-values-are-not-needed-.patch deleted file mode 100644 index b3e7369ea0..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0113-v3d-default-vertex-attributes-values-are-not-needed-.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 3a790ddd27c8406c59426599fb9cadb5de5c024d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 13:37:46 +0200 -Subject: [PATCH 113/142] v3d: default vertex attributes values are not needed - for v71 - ---- - src/gallium/drivers/v3d/v3d_context.h | 1 + - src/gallium/drivers/v3d/v3dx_draw.c | 3 -- - src/gallium/drivers/v3d/v3dx_state.c | 53 ++++++++++++++++++--------- - 3 files changed, 37 insertions(+), 20 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h -index ad267d5033c..c0aac741fdc 100644 ---- a/src/gallium/drivers/v3d/v3d_context.h -+++ b/src/gallium/drivers/v3d/v3d_context.h -@@ -265,6 +265,7 @@ struct v3d_vertex_stateobj { - unsigned num_elements; - - uint8_t attrs[16 * (V3D_MAX_VS_INPUTS / 4)]; -+ /* defaults can be NULL for some hw generation */ - struct pipe_resource *defaults; - uint32_t defaults_offset; - }; -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index dd13e5177fe..4bff2ea6478 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -759,9 +759,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, - shader.address_of_default_attribute_values = - cl_address(v3d_resource(vtx->defaults)->bo, - vtx->defaults_offset); --#endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); - #endif - } - -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index a93d5be091e..3d3c4fb0f47 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -337,6 +337,20 @@ v3d_zsa_state_bind(struct pipe_context *pctx, void *hwcso) - v3d->dirty |= V3D_DIRTY_ZSA; - } - -+ -+static bool -+needs_default_attribute_values(void) -+{ -+#if V3D_VERSION <= 42 -+ /* FIXME: on vulkan we are able to refine even further, as we know in -+ * advance when we create the pipeline if we have a integer vertex -+ * attrib. Pending to check if we could do something similar here. -+ */ -+ return true; -+#endif -+ return false; -+} -+ - static void * - v3d_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, - const struct pipe_vertex_element *elements) -@@ -414,24 +428,29 @@ v3d_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, - } - } - -- /* Set up the default attribute values in case any of the vertex -- * elements use them. -- */ -- uint32_t *attrs; -- u_upload_alloc(v3d->state_uploader, 0, -- V3D_MAX_VS_INPUTS * sizeof(float), 16, -- &so->defaults_offset, &so->defaults, (void **)&attrs); -- -- for (int i = 0; i < V3D_MAX_VS_INPUTS / 4; i++) { -- attrs[i * 4 + 0] = 0; -- attrs[i * 4 + 1] = 0; -- attrs[i * 4 + 2] = 0; -- if (i < so->num_elements && -- util_format_is_pure_integer(so->pipe[i].src_format)) { -- attrs[i * 4 + 3] = 1; -- } else { -- attrs[i * 4 + 3] = fui(1.0); -+ if (needs_default_attribute_values()) { -+ /* Set up the default attribute values in case any of the vertex -+ * elements use them. -+ */ -+ uint32_t *attrs; -+ u_upload_alloc(v3d->state_uploader, 0, -+ V3D_MAX_VS_INPUTS * sizeof(float), 16, -+ &so->defaults_offset, &so->defaults, (void **)&attrs); -+ -+ for (int i = 0; i < V3D_MAX_VS_INPUTS / 4; i++) { -+ attrs[i * 4 + 0] = 0; -+ attrs[i * 4 + 1] = 0; -+ attrs[i * 4 + 2] = 0; -+ if (i < so->num_elements && -+ util_format_is_pure_integer(so->pipe[i].src_format)) { -+ attrs[i * 4 + 3] = 1; -+ } else { -+ attrs[i * 4 + 3] = fui(1.0); -+ } - } -+ } else { -+ so->defaults = NULL; -+ so->defaults_offset = 0; - } - - u_upload_unmap(v3d->state_uploader); --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0114-v3d-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for-.patch b/projects/RPi/devices/RPi5/patches/mesa/0114-v3d-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for-.patch deleted file mode 100644 index d197620253..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0114-v3d-uniforms-update-VIEWPORT_X-Y_SCALE-uniforms-for-.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 8e3a2a35df5789687993d05436602821186e1cf2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 13:46:11 +0200 -Subject: [PATCH 114/142] v3d/uniforms: update VIEWPORT_X/Y_SCALE uniforms for - v71 - -As the packet CLIPPER_XY scaling, this needs to be computed on 1/64ths -of pixel, instead of 1/256ths of pixels. - -As this is the usual values that we get from macros, we add manually a -v42 and v71 macro, and define a new helper to get those. - -Those granularity values are the same for Vulkan and OpenGL, so -perhaps we should move them to a common place. - -As with v3dv, V3D_X macro name is somewhat confusing. It is -specifically created to ask for define values that depends on the -version. But I also felt that V3D_DEFINE_X was too long. ---- - src/gallium/drivers/v3d/v3d_context.h | 28 ++++++++++++++++++++++++++ - src/gallium/drivers/v3d/v3d_uniforms.c | 8 ++++++-- - 2 files changed, 34 insertions(+), 2 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h -index c0aac741fdc..21ee10a90cc 100644 ---- a/src/gallium/drivers/v3d/v3d_context.h -+++ b/src/gallium/drivers/v3d/v3d_context.h -@@ -837,6 +837,34 @@ void v3d_disk_cache_store(struct v3d_context *v3d, - v3d_X_thing; \ - }) - -+/* FIXME: The same for vulkan/opengl. Common place? define it at the -+ * v3d_packet files? -+ */ -+#define V3D33_CLIPPER_XY_GRANULARITY 256.0f -+#define V3D42_CLIPPER_XY_GRANULARITY 256.0f -+#define V3D71_CLIPPER_XY_GRANULARITY 64.0f -+ -+/* Helper to get hw-specific macro values */ -+#define V3DV_X(devinfo, thing) ({ \ -+ __typeof(V3D33_##thing) V3D_X_THING; \ -+ switch (devinfo->ver) { \ -+ case 33: \ -+ case 40: \ -+ V3D_X_THING = V3D33_##thing; \ -+ break; \ -+ case 41: \ -+ case 42: \ -+ V3D_X_THING = V3D42_##thing; \ -+ break; \ -+ case 71: \ -+ V3D_X_THING = V3D71_##thing; \ -+ break; \ -+ default: \ -+ unreachable("Unsupported hardware generation"); \ -+ } \ -+ V3D_X_THING; \ -+}) -+ - #ifdef v3dX - # include "v3dx_context.h" - #else -diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c -index 95eb838954f..1b8758bae7d 100644 ---- a/src/gallium/drivers/v3d/v3d_uniforms.c -+++ b/src/gallium/drivers/v3d/v3d_uniforms.c -@@ -261,6 +261,7 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, - struct v3d_compiled_shader *shader, - enum pipe_shader_type stage) - { -+ struct v3d_device_info *devinfo = &v3d->screen->devinfo; - struct v3d_constbuf_stateobj *cb = &v3d->constbuf[stage]; - struct v3d_texture_stateobj *texstate = &v3d->tex[stage]; - struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; -@@ -282,6 +283,9 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, - struct v3d_cl_out *uniforms = - cl_start(&job->indirect); - -+ float clipper_xy_granularity = -+ V3DV_X(devinfo, CLIPPER_XY_GRANULARITY); -+ - for (int i = 0; i < uinfo->count; i++) { - uint32_t data = uinfo->data[i]; - -@@ -293,10 +297,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, - cl_aligned_u32(&uniforms, gallium_uniforms[data]); - break; - case QUNIFORM_VIEWPORT_X_SCALE: -- cl_aligned_f(&uniforms, v3d->viewport.scale[0] * 256.0f); -+ cl_aligned_f(&uniforms, v3d->viewport.scale[0] * clipper_xy_granularity); - break; - case QUNIFORM_VIEWPORT_Y_SCALE: -- cl_aligned_f(&uniforms, v3d->viewport.scale[1] * 256.0f); -+ cl_aligned_f(&uniforms, v3d->viewport.scale[1] * clipper_xy_granularity); - break; - - case QUNIFORM_VIEWPORT_Z_OFFSET: --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0115-v3d-handle-new-texture-state-transfer-functions-in-v.patch b/projects/RPi/devices/RPi5/patches/mesa/0115-v3d-handle-new-texture-state-transfer-functions-in-v.patch deleted file mode 100644 index e9f5e92927..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0115-v3d-handle-new-texture-state-transfer-functions-in-v.patch +++ /dev/null @@ -1,43 +0,0 @@ -From aa6f70116d9e7be56cdb52b55d75419bf7209185 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Thu, 21 Oct 2021 23:21:02 +0200 -Subject: [PATCH 115/142] v3d: handle new texture state transfer functions in - v71 - ---- - src/gallium/drivers/v3d/v3dx_state.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index 3d3c4fb0f47..b5e572b13c5 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -1009,12 +1009,12 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d, - cso->u.buf.size); - } - -+ bool is_srgb = util_format_is_srgb(cso->format); - #if V3D_VERSION <= 42 -- tex.srgb = util_format_is_srgb(cso->format); -+ tex.srgb = is_srgb; - #endif -- - #if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); -+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE; - #endif - - #if V3D_VERSION >= 40 -@@ -1068,9 +1068,6 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d, - #if V3D_VERSION <= 42 - tex.srgb = false; - #endif --#if V3D_VERSION >= 71 -- unreachable("HW generation 71 not supported yet."); --#endif - - } else { - tex.texture_type = v3d_get_tex_format(&screen->devinfo, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0116-v3d-handle-new-TEXTURE_SHADER_STATE-v71-YCbCr-fields.patch b/projects/RPi/devices/RPi5/patches/mesa/0116-v3d-handle-new-TEXTURE_SHADER_STATE-v71-YCbCr-fields.patch deleted file mode 100644 index 2ce6d66bd2..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0116-v3d-handle-new-TEXTURE_SHADER_STATE-v71-YCbCr-fields.patch +++ /dev/null @@ -1,62 +0,0 @@ -From aefc98b6aefc38caa6f6efd421db6d02c42596a7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 22 Oct 2021 10:54:24 +0200 -Subject: [PATCH 116/142] v3d: handle new TEXTURE_SHADER_STATE v71 YCbCr fields - -There are some new fields for YCbCr with pointers for the various -planes in multi-planar formats. These need to match the base address -pointer in the texture state, or the hardware will assume this is a -multi-planar texture. - -Notice we don't use an address type for these fields in the XML -description. This is because the addresses are 64-bit aligned (even -though the PRM doesn't say it) which means the 6 LSB bits are -implicitly 0, but the fields are encoded before the 6th bit of their -starting byte, so we can't use the usual trick we do with address -types where the first 6 bits in the byte are implicitly overwritten by -other fields and we have to encode this manually as a uint field. This -would mean that if we had an actual BO we would also need to add it -manually to the job's list, but since we don't have one, we don't have -to do anything about it. ---- - src/gallium/drivers/v3d/v3dx_state.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index b5e572b13c5..c08a072157b 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -936,17 +936,26 @@ v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex, - } - - tex->base_level = base_level; -+ - #if V3D_VERSION >= 40 - tex->max_level = last_level; - /* Note that we don't have a job to reference the texture's sBO - * at state create time, so any time this sampler view is used - * we need to add the texture to the job. - */ -- tex->texture_base_pointer = -- cl_address(NULL, -- rsc->bo->offset + -- v3d_layer_offset(prsc, 0, first_layer)); -+ const uint32_t base_offset = rsc->bo->offset + -+ v3d_layer_offset(prsc, 0, first_layer); -+ -+ tex->texture_base_pointer = cl_address(NULL, base_offset); - #endif -+#if V3D_VERSION >= 71 -+ tex->chroma_offset_x = 1; -+ tex->chroma_offset_y = 1; -+ /* See comment in XML field definition for rationale of the shifts */ -+ tex->texture_base_pointer_cb = base_offset >> 6; -+ tex->texture_base_pointer_cr = base_offset >> 6; -+#endif -+ - tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64; - - /* Since other platform devices may produce UIF images even --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0117-v3d-setup-render-pass-color-clears-for-any-format-bp.patch b/projects/RPi/devices/RPi5/patches/mesa/0117-v3d-setup-render-pass-color-clears-for-any-format-bp.patch deleted file mode 100644 index 5f7cdbd03f..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0117-v3d-setup-render-pass-color-clears-for-any-format-bp.patch +++ /dev/null @@ -1,42 +0,0 @@ -From fcb3fc1ead4344da59c4b26a81878d53f8f4a291 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 22 Oct 2021 11:40:49 +0200 -Subject: [PATCH 117/142] v3d: setup render pass color clears for any format - bpp in v71 - ---- - src/gallium/drivers/v3d/v3dx_rcl.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c -index 4274be042bd..d3fbc9aff5d 100644 ---- a/src/gallium/drivers/v3d/v3dx_rcl.c -+++ b/src/gallium/drivers/v3d/v3dx_rcl.c -@@ -978,6 +978,24 @@ v3dX(emit_rcl)(struct v3d_job *job) - - base_addr += (job->tile_height * rt.stride) / 8; - } -+ -+ if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) { -+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) { -+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */ -+ ((uint64_t) job->clear_color[i][1]) | -+ (((uint64_t) (job->clear_color[i][2] & 0xff)) << 32); -+ rt.render_target_number = i; -+ } -+ } -+ -+ if (surf->internal_bpp >= V3D_INTERNAL_BPP_128) { -+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) { -+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */ -+ (((uint64_t) (job->clear_color[i][2] & 0xffffff00)) >> 8) | -+ (((uint64_t) (job->clear_color[i][3])) << 24); -+ rt.render_target_number = i; -+ } -+ } - #endif - } - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0118-v3d-GFX-1461-does-not-affect-V3D-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0118-v3d-GFX-1461-does-not-affect-V3D-7.x.patch deleted file mode 100644 index 56e27cf09c..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0118-v3d-GFX-1461-does-not-affect-V3D-7.x.patch +++ /dev/null @@ -1,29 +0,0 @@ -From ceb088c05f351b40df14069bd6e0de777288ece4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 22 Oct 2021 12:17:45 +0200 -Subject: [PATCH 118/142] v3d: GFX-1461 does not affect V3D 7.x - ---- - src/gallium/drivers/v3d/v3dx_draw.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index 4bff2ea6478..04cc3bc3ae1 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -1593,9 +1593,10 @@ v3d_tlb_clear(struct v3d_job *job, unsigned buffers, - /* GFXH-1461: If we were to emit a load of just depth or just stencil, - * then the clear for the other may get lost. We need to decide now - * if it would be possible to need to emit a load of just one after -- * we've set up our TLB clears. -+ * we've set up our TLB clears. This issue is fixed since V3D 4.3.18. - */ -- if (buffers & PIPE_CLEAR_DEPTHSTENCIL && -+ if (v3d->screen->devinfo.ver <= 42 && -+ buffers & PIPE_CLEAR_DEPTHSTENCIL && - (buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && - job->zsbuf && - util_format_is_depth_and_stencil(job->zsbuf->texture->format)) { --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0119-v3d-don-t-convert-floating-point-border-colors-in-v7.patch b/projects/RPi/devices/RPi5/patches/mesa/0119-v3d-don-t-convert-floating-point-border-colors-in-v7.patch deleted file mode 100644 index c3cdfc0355..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0119-v3d-don-t-convert-floating-point-border-colors-in-v7.patch +++ /dev/null @@ -1,55 +0,0 @@ -From b44a8785c5436fb28b6734d3bac806d3a82c828d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 22 Oct 2021 13:41:09 +0200 -Subject: [PATCH 119/142] v3d: don't convert floating point border colors in - v71 - -The TMU does this for us now. ---- - src/gallium/drivers/v3d/v3dx_state.c | 29 ++++++++++++++-------------- - 1 file changed, 15 insertions(+), 14 deletions(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index c08a072157b..348a7bcf3da 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -718,21 +718,22 @@ v3d_upload_sampler_state_variant(void *map, - break; - } - -- if (variant >= V3D_SAMPLER_STATE_32) { -- sampler.border_color_word_0 = border.ui[0]; -- sampler.border_color_word_1 = border.ui[1]; -- sampler.border_color_word_2 = border.ui[2]; -- sampler.border_color_word_3 = border.ui[3]; -- } else { -- sampler.border_color_word_0 = -- _mesa_float_to_half(border.f[0]); -- sampler.border_color_word_1 = -- _mesa_float_to_half(border.f[1]); -- sampler.border_color_word_2 = -- _mesa_float_to_half(border.f[2]); -- sampler.border_color_word_3 = -- _mesa_float_to_half(border.f[3]); -+#if V3D_VERSION <= 42 -+ /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions -+ * for us. In V3D 4.x we need to manually convert floating point color -+ * values to the expected format. -+ */ -+ if (variant < V3D_SAMPLER_STATE_32) { -+ border.ui[0] = _mesa_float_to_half(border.f[0]); -+ border.ui[1] = _mesa_float_to_half(border.f[1]); -+ border.ui[2] = _mesa_float_to_half(border.f[2]); -+ border.ui[3] = _mesa_float_to_half(border.f[3]); - } -+#endif -+ sampler.border_color_word_0 = border.ui[0]; -+ sampler.border_color_word_1 = border.ui[1]; -+ sampler.border_color_word_2 = border.ui[2]; -+ sampler.border_color_word_3 = border.ui[3]; - } - } - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0120-v3d-handle-Z-clipping-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0120-v3d-handle-Z-clipping-in-v71.patch deleted file mode 100644 index ef5d2ade88..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0120-v3d-handle-Z-clipping-in-v71.patch +++ /dev/null @@ -1,39 +0,0 @@ -From ecc1a5fa6b09a684a1e831c342121ec417f1a101 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 22 Oct 2021 14:26:29 +0200 -Subject: [PATCH 120/142] v3d: handle Z clipping in v71 - ---- - src/gallium/drivers/v3d/v3dx_emit.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c -index 58c886bb29e..75751dc9ab6 100644 ---- a/src/gallium/drivers/v3d/v3dx_emit.c -+++ b/src/gallium/drivers/v3d/v3dx_emit.c -@@ -539,8 +539,21 @@ v3dX(emit_state)(struct pipe_context *pctx) - v3d_line_smoothing_enabled(v3d) ? - V3D_LINE_RASTERIZATION_PERP_END_CAPS : - V3D_LINE_RASTERIZATION_DIAMOND_EXIT; -- } - -+#if V3D_VERSION >= 71 -+ /* The following follows the logic implemented at v3dv -+ * plus the definition of depth_clip_near/far and -+ * depth_clamp. -+ * -+ * Note: some extensions are not supported by v3d -+ * (like ARB_depth_clamp) that would affect this, but -+ * the values on rasterizer are taking that into -+ * account. -+ */ -+ config.z_clipping_mode = v3d->rasterizer->base.depth_clip_near || -+ v3d->rasterizer->base.depth_clip_far; -+#endif -+ } - } - - if (v3d->dirty & V3D_DIRTY_RASTERIZER && --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0121-v3d-add-support-for-TFU-blit-in-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0121-v3d-add-support-for-TFU-blit-in-v71.patch deleted file mode 100644 index 8275072cbe..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0121-v3d-add-support-for-TFU-blit-in-v71.patch +++ /dev/null @@ -1,446 +0,0 @@ -From ecac3d8441b75011446b566320194df17beba352 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Wed, 27 Oct 2021 02:03:10 +0200 -Subject: [PATCH 121/142] v3d: add support for TFU blit in v71 - -TFU has changed on v71, specially on which registers to use, so that -means that support code change across versions. So as part of this -commit TFU copying is moved to a v3dx file. ---- - src/gallium/drivers/v3d/meson.build | 1 + - src/gallium/drivers/v3d/v3d_blit.c | 164 +++----------------- - src/gallium/drivers/v3d/v3dx_context.h | 10 ++ - src/gallium/drivers/v3d/v3dx_tfu.c | 202 +++++++++++++++++++++++++ - 4 files changed, 232 insertions(+), 145 deletions(-) - create mode 100644 src/gallium/drivers/v3d/v3dx_tfu.c - -diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build -index 526a131ae9b..b2e748573b7 100644 ---- a/src/gallium/drivers/v3d/meson.build -+++ b/src/gallium/drivers/v3d/meson.build -@@ -49,6 +49,7 @@ files_per_version = files( - 'v3dx_job.c', - 'v3dx_rcl.c', - 'v3dx_state.c', -+ 'v3dx_tfu.c', - ) - - v3d_args = ['-DV3D_BUILD_NEON'] -diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c -index 0260bdde6d1..96179f654a4 100644 ---- a/src/gallium/drivers/v3d/v3d_blit.c -+++ b/src/gallium/drivers/v3d/v3d_blit.c -@@ -210,140 +210,6 @@ v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info) - info->mask &= ~PIPE_MASK_S; - } - --static bool --v3d_tfu(struct pipe_context *pctx, -- struct pipe_resource *pdst, -- struct pipe_resource *psrc, -- unsigned int src_level, -- unsigned int base_level, -- unsigned int last_level, -- unsigned int src_layer, -- unsigned int dst_layer, -- bool for_mipmap) --{ -- struct v3d_context *v3d = v3d_context(pctx); -- struct v3d_screen *screen = v3d->screen; -- struct v3d_resource *src = v3d_resource(psrc); -- struct v3d_resource *dst = v3d_resource(pdst); -- struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; -- struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; -- int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; -- int width = u_minify(pdst->width0, base_level) * msaa_scale; -- int height = u_minify(pdst->height0, base_level) * msaa_scale; -- enum pipe_format pformat; -- -- if (psrc->format != pdst->format) -- return false; -- if (psrc->nr_samples != pdst->nr_samples) -- return false; -- -- /* Can't write to raster. */ -- if (dst_base_slice->tiling == V3D_TILING_RASTER) -- return false; -- -- /* When using TFU for blit, we are doing exact copies (both input and -- * output format must be the same, no scaling, etc), so there is no -- * pixel format conversions. Thus we can rewrite the format to use one -- * that is TFU compatible based on its texel size. -- */ -- if (for_mipmap) { -- pformat = pdst->format; -- } else { -- switch (dst->cpp) { -- case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; -- case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; -- case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; -- case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; -- case 1: pformat = PIPE_FORMAT_R8_UNORM; break; -- default: unreachable("unsupported format bit-size"); break; -- }; -- } -- -- uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); -- struct v3d_device_info *devinfo = &screen->devinfo; -- -- if (!v3d_X(devinfo, tfu_supports_tex_format)(tex_format, for_mipmap)) { -- assert(for_mipmap); -- return false; -- } -- -- v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); -- v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); -- -- struct drm_v3d_submit_tfu tfu = { -- .ios = (height << 16) | width, -- .bo_handles = { -- dst->bo->handle, -- src != dst ? src->bo->handle : 0 -- }, -- .in_sync = v3d->out_sync, -- .out_sync = v3d->out_sync, -- }; -- uint32_t src_offset = (src->bo->offset + -- v3d_layer_offset(psrc, src_level, src_layer)); -- tfu.iia |= src_offset; -- if (src_base_slice->tiling == V3D_TILING_RASTER) { -- tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER << -- V3D33_TFU_ICFG_FORMAT_SHIFT); -- } else { -- tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE + -- (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << -- V3D33_TFU_ICFG_FORMAT_SHIFT); -- } -- -- uint32_t dst_offset = (dst->bo->offset + -- v3d_layer_offset(pdst, base_level, dst_layer)); -- tfu.ioa |= dst_offset; -- if (last_level != base_level) -- tfu.ioa |= V3D33_TFU_IOA_DIMTW; -- tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE + -- (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << -- V3D33_TFU_IOA_FORMAT_SHIFT); -- -- tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT; -- tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT; -- -- switch (src_base_slice->tiling) { -- case V3D_TILING_UIF_NO_XOR: -- case V3D_TILING_UIF_XOR: -- tfu.iis |= (src_base_slice->padded_height / -- (2 * v3d_utile_height(src->cpp))); -- break; -- case V3D_TILING_RASTER: -- tfu.iis |= src_base_slice->stride / src->cpp; -- break; -- case V3D_TILING_LINEARTILE: -- case V3D_TILING_UBLINEAR_1_COLUMN: -- case V3D_TILING_UBLINEAR_2_COLUMN: -- break; -- } -- -- /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the -- * OPAD field for the destination (how many extra UIF blocks beyond -- * those necessary to cover the height). When filling mipmaps, the -- * miplevel 1+ tiling state is inferred. -- */ -- if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || -- dst_base_slice->tiling == V3D_TILING_UIF_XOR) { -- int uif_block_h = 2 * v3d_utile_height(dst->cpp); -- int implicit_padded_height = align(height, uif_block_h); -- -- tfu.icfg |= (((dst_base_slice->padded_height - -- implicit_padded_height) / uif_block_h) << -- V3D33_TFU_ICFG_OPAD_SHIFT); -- } -- -- int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); -- if (ret != 0) { -- fprintf(stderr, "Failed to submit TFU job: %d\n", ret); -- return false; -- } -- -- dst->writes++; -- -- return true; --} -- - bool - v3d_generate_mipmap(struct pipe_context *pctx, - struct pipe_resource *prsc, -@@ -362,12 +228,16 @@ v3d_generate_mipmap(struct pipe_context *pctx, - if (first_layer != last_layer) - return false; - -- return v3d_tfu(pctx, -- prsc, prsc, -- base_level, -- base_level, last_level, -- first_layer, first_layer, -- true); -+ struct v3d_context *v3d = v3d_context(pctx); -+ struct v3d_screen *screen = v3d->screen; -+ struct v3d_device_info *devinfo = &screen->devinfo; -+ -+ return v3d_X(devinfo, tfu)(pctx, -+ prsc, prsc, -+ base_level, -+ base_level, last_level, -+ first_layer, first_layer, -+ true); - } - - static void -@@ -396,11 +266,15 @@ v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info) - if (info->dst.format != info->src.format) - return; - -- if (v3d_tfu(pctx, info->dst.resource, info->src.resource, -- info->src.level, -- info->dst.level, info->dst.level, -- info->src.box.z, info->dst.box.z, -- false)) { -+ struct v3d_context *v3d = v3d_context(pctx); -+ struct v3d_screen *screen = v3d->screen; -+ struct v3d_device_info *devinfo = &screen->devinfo; -+ -+ if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource, -+ info->src.level, -+ info->dst.level, info->dst.level, -+ info->src.box.z, info->dst.box.z, -+ false)) { - info->mask &= ~PIPE_MASK_RGBA; - } - } -diff --git a/src/gallium/drivers/v3d/v3dx_context.h b/src/gallium/drivers/v3d/v3dx_context.h -index 03d7c244ea2..e0a5cbfb2f3 100644 ---- a/src/gallium/drivers/v3d/v3dx_context.h -+++ b/src/gallium/drivers/v3d/v3dx_context.h -@@ -51,3 +51,13 @@ void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, - */ - bool v3dX(tfu_supports_tex_format)(uint32_t tex_format, - bool for_mipmap); -+ -+bool v3dX(tfu)(struct pipe_context *pctx, -+ struct pipe_resource *pdst, -+ struct pipe_resource *psrc, -+ unsigned int src_level, -+ unsigned int base_level, -+ unsigned int last_level, -+ unsigned int src_layer, -+ unsigned int dst_layer, -+ bool for_mipmap); -diff --git a/src/gallium/drivers/v3d/v3dx_tfu.c b/src/gallium/drivers/v3d/v3dx_tfu.c -new file mode 100644 -index 00000000000..d6b51390a11 ---- /dev/null -+++ b/src/gallium/drivers/v3d/v3dx_tfu.c -@@ -0,0 +1,202 @@ -+/* -+ * Copyright © 2021 Broadcom -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#include "v3d_context.h" -+#include "broadcom/common/v3d_tfu.h" -+ -+bool -+v3dX(tfu)(struct pipe_context *pctx, -+ struct pipe_resource *pdst, -+ struct pipe_resource *psrc, -+ unsigned int src_level, -+ unsigned int base_level, -+ unsigned int last_level, -+ unsigned int src_layer, -+ unsigned int dst_layer, -+ bool for_mipmap) -+{ -+ struct v3d_context *v3d = v3d_context(pctx); -+ struct v3d_screen *screen = v3d->screen; -+ struct v3d_resource *src = v3d_resource(psrc); -+ struct v3d_resource *dst = v3d_resource(pdst); -+ struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; -+ struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; -+ int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; -+ int width = u_minify(pdst->width0, base_level) * msaa_scale; -+ int height = u_minify(pdst->height0, base_level) * msaa_scale; -+ enum pipe_format pformat; -+ -+ if (psrc->format != pdst->format) -+ return false; -+ if (psrc->nr_samples != pdst->nr_samples) -+ return false; -+ -+ if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D) -+ return false; -+ -+ /* Can't write to raster. */ -+ if (dst_base_slice->tiling == V3D_TILING_RASTER) -+ return false; -+ -+ /* When using TFU for blit, we are doing exact copies (both input and -+ * output format must be the same, no scaling, etc), so there is no -+ * pixel format conversions. Thus we can rewrite the format to use one -+ * that is TFU compatible based on its texel size. -+ */ -+ if (for_mipmap) { -+ pformat = pdst->format; -+ } else { -+ switch (dst->cpp) { -+ case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; -+ case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; -+ case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; -+ case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; -+ case 1: pformat = PIPE_FORMAT_R8_UNORM; break; -+ default: unreachable("unsupported format bit-size"); break; -+ }; -+ } -+ -+ uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); -+ -+ if (!v3dX(tfu_supports_tex_format)(tex_format, for_mipmap)) { -+ assert(for_mipmap); -+ return false; -+ } -+ -+ v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); -+ v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); -+ -+ struct drm_v3d_submit_tfu tfu = { -+ .ios = (height << 16) | width, -+ .bo_handles = { -+ dst->bo->handle, -+ src != dst ? src->bo->handle : 0 -+ }, -+ .in_sync = v3d->out_sync, -+ .out_sync = v3d->out_sync, -+ }; -+ uint32_t src_offset = (src->bo->offset + -+ v3d_layer_offset(psrc, src_level, src_layer)); -+ tfu.iia |= src_offset; -+ -+ uint32_t dst_offset = (dst->bo->offset + -+ v3d_layer_offset(pdst, base_level, dst_layer)); -+ tfu.ioa |= dst_offset; -+ -+ switch (src_base_slice->tiling) { -+ case V3D_TILING_UIF_NO_XOR: -+ case V3D_TILING_UIF_XOR: -+ tfu.iis |= (src_base_slice->padded_height / -+ (2 * v3d_utile_height(src->cpp))); -+ break; -+ case V3D_TILING_RASTER: -+ tfu.iis |= src_base_slice->stride / src->cpp; -+ break; -+ case V3D_TILING_LINEARTILE: -+ case V3D_TILING_UBLINEAR_1_COLUMN: -+ case V3D_TILING_UBLINEAR_2_COLUMN: -+ break; -+ } -+ -+#if V3D_VERSION <= 42 -+ if (src_base_slice->tiling == V3D_TILING_RASTER) { -+ tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER << -+ V3D33_TFU_ICFG_FORMAT_SHIFT); -+ } else { -+ tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE + -+ (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << -+ V3D33_TFU_ICFG_FORMAT_SHIFT); -+ } -+ tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT; -+ -+ if (last_level != base_level) -+ tfu.ioa |= V3D33_TFU_IOA_DIMTW; -+ -+ tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE + -+ (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << -+ V3D33_TFU_IOA_FORMAT_SHIFT); -+ -+ tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT; -+ -+ /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the -+ * OPAD field for the destination (how many extra UIF blocks beyond -+ * those necessary to cover the height). When filling mipmaps, the -+ * miplevel 1+ tiling state is inferred. -+ */ -+ if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || -+ dst_base_slice->tiling == V3D_TILING_UIF_XOR) { -+ int uif_block_h = 2 * v3d_utile_height(dst->cpp); -+ int implicit_padded_height = align(height, uif_block_h); -+ -+ tfu.icfg |= (((dst_base_slice->padded_height - -+ implicit_padded_height) / uif_block_h) << -+ V3D33_TFU_ICFG_OPAD_SHIFT); -+ } -+#endif /* V3D_VERSION <= 42 */ -+ -+#if V3D_VERSION >= 71 -+ if (src_base_slice->tiling == V3D_TILING_RASTER) { -+ tfu.icfg = V3D71_TFU_ICFG_FORMAT_RASTER << V3D71_TFU_ICFG_IFORMAT_SHIFT; -+ } else { -+ tfu.icfg = (V3D71_TFU_ICFG_FORMAT_LINEARTILE + -+ (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << -+ V3D71_TFU_ICFG_IFORMAT_SHIFT; -+ } -+ tfu.icfg |= tex_format << V3D71_TFU_ICFG_OTYPE_SHIFT; -+ -+ if (last_level != base_level) -+ tfu.v71.ioc |= V3D71_TFU_IOC_DIMTW; -+ -+ tfu.v71.ioc |= ((V3D71_TFU_IOC_FORMAT_LINEARTILE + -+ (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << -+ V3D71_TFU_IOC_FORMAT_SHIFT); -+ -+ switch (dst_base_slice->tiling) { -+ case V3D_TILING_UIF_NO_XOR: -+ case V3D_TILING_UIF_XOR: -+ tfu.v71.ioc |= -+ (dst_base_slice->padded_height / (2 * v3d_utile_height(dst->cpp))) << -+ V3D71_TFU_IOC_STRIDE_SHIFT; -+ break; -+ case V3D_TILING_RASTER: -+ tfu.v71.ioc |= (dst_base_slice->padded_height / dst->cpp) << -+ V3D71_TFU_IOC_STRIDE_SHIFT; -+ break; -+ default: -+ break; -+ } -+ -+ tfu.v71.ioc |= (last_level - base_level) << V3D71_TFU_IOC_NUMMM_SHIFT; -+#endif /* V3D_VERSION >= 71*/ -+ -+ int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); -+ if (ret != 0) { -+ fprintf(stderr, "Failed to submit TFU job: %d\n", ret); -+ return false; -+ } -+ -+ dst->writes++; -+ -+ return true; -+} -+ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0122-v3d-v3dv-fix-texture-state-array-stride-packing-for-.patch b/projects/RPi/devices/RPi5/patches/mesa/0122-v3d-v3dv-fix-texture-state-array-stride-packing-for-.patch deleted file mode 100644 index 105a224f18..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0122-v3d-v3dv-fix-texture-state-array-stride-packing-for-.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ed7e118a6cc0c9bba9f02929e98bc51252331950 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 16 May 2023 00:28:27 +0200 -Subject: [PATCH 122/142] v3d/v3dv: fix texture state array stride packing for - V3D 7.1.5 - ---- - src/broadcom/vulkan/v3dvx_image.c | 7 +++++++ - src/gallium/drivers/v3d/v3dx_state.c | 20 +++++++++++++++----- - 2 files changed, 22 insertions(+), 5 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c -index 437d4588c7e..ae6eaa88d0c 100644 ---- a/src/broadcom/vulkan/v3dvx_image.c -+++ b/src/broadcom/vulkan/v3dvx_image.c -@@ -118,6 +118,13 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - #endif - #if V3D_VERSION >= 71 - tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE; -+ -+ /* V3D 7.1.5 has array stride starting one bit later than previous -+ * V3D versions to make room for the new RB swap bit, but we don't -+ * handle that in the CLE parser. -+ */ -+ if (device->devinfo.rev >= 5) -+ tex.array_stride_64_byte_aligned <<= 1; - #endif - - /* At this point we don't have the job. That's the reason the first -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index 348a7bcf3da..88e57cd072b 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -889,7 +889,8 @@ v3d_setup_texture_shader_state_from_buffer(struct V3DX(TEXTURE_SHADER_STATE) *te - } - - static void --v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex, -+v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo, -+ struct V3DX(TEXTURE_SHADER_STATE) *tex, - struct pipe_resource *prsc, - int base_level, int last_level, - int first_layer, int last_layer, -@@ -949,15 +950,22 @@ v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex, - - tex->texture_base_pointer = cl_address(NULL, base_offset); - #endif -+ -+ tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64; -+ - #if V3D_VERSION >= 71 - tex->chroma_offset_x = 1; - tex->chroma_offset_y = 1; - /* See comment in XML field definition for rationale of the shifts */ - tex->texture_base_pointer_cb = base_offset >> 6; - tex->texture_base_pointer_cr = base_offset >> 6; --#endif - -- tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64; -+ /* V3D 7.1.5 has array stride start at bit 33 instead of bit 32 to -+ * make room for the RB swap bit. -+ */ -+ if (devinfo->rev >= 5) -+ tex->array_stride_64_byte_aligned <<= 1; -+#endif - - /* Since other platform devices may produce UIF images even - * when they're not big enough for V3D to assume they're UIF, -@@ -1006,7 +1014,8 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d, - - v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { - if (prsc->target != PIPE_BUFFER) { -- v3d_setup_texture_shader_state(&tex, prsc, -+ v3d_setup_texture_shader_state(&v3d->screen->devinfo, -+ &tex, prsc, - cso->u.tex.first_level, - cso->u.tex.last_level, - cso->u.tex.first_layer, -@@ -1442,7 +1451,8 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d, - - v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { - if (prsc->target != PIPE_BUFFER) { -- v3d_setup_texture_shader_state(&tex, prsc, -+ v3d_setup_texture_shader_state(&v3d->screen->devinfo, -+ &tex, prsc, - iview->base.u.tex.level, - iview->base.u.tex.level, - iview->base.u.tex.first_layer, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0123-v3d-v3dv-support-up-to-8-render-targets-in-v7.1.patch b/projects/RPi/devices/RPi5/patches/mesa/0123-v3d-v3dv-support-up-to-8-render-targets-in-v7.1.patch deleted file mode 100644 index 19cffa9495..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0123-v3d-v3dv-support-up-to-8-render-targets-in-v7.1.patch +++ /dev/null @@ -1,499 +0,0 @@ -From 48893b056a07b7eda4fe3dea7f068c403981b621 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 12 Nov 2021 10:35:59 +0100 -Subject: [PATCH 123/142] v3d,v3dv: support up to 8 render targets in v7.1+ - ---- - src/broadcom/common/v3d_limits.h | 3 +- - src/broadcom/common/v3d_util.c | 49 ++++++++++++++++++++++++-- - src/broadcom/common/v3d_util.h | 6 ++-- - src/broadcom/compiler/nir_to_vir.c | 10 +++--- - src/broadcom/vulkan/v3dv_cmd_buffer.c | 5 +-- - src/broadcom/vulkan/v3dv_device.c | 6 ++-- - src/broadcom/vulkan/v3dv_limits.h | 2 -- - src/broadcom/vulkan/v3dv_meta_clear.c | 8 +++-- - src/broadcom/vulkan/v3dv_pass.c | 6 ++-- - src/broadcom/vulkan/v3dv_pipeline.c | 4 ++- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 7 ++-- - src/broadcom/vulkan/v3dvx_device.c | 1 - - src/gallium/drivers/v3d/v3d_blit.c | 2 +- - src/gallium/drivers/v3d/v3d_context.c | 5 +-- - src/gallium/drivers/v3d/v3d_context.h | 3 +- - src/gallium/drivers/v3d/v3d_job.c | 6 ++-- - src/gallium/drivers/v3d/v3d_screen.c | 3 +- - src/gallium/drivers/v3d/v3dx_emit.c | 14 +++++--- - src/gallium/drivers/v3d/v3dx_state.c | 5 +-- - 19 files changed, 104 insertions(+), 41 deletions(-) - -diff --git a/src/broadcom/common/v3d_limits.h b/src/broadcom/common/v3d_limits.h -index 46f38bd7484..354c8784914 100644 ---- a/src/broadcom/common/v3d_limits.h -+++ b/src/broadcom/common/v3d_limits.h -@@ -42,7 +42,8 @@ - - #define V3D_MAX_SAMPLES 4 - --#define V3D_MAX_DRAW_BUFFERS 4 -+#define V3D_MAX_DRAW_BUFFERS 8 -+#define V3D_MAX_RENDER_TARGETS(ver) (ver < 71 ? 4 : 8) - - #define V3D_MAX_POINT_SIZE 512.0f - #define V3D_MAX_LINE_WIDTH 32 -diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c -index 26f5c6b336f..209a5eceaa1 100644 ---- a/src/broadcom/common/v3d_util.c -+++ b/src/broadcom/common/v3d_util.c -@@ -88,8 +88,10 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, - } - - void --v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp, -- bool msaa, bool double_buffer, -+v3d_choose_tile_size(const struct v3d_device_info *devinfo, -+ uint32_t color_attachment_count, -+ uint32_t max_color_bpp, bool msaa, -+ bool double_buffer, - uint32_t *width, uint32_t *height) - { - static const uint8_t tile_sizes[] = { -@@ -103,7 +105,9 @@ v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp, - }; - - uint32_t idx = 0; -- if (color_attachment_count > 2) -+ if (color_attachment_count > 4) -+ idx += 3; -+ else if (color_attachment_count > 2) - idx += 2; - else if (color_attachment_count > 1) - idx += 1; -@@ -117,6 +121,45 @@ v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp, - - idx += max_color_bpp; - -+ if (devinfo->ver >= 71) { -+ /* In V3D 7.x the TLB has an auxiliary buffer of 8KB that will be -+ * automatically used for depth instead of the main 16KB depth TLB buffer -+ * when the depth tile fits in the auxiliary buffer, allowing the hardware -+ * to allocate the 16KB from the main depth TLB to the color TLB. If -+ * we can do that, then we are effectively doubling the memory we have -+ * for color and we can increase our tile dimensions by a factor of 2 -+ * (reduce idx by 1). -+ * -+ * If we have computed a tile size that would be smaller than the minimum -+ * of 8x8, then it is certain that depth will fit in the aux depth TLB -+ * (even in MSAA mode). -+ * -+ * Otherwise, we need check if we can fit depth in the aux TLB buffer -+ * using a larger tile size. -+ * -+ * FIXME: the docs state that depth TLB memory can be used for color -+ * if depth testing is not used by setting the 'depth disable' bit in the -+ * rendering configuration. However, this comes with a requirement that -+ * occlussion queries must not be active. We need to clarify if this means -+ * active at the point at which we emit a tile rendering configuration -+ * item, meaning that the we have a query spanning a full render pass -+ * (this is something we can tell before we emit the rendering -+ * configuration item) or active in the subpass for which we are enabling -+ * the bit (which we can't tell until later, when we record commands for -+ * the subpass). If it is the latter, then we cannot use this feature. -+ */ -+ if (idx >= ARRAY_SIZE(tile_sizes) / 2) { -+ idx--; -+ } else if (idx > 0) { -+ /* Depth is always 32bpp (4x32bpp for 4x MSAA) */ -+ uint32_t depth_bpp = !msaa ? 4 : 16; -+ uint32_t tile_w = tile_sizes[(idx - 1) * 2]; -+ uint32_t tile_h = tile_sizes[(idx - 1) * 2 + 1]; -+ if (tile_w * tile_h * depth_bpp <= 8192) -+ idx--; -+ } -+ } -+ - assert(idx < ARRAY_SIZE(tile_sizes) / 2); - - *width = tile_sizes[idx * 2]; -diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h -index 864fc949ffa..5a7e244a0a5 100644 ---- a/src/broadcom/common/v3d_util.h -+++ b/src/broadcom/common/v3d_util.h -@@ -37,8 +37,10 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, - uint32_t wg_size); - - void --v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp, -- bool msaa, bool double_buffer, -+v3d_choose_tile_size(const struct v3d_device_info *devinfo, -+ uint32_t color_attachment_count, -+ uint32_t max_color_bpp, bool msaa, -+ bool double_buffer, - uint32_t *width, uint32_t *height); - - uint32_t -diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c -index a8cf02dd386..531e85a1212 100644 ---- a/src/broadcom/compiler/nir_to_vir.c -+++ b/src/broadcom/compiler/nir_to_vir.c -@@ -2483,15 +2483,17 @@ ntq_setup_outputs(struct v3d_compile *c) - - switch (var->data.location) { - case FRAG_RESULT_COLOR: -- c->output_color_var[0] = var; -- c->output_color_var[1] = var; -- c->output_color_var[2] = var; -- c->output_color_var[3] = var; -+ for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) -+ c->output_color_var[i] = var; - break; - case FRAG_RESULT_DATA0: - case FRAG_RESULT_DATA1: - case FRAG_RESULT_DATA2: - case FRAG_RESULT_DATA3: -+ case FRAG_RESULT_DATA4: -+ case FRAG_RESULT_DATA5: -+ case FRAG_RESULT_DATA6: -+ case FRAG_RESULT_DATA7: - c->output_color_var[var->data.location - - FRAG_RESULT_DATA0] = var; - break; -diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c -index bda0a614523..11d161b19b7 100644 ---- a/src/broadcom/vulkan/v3dv_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c -@@ -365,7 +365,8 @@ job_compute_frame_tiling(struct v3dv_job *job, - /* Double-buffer is incompatible with MSAA */ - assert(!tiling->msaa || !tiling->double_buffer); - -- v3d_choose_tile_size(render_target_count, max_internal_bpp, -+ v3d_choose_tile_size(&job->device->devinfo, -+ render_target_count, max_internal_bpp, - tiling->msaa, tiling->double_buffer, - &tiling->tile_width, &tiling->tile_height); - -@@ -1374,7 +1375,7 @@ cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer) - } - - uint32_t att_count = 0; -- VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */ -+ VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* +1 for D/S */ - - /* We only need to emit subpass clears as draw calls for color attachments - * if the render area is not aligned to tile boundaries. -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index 01e2dd7ac2d..19e58542414 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -1366,6 +1366,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - const VkSampleCountFlags supported_sample_counts = - VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; - -+ const uint8_t max_rts = V3D_MAX_RENDER_TARGETS(pdevice->devinfo.ver); -+ - struct timespec clock_res; - clock_getres(CLOCK_MONOTONIC, &clock_res); - const float timestamp_period = -@@ -1436,7 +1438,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - .maxFragmentInputComponents = max_varying_components, - .maxFragmentOutputAttachments = 4, - .maxFragmentDualSrcAttachments = 0, -- .maxFragmentCombinedOutputResources = MAX_RENDER_TARGETS + -+ .maxFragmentCombinedOutputResources = max_rts + - MAX_STORAGE_BUFFERS + - MAX_STORAGE_IMAGES, - -@@ -1476,7 +1478,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - .framebufferDepthSampleCounts = supported_sample_counts, - .framebufferStencilSampleCounts = supported_sample_counts, - .framebufferNoAttachmentsSampleCounts = supported_sample_counts, -- .maxColorAttachments = MAX_RENDER_TARGETS, -+ .maxColorAttachments = max_rts, - .sampledImageColorSampleCounts = supported_sample_counts, - .sampledImageIntegerSampleCounts = supported_sample_counts, - .sampledImageDepthSampleCounts = supported_sample_counts, -diff --git a/src/broadcom/vulkan/v3dv_limits.h b/src/broadcom/vulkan/v3dv_limits.h -index 9cda9f0d6d2..8ac99724105 100644 ---- a/src/broadcom/vulkan/v3dv_limits.h -+++ b/src/broadcom/vulkan/v3dv_limits.h -@@ -50,8 +50,6 @@ - #define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + \ - MAX_DYNAMIC_STORAGE_BUFFERS) - --#define MAX_RENDER_TARGETS 4 -- - #define MAX_MULTIVIEW_VIEW_COUNT 16 - - /* These are tunable parameters in the HW design, but all the V3D -diff --git a/src/broadcom/vulkan/v3dv_meta_clear.c b/src/broadcom/vulkan/v3dv_meta_clear.c -index d376c179e1c..0a7905b49d5 100644 ---- a/src/broadcom/vulkan/v3dv_meta_clear.c -+++ b/src/broadcom/vulkan/v3dv_meta_clear.c -@@ -747,7 +747,7 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx, - uint32_t bit_offset = 0; - - key |= rt_idx; -- bit_offset += 2; -+ bit_offset += 3; - - key |= ((uint64_t) format) << bit_offset; - bit_offset += 32; -@@ -1189,9 +1189,11 @@ v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer, - { - V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - -- /* We can only clear attachments in the current subpass */ -- assert(attachmentCount <= 5); /* 4 color + D/S */ -+ /* We can have at most max_color_RTs + 1 D/S attachments */ -+ assert(attachmentCount <= -+ V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1); - -+ /* We can only clear attachments in the current subpass */ - struct v3dv_render_pass *pass = cmd_buffer->state.pass; - - assert(cmd_buffer->state.subpass_idx < pass->subpass_count); -diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c -index 3e82c15df88..7f2e2bbc710 100644 ---- a/src/broadcom/vulkan/v3dv_pass.c -+++ b/src/broadcom/vulkan/v3dv_pass.c -@@ -322,11 +322,11 @@ subpass_get_granularity(struct v3dv_device *device, - /* Granularity is defined by the tile size */ - assert(subpass_idx < pass->subpass_count); - struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx]; -- const uint32_t color_attachment_count = subpass->color_count; -+ const uint32_t color_count = subpass->color_count; - - bool msaa = false; - uint32_t max_bpp = 0; -- for (uint32_t i = 0; i < color_attachment_count; i++) { -+ for (uint32_t i = 0; i < color_count; i++) { - uint32_t attachment_idx = subpass->color_attachments[i].attachment; - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; -@@ -349,7 +349,7 @@ subpass_get_granularity(struct v3dv_device *device, - * heuristics so we choose a conservative granularity here, with it disabled. - */ - uint32_t width, height; -- v3d_choose_tile_size(color_attachment_count, max_bpp, msaa, -+ v3d_choose_tile_size(&device->devinfo, color_count, max_bpp, msaa, - false /* double-buffer */, &width, &height); - *granularity = (VkExtent2D) { - .width = width, -diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c -index 2156176d4cc..3bcdcc9a853 100644 ---- a/src/broadcom/vulkan/v3dv_pipeline.c -+++ b/src/broadcom/vulkan/v3dv_pipeline.c -@@ -2632,6 +2632,7 @@ pipeline_init_dynamic_state( - const VkPipelineColorWriteCreateInfoEXT *pColorWriteState) - { - /* Initialize to default values */ -+ const struct v3d_device_info *devinfo = &pipeline->device->devinfo; - struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state; - memset(dynamic, 0, sizeof(*dynamic)); - dynamic->stencil_compare_mask.front = ~0; -@@ -2639,7 +2640,8 @@ pipeline_init_dynamic_state( - dynamic->stencil_write_mask.front = ~0; - dynamic->stencil_write_mask.back = ~0; - dynamic->line_width = 1.0f; -- dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1; -+ dynamic->color_write_enable = -+ (1ull << (4 * V3D_MAX_RENDER_TARGETS(devinfo->ver))) - 1; - - /* Create a mask of enabled dynamic states */ - uint32_t dynamic_states = 0; -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 3566649aafd..bf5e47018e8 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1550,10 +1550,13 @@ v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer) - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - assert(pipeline); - -+ const struct v3d_device_info *devinfo = &cmd_buffer->device->devinfo; -+ const uint32_t max_color_rts = V3D_MAX_RENDER_TARGETS(devinfo->ver); -+ - const uint32_t blend_packets_size = - cl_packet_length(BLEND_ENABLES) + - cl_packet_length(BLEND_CONSTANT_COLOR) + -- cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS; -+ cl_packet_length(BLEND_CFG) * max_color_rts; - - v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size); - v3dv_return_if_oom(cmd_buffer, NULL); -@@ -1565,7 +1568,7 @@ v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer) - } - } - -- for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { -+ for (uint32_t i = 0; i < max_color_rts; i++) { - if (pipeline->blend.enables & (1 << i)) - cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]); - } -diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c -index 72daefadb08..4d17a2691a5 100644 ---- a/src/broadcom/vulkan/v3dvx_device.c -+++ b/src/broadcom/vulkan/v3dvx_device.c -@@ -49,7 +49,6 @@ vk_to_v3d_compare_func[] = { - [VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS, - }; - -- - static union pipe_color_union encode_border_color( - const VkSamplerCustomBorderColorCreateInfoEXT *bc_info) - { -diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c -index 96179f654a4..51ddc292ff7 100644 ---- a/src/gallium/drivers/v3d/v3d_blit.c -+++ b/src/gallium/drivers/v3d/v3d_blit.c -@@ -369,7 +369,7 @@ v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info) - bool double_buffer = V3D_DBG(DOUBLE_BUFFER) && !msaa; - - uint32_t tile_width, tile_height, max_bpp; -- v3d_get_tile_buffer_size(msaa, double_buffer, -+ v3d_get_tile_buffer_size(devinfo, msaa, double_buffer, - is_color_blit ? 1 : 0, surfaces, src_surf, - &tile_width, &tile_height, &max_bpp); - -diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c -index f12e8c92139..def546e9ef5 100644 ---- a/src/gallium/drivers/v3d/v3d_context.c -+++ b/src/gallium/drivers/v3d/v3d_context.c -@@ -220,7 +220,8 @@ v3d_flag_dirty_sampler_state(struct v3d_context *v3d, - } - - void --v3d_get_tile_buffer_size(bool is_msaa, -+v3d_get_tile_buffer_size(const struct v3d_device_info *devinfo, -+ bool is_msaa, - bool double_buffer, - uint32_t nr_cbufs, - struct pipe_surface **cbufs, -@@ -247,7 +248,7 @@ v3d_get_tile_buffer_size(bool is_msaa, - *max_bpp = MAX2(*max_bpp, bsurf->internal_bpp); - } - -- v3d_choose_tile_size(max_cbuf_idx + 1, *max_bpp, -+ v3d_choose_tile_size(devinfo, max_cbuf_idx + 1, *max_bpp, - is_msaa, double_buffer, - tile_width, tile_height); - } -diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h -index 21ee10a90cc..eb184b4b203 100644 ---- a/src/gallium/drivers/v3d/v3d_context.h -+++ b/src/gallium/drivers/v3d/v3d_context.h -@@ -795,7 +795,8 @@ void v3d_ensure_prim_counts_allocated(struct v3d_context *ctx); - void v3d_flag_dirty_sampler_state(struct v3d_context *v3d, - enum pipe_shader_type shader); - --void v3d_get_tile_buffer_size(bool is_msaa, -+void v3d_get_tile_buffer_size(const struct v3d_device_info *devinfo, -+ bool is_msaa, - bool double_buffer, - uint32_t nr_cbufs, - struct pipe_surface **cbufs, -diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c -index b022ed45073..577890a06c3 100644 ---- a/src/gallium/drivers/v3d/v3d_job.c -+++ b/src/gallium/drivers/v3d/v3d_job.c -@@ -383,9 +383,11 @@ v3d_get_job_for_fbo(struct v3d_context *v3d) - job->double_buffer = false; - } - -- v3d_get_tile_buffer_size(job->msaa, job->double_buffer, -+ v3d_get_tile_buffer_size(&v3d->screen->devinfo, -+ job->msaa, job->double_buffer, - job->nr_cbufs, job->cbufs, job->bbuf, -- &job->tile_width, &job->tile_height, -+ &job->tile_width, -+ &job->tile_height, - &job->internal_bpp); - - /* The dirty flags are tracking what's been updated while v3d->job has -diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c -index efdb7d615ae..2225edf85bd 100644 ---- a/src/gallium/drivers/v3d/v3d_screen.c -+++ b/src/gallium/drivers/v3d/v3d_screen.c -@@ -255,9 +255,8 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return V3D_MAX_ARRAY_LAYERS; - -- /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: -- return 4; -+ return V3D_MAX_RENDER_TARGETS(screen->devinfo.ver); - - case PIPE_CAP_VENDOR_ID: - return 0x14E4; -diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c -index 75751dc9ab6..87e75281dc9 100644 ---- a/src/gallium/drivers/v3d/v3dx_emit.c -+++ b/src/gallium/drivers/v3d/v3dx_emit.c -@@ -661,8 +661,10 @@ v3dX(emit_state)(struct pipe_context *pctx) - } - #endif - -+ const uint32_t max_rts = -+ V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver); - if (blend->base.independent_blend_enable) { -- for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) -+ for (int i = 0; i < max_rts; i++) - emit_rt_blend(v3d, job, &blend->base, i, - (1 << i), - v3d->blend_dst_alpha_one & (1 << i)); -@@ -678,16 +680,16 @@ v3dX(emit_state)(struct pipe_context *pctx) - * RTs without. - */ - emit_rt_blend(v3d, job, &blend->base, 0, -- ((1 << V3D_MAX_DRAW_BUFFERS) - 1) & -+ ((1 << max_rts) - 1) & - v3d->blend_dst_alpha_one, - true); - emit_rt_blend(v3d, job, &blend->base, 0, -- ((1 << V3D_MAX_DRAW_BUFFERS) - 1) & -+ ((1 << max_rts) - 1) & - ~v3d->blend_dst_alpha_one, - false); - } else { - emit_rt_blend(v3d, job, &blend->base, 0, -- (1 << V3D_MAX_DRAW_BUFFERS) - 1, -+ (1 << max_rts) - 1, - v3d->blend_dst_alpha_one); - } - } -@@ -696,8 +698,10 @@ v3dX(emit_state)(struct pipe_context *pctx) - if (v3d->dirty & V3D_DIRTY_BLEND) { - struct pipe_blend_state *blend = &v3d->blend->base; - -+ const uint32_t max_rts = -+ V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver); - cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { -- for (int i = 0; i < 4; i++) { -+ for (int i = 0; i < max_rts; i++) { - int rt = blend->independent_blend_enable ? i : 0; - int rt_mask = blend->rt[rt].colormask; - -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index 88e57cd072b..970a082aa85 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -138,8 +138,9 @@ v3d_create_blend_state(struct pipe_context *pctx, - - so->base = *cso; - -+ uint32_t max_rts = V3D_MAX_RENDER_TARGETS(V3D_VERSION); - if (cso->independent_blend_enable) { -- for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { -+ for (int i = 0; i < max_rts; i++) { - so->blend_enables |= cso->rt[i].blend_enable << i; - - /* V3D 4.x is when we got independent blend enables. */ -@@ -148,7 +149,7 @@ v3d_create_blend_state(struct pipe_context *pctx, - } - } else { - if (cso->rt[0].blend_enable) -- so->blend_enables = (1 << V3D_MAX_DRAW_BUFFERS) - 1; -+ so->blend_enables = (1 << max_rts) - 1; - } - - return so; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0124-v3d-v3dv-don-t-use-max-internal-bpp-for-tile-sizing-.patch b/projects/RPi/devices/RPi5/patches/mesa/0124-v3d-v3dv-don-t-use-max-internal-bpp-for-tile-sizing-.patch deleted file mode 100644 index 2e193e0644..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0124-v3d-v3dv-don-t-use-max-internal-bpp-for-tile-sizing-.patch +++ /dev/null @@ -1,539 +0,0 @@ -From cc5afd808039f3e0b81fe0615745b74cbb31d0bf Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 16 Nov 2021 11:26:17 +0100 -Subject: [PATCH 124/142] v3d,v3dv: don't use max internal bpp for tile sizing - in V3D 7.x - -We can use the actual bpp of each color attachment to compute real -tile memory requirements, which may allow us to choose a larger tile -size configuration than in V3D 4.2 in certain scenarios. ---- - src/broadcom/common/v3d_util.c | 112 +++++++++++++++--------- - src/broadcom/common/v3d_util.h | 7 +- - src/broadcom/vulkan/v3dv_cmd_buffer.c | 20 +++-- - src/broadcom/vulkan/v3dv_meta_clear.c | 1 + - src/broadcom/vulkan/v3dv_meta_copy.c | 19 ++-- - src/broadcom/vulkan/v3dv_pass.c | 9 +- - src/broadcom/vulkan/v3dv_private.h | 2 + - src/broadcom/vulkan/v3dvx_device.c | 21 +++-- - src/broadcom/vulkan/v3dvx_meta_common.c | 10 ++- - src/broadcom/vulkan/v3dvx_private.h | 4 +- - src/broadcom/vulkan/v3dvx_queue.c | 3 +- - src/gallium/drivers/v3d/v3d_context.c | 6 +- - 12 files changed, 140 insertions(+), 74 deletions(-) - -diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c -index 209a5eceaa1..8a50d279985 100644 ---- a/src/broadcom/common/v3d_util.c -+++ b/src/broadcom/common/v3d_util.c -@@ -87,12 +87,37 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, - return best_wgs_per_sg; - } - -+#define V3D71_TLB_COLOR_SIZE (16 * 1024) -+#define V3D71_TLB_DETPH_SIZE (16 * 1024) -+#define V3D71_TLB_AUX_DETPH_SIZE (8 * 1024) -+ -+static bool -+tile_size_valid(uint32_t pixel_count, uint32_t color_bpp, uint32_t depth_bpp) -+{ -+ /* First, we check if we can fit this tile size allocating the depth -+ * TLB memory to color. -+ */ -+ if (pixel_count * depth_bpp <= V3D71_TLB_AUX_DETPH_SIZE && -+ pixel_count * color_bpp <= V3D71_TLB_COLOR_SIZE + V3D71_TLB_DETPH_SIZE) { -+ return true; -+ } -+ -+ /* Otherwise the tile must fit in the main TLB buffers */ -+ return pixel_count * depth_bpp <= V3D71_TLB_DETPH_SIZE && -+ pixel_count * color_bpp <= V3D71_TLB_COLOR_SIZE; -+} -+ - void - v3d_choose_tile_size(const struct v3d_device_info *devinfo, - uint32_t color_attachment_count, -- uint32_t max_color_bpp, bool msaa, -+ /* V3D 4.x max internal bpp of all RTs */ -+ uint32_t max_internal_bpp, -+ /* V3D 7.x accumulated bpp for all RTs (in bytes) */ -+ uint32_t total_color_bpp, -+ bool msaa, - bool double_buffer, -- uint32_t *width, uint32_t *height) -+ uint32_t *width, -+ uint32_t *height) - { - static const uint8_t tile_sizes[] = { - 64, 64, -@@ -105,37 +130,19 @@ v3d_choose_tile_size(const struct v3d_device_info *devinfo, - }; - - uint32_t idx = 0; -- if (color_attachment_count > 4) -- idx += 3; -- else if (color_attachment_count > 2) -- idx += 2; -- else if (color_attachment_count > 1) -- idx += 1; -- -- /* MSAA and double-buffer are mutually exclusive */ -- assert(!msaa || !double_buffer); -- if (msaa) -- idx += 2; -- else if (double_buffer) -- idx += 1; -- -- idx += max_color_bpp; -- - if (devinfo->ver >= 71) { -- /* In V3D 7.x the TLB has an auxiliary buffer of 8KB that will be -- * automatically used for depth instead of the main 16KB depth TLB buffer -- * when the depth tile fits in the auxiliary buffer, allowing the hardware -- * to allocate the 16KB from the main depth TLB to the color TLB. If -- * we can do that, then we are effectively doubling the memory we have -- * for color and we can increase our tile dimensions by a factor of 2 -- * (reduce idx by 1). -+ /* In V3D 7.x, we use the actual bpp used by color attachments to compute -+ * the tile size instead of the maximum bpp. This may allow us to choose a -+ * larger tile size than we would in 4.x in scenarios with multiple RTs -+ * with different bpps. - * -- * If we have computed a tile size that would be smaller than the minimum -- * of 8x8, then it is certain that depth will fit in the aux depth TLB -- * (even in MSAA mode). -- * -- * Otherwise, we need check if we can fit depth in the aux TLB buffer -- * using a larger tile size. -+ * Also, the TLB has an auxiliary buffer of 8KB that will be automatically -+ * used for depth instead of the main 16KB depth TLB buffer when the depth -+ * tile fits in the auxiliary buffer, allowing the hardware to allocate -+ * the 16KB from the main depth TLB to the color TLB. If we can do that, -+ * then we are effectively doubling the memory we have for color and we -+ * can also select a larger tile size. This is necessary to support -+ * the most expensive configuration: 8x128bpp RTs + MSAA. - * - * FIXME: the docs state that depth TLB memory can be used for color - * if depth testing is not used by setting the 'depth disable' bit in the -@@ -147,17 +154,40 @@ v3d_choose_tile_size(const struct v3d_device_info *devinfo, - * configuration item) or active in the subpass for which we are enabling - * the bit (which we can't tell until later, when we record commands for - * the subpass). If it is the latter, then we cannot use this feature. -+ * -+ * FIXME: pending handling double_buffer. - */ -- if (idx >= ARRAY_SIZE(tile_sizes) / 2) { -- idx--; -- } else if (idx > 0) { -- /* Depth is always 32bpp (4x32bpp for 4x MSAA) */ -- uint32_t depth_bpp = !msaa ? 4 : 16; -- uint32_t tile_w = tile_sizes[(idx - 1) * 2]; -- uint32_t tile_h = tile_sizes[(idx - 1) * 2 + 1]; -- if (tile_w * tile_h * depth_bpp <= 8192) -- idx--; -- } -+ const uint32_t color_bpp = total_color_bpp * (msaa ? 4 : 1); -+ const uint32_t depth_bpp = 4 * (msaa ? 4 : 1); -+ do { -+ const uint32_t tile_w = tile_sizes[idx * 2]; -+ const uint32_t tile_h = tile_sizes[idx * 2 + 1]; -+ if (tile_size_valid(tile_w * tile_h, color_bpp, depth_bpp)) -+ break; -+ idx++; -+ } while (idx < ARRAY_SIZE(tile_sizes) / 2); -+ -+ /* FIXME: pending handling double_buffer */ -+ assert(!double_buffer); -+ } else { -+ /* On V3D 4.x tile size is selected based on the number of RTs, the -+ * maximum bpp across all of them and whether 4x MSAA is used. -+ */ -+ if (color_attachment_count > 4) -+ idx += 3; -+ else if (color_attachment_count > 2) -+ idx += 2; -+ else if (color_attachment_count > 1) -+ idx += 1; -+ -+ /* MSAA and double-buffer are mutually exclusive */ -+ assert(!msaa || !double_buffer); -+ if (msaa) -+ idx += 2; -+ else if (double_buffer) -+ idx += 1; -+ -+ idx += max_internal_bpp; - } - - assert(idx < ARRAY_SIZE(tile_sizes) / 2); -diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h -index 5a7e244a0a5..d02d41dd089 100644 ---- a/src/broadcom/common/v3d_util.h -+++ b/src/broadcom/common/v3d_util.h -@@ -39,9 +39,12 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, - void - v3d_choose_tile_size(const struct v3d_device_info *devinfo, - uint32_t color_attachment_count, -- uint32_t max_color_bpp, bool msaa, -+ uint32_t max_internal_bpp, -+ uint32_t total_color_bpp, -+ bool msaa, - bool double_buffer, -- uint32_t *width, uint32_t *height); -+ uint32_t *width, -+ uint32_t *height); - - uint32_t - v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle); -diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c -index 11d161b19b7..f65388c10ec 100644 ---- a/src/broadcom/vulkan/v3dv_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c -@@ -348,6 +348,7 @@ job_compute_frame_tiling(struct v3dv_job *job, - uint32_t layers, - uint32_t render_target_count, - uint8_t max_internal_bpp, -+ uint8_t total_color_bpp, - bool msaa, - bool double_buffer) - { -@@ -360,14 +361,16 @@ job_compute_frame_tiling(struct v3dv_job *job, - tiling->render_target_count = render_target_count; - tiling->msaa = msaa; - tiling->internal_bpp = max_internal_bpp; -+ tiling->total_color_bpp = total_color_bpp; - tiling->double_buffer = double_buffer; - - /* Double-buffer is incompatible with MSAA */ - assert(!tiling->msaa || !tiling->double_buffer); - - v3d_choose_tile_size(&job->device->devinfo, -- render_target_count, max_internal_bpp, -- tiling->msaa, tiling->double_buffer, -+ render_target_count, -+ max_internal_bpp, total_color_bpp, msaa, -+ tiling->double_buffer, - &tiling->tile_width, &tiling->tile_height); - - tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width); -@@ -458,6 +461,7 @@ v3dv_job_start_frame(struct v3dv_job *job, - bool allocate_tile_state_now, - uint32_t render_target_count, - uint8_t max_internal_bpp, -+ uint8_t total_color_bpp, - bool msaa) - { - assert(job); -@@ -468,7 +472,7 @@ v3dv_job_start_frame(struct v3dv_job *job, - const struct v3dv_frame_tiling *tiling = - job_compute_frame_tiling(job, width, height, layers, - render_target_count, max_internal_bpp, -- msaa, false); -+ total_color_bpp, msaa, false); - - v3dv_cl_ensure_space_with_branch(&job->bcl, 256); - v3dv_return_if_oom(NULL, job); -@@ -529,6 +533,7 @@ cmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer) - job->frame_tiling.layers, - job->frame_tiling.render_target_count, - job->frame_tiling.internal_bpp, -+ job->frame_tiling.total_color_bpp, - job->frame_tiling.msaa, - true); - -@@ -1673,10 +1678,11 @@ cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer, - - const struct v3dv_framebuffer *framebuffer = state->framebuffer; - -- uint8_t internal_bpp; -+ uint8_t max_internal_bpp, total_color_bpp; - bool msaa; - v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa) -- (framebuffer, state->attachments, subpass, &internal_bpp, &msaa); -+ (framebuffer, state->attachments, subpass, -+ &max_internal_bpp, &total_color_bpp, &msaa); - - /* From the Vulkan spec: - * -@@ -1700,7 +1706,8 @@ cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer, - layers, - true, false, - subpass->color_count, -- internal_bpp, -+ max_internal_bpp, -+ total_color_bpp, - msaa); - } - -@@ -2668,6 +2675,7 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer) - true, false, - old_job->frame_tiling.render_target_count, - old_job->frame_tiling.internal_bpp, -+ old_job->frame_tiling.total_color_bpp, - true /* msaa */); - - v3dv_job_destroy(old_job); -diff --git a/src/broadcom/vulkan/v3dv_meta_clear.c b/src/broadcom/vulkan/v3dv_meta_clear.c -index 0a7905b49d5..1c0d66c977c 100644 ---- a/src/broadcom/vulkan/v3dv_meta_clear.c -+++ b/src/broadcom/vulkan/v3dv_meta_clear.c -@@ -127,6 +127,7 @@ clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, - - v3dv_job_start_frame(job, width, height, max_layer, - false, true, 1, internal_bpp, -+ 4 * v3d_internal_bpp_words(internal_bpp), - image->vk.samples > VK_SAMPLE_COUNT_1_BIT); - - struct v3dv_meta_framebuffer framebuffer; -diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c -index c0ec888b8c7..2d30c611e17 100644 ---- a/src/broadcom/vulkan/v3dv_meta_copy.c -+++ b/src/broadcom/vulkan/v3dv_meta_copy.c -@@ -453,8 +453,9 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer, - const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); - const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); - -- v3dv_job_start_frame(job, width, height, num_layers, false, true, -- 1, internal_bpp, false); -+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1, -+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp), -+ false); - - struct v3dv_meta_framebuffer framebuffer; - v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, -@@ -1323,8 +1324,8 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, - const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); - const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); - -- v3dv_job_start_frame(job, width, height, num_layers, -- false, true, 1, internal_bpp, -+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1, -+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp), - src->vk.samples > VK_SAMPLE_COUNT_1_BIT); - - struct v3dv_meta_framebuffer framebuffer; -@@ -1978,8 +1979,9 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, - const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); - const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); - -- v3dv_job_start_frame(job, width, height, num_layers, false, true, -- 1, internal_bpp, false); -+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1, -+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp), -+ false); - - struct v3dv_meta_framebuffer framebuffer; - v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, -@@ -4884,8 +4886,9 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, - (fb_format, region->srcSubresource.aspectMask, - &internal_type, &internal_bpp); - -- v3dv_job_start_frame(job, width, height, num_layers, false, true, -- 1, internal_bpp, true); -+ v3dv_job_start_frame(job, width, height, num_layers, false, true, 1, -+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp), -+ true); - - struct v3dv_meta_framebuffer framebuffer; - v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, -diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c -index 7f2e2bbc710..0583faf6f9a 100644 ---- a/src/broadcom/vulkan/v3dv_pass.c -+++ b/src/broadcom/vulkan/v3dv_pass.c -@@ -325,7 +325,8 @@ subpass_get_granularity(struct v3dv_device *device, - const uint32_t color_count = subpass->color_count; - - bool msaa = false; -- uint32_t max_bpp = 0; -+ uint32_t max_internal_bpp = 0; -+ uint32_t total_color_bpp = 0; - for (uint32_t i = 0; i < color_count; i++) { - uint32_t attachment_idx = subpass->color_attachments[i].attachment; - if (attachment_idx == VK_ATTACHMENT_UNUSED) -@@ -339,7 +340,8 @@ subpass_get_granularity(struct v3dv_device *device, - v3dv_X(device, get_internal_type_bpp_for_output_format) - (format->planes[0].rt_type, &internal_type, &internal_bpp); - -- max_bpp = MAX2(max_bpp, internal_bpp); -+ max_internal_bpp = MAX2(max_internal_bpp, internal_bpp); -+ total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp); - - if (desc->samples > VK_SAMPLE_COUNT_1_BIT) - msaa = true; -@@ -349,7 +351,8 @@ subpass_get_granularity(struct v3dv_device *device, - * heuristics so we choose a conservative granularity here, with it disabled. - */ - uint32_t width, height; -- v3d_choose_tile_size(&device->devinfo, color_count, max_bpp, msaa, -+ v3d_choose_tile_size(&device->devinfo, color_count, -+ max_internal_bpp, total_color_bpp, msaa, - false /* double-buffer */, &width, &height); - *granularity = (VkExtent2D) { - .width = width, -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index 300a1ec8ae1..9375cdd58c0 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -950,6 +950,7 @@ struct v3dv_frame_tiling { - uint32_t layers; - uint32_t render_target_count; - uint32_t internal_bpp; -+ uint32_t total_color_bpp; - bool msaa; - bool double_buffer; - uint32_t tile_width; -@@ -1373,6 +1374,7 @@ void v3dv_job_start_frame(struct v3dv_job *job, - bool allocate_tile_state_now, - uint32_t render_target_count, - uint8_t max_internal_bpp, -+ uint8_t total_color_bpp, - bool msaa); - - bool v3dv_job_type_is_gpu(struct v3dv_job *job); -diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c -index 4d17a2691a5..61ad98c1217 100644 ---- a/src/broadcom/vulkan/v3dvx_device.c -+++ b/src/broadcom/vulkan/v3dvx_device.c -@@ -257,11 +257,13 @@ v3dX(framebuffer_compute_internal_bpp_msaa)( - const struct v3dv_framebuffer *framebuffer, - const struct v3dv_cmd_buffer_attachment_state *attachments, - const struct v3dv_subpass *subpass, -- uint8_t *max_bpp, -+ uint8_t *max_internal_bpp, -+ uint8_t *total_color_bpp, - bool *msaa) - { - STATIC_ASSERT(V3D_INTERNAL_BPP_32 == 0); -- *max_bpp = V3D_INTERNAL_BPP_32; -+ *max_internal_bpp = V3D_INTERNAL_BPP_32; -+ *total_color_bpp = 0; - *msaa = false; - - if (subpass) { -@@ -274,8 +276,11 @@ v3dX(framebuffer_compute_internal_bpp_msaa)( - assert(att); - assert(att->plane_count == 1); - -- if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) -- *max_bpp = MAX2(*max_bpp, att->planes[0].internal_bpp); -+ if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) { -+ const uint32_t internal_bpp = att->planes[0].internal_bpp; -+ *max_internal_bpp = MAX2(*max_internal_bpp, internal_bpp); -+ *total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp); -+ } - - if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) - *msaa = true; -@@ -289,7 +294,6 @@ v3dX(framebuffer_compute_internal_bpp_msaa)( - if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) - *msaa = true; - } -- - return; - } - -@@ -299,8 +303,11 @@ v3dX(framebuffer_compute_internal_bpp_msaa)( - assert(att); - assert(att->plane_count == 1); - -- if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) -- *max_bpp = MAX2(*max_bpp, att->planes[0].internal_bpp); -+ if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) { -+ const uint32_t internal_bpp = att->planes[0].internal_bpp; -+ *max_internal_bpp = MAX2(*max_internal_bpp, internal_bpp); -+ *total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp); -+ } - - if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) - *msaa = true; -diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c -index b8f3297bc94..858096f9e4b 100644 ---- a/src/broadcom/vulkan/v3dvx_meta_common.c -+++ b/src/broadcom/vulkan/v3dvx_meta_common.c -@@ -1408,8 +1408,9 @@ v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t width, height; - framebuffer_size_for_pixel_count(num_items, &width, &height); - -- v3dv_job_start_frame(job, width, height, 1, true, true, -- 1, internal_bpp, false); -+ v3dv_job_start_frame(job, width, height, 1, true, true, 1, -+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp), -+ false); - - struct v3dv_meta_framebuffer framebuffer; - v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type, -@@ -1455,8 +1456,9 @@ v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t width, height; - framebuffer_size_for_pixel_count(num_items, &width, &height); - -- v3dv_job_start_frame(job, width, height, 1, true, true, -- 1, internal_bpp, false); -+ v3dv_job_start_frame(job, width, height, 1, true, true, 1, -+ internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp), -+ false); - - struct v3dv_meta_framebuffer framebuffer; - v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT, -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index 81715520913..709b129926f 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -136,7 +136,9 @@ void - v3dX(framebuffer_compute_internal_bpp_msaa)(const struct v3dv_framebuffer *framebuffer, - const struct v3dv_cmd_buffer_attachment_state *attachments, - const struct v3dv_subpass *subpass, -- uint8_t *max_bpp, bool *msaa); -+ uint8_t *max_internal_bpp, -+ uint8_t *total_color_bpp, -+ bool *msaa); - - #ifdef DEBUG - void -diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c -index f8cee36e3bf..6eed2de9d54 100644 ---- a/src/broadcom/vulkan/v3dvx_queue.c -+++ b/src/broadcom/vulkan/v3dvx_queue.c -@@ -29,7 +29,8 @@ - void - v3dX(job_emit_noop)(struct v3dv_job *job) - { -- v3dv_job_start_frame(job, 1, 1, 1, true, true, 1, V3D_INTERNAL_BPP_32, false); -+ v3dv_job_start_frame(job, 1, 1, 1, true, true, 1, -+ V3D_INTERNAL_BPP_32, 4, false); - v3dX(job_emit_binning_flush)(job); - - struct v3dv_cl *rcl = &job->rcl; -diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c -index def546e9ef5..1dc4bd017fe 100644 ---- a/src/gallium/drivers/v3d/v3d_context.c -+++ b/src/gallium/drivers/v3d/v3d_context.c -@@ -233,11 +233,13 @@ v3d_get_tile_buffer_size(const struct v3d_device_info *devinfo, - assert(!is_msaa || !double_buffer); - - uint32_t max_cbuf_idx = 0; -+ uint32_t total_bpp = 0; - *max_bpp = 0; - for (int i = 0; i < nr_cbufs; i++) { - if (cbufs[i]) { - struct v3d_surface *surf = v3d_surface(cbufs[i]); - *max_bpp = MAX2(*max_bpp, surf->internal_bpp); -+ total_bpp += 4 * v3d_internal_bpp_words(surf->internal_bpp); - max_cbuf_idx = MAX2(i, max_cbuf_idx); - } - } -@@ -246,9 +248,11 @@ v3d_get_tile_buffer_size(const struct v3d_device_info *devinfo, - struct v3d_surface *bsurf = v3d_surface(bbuf); - assert(bbuf->texture->nr_samples <= 1 || is_msaa); - *max_bpp = MAX2(*max_bpp, bsurf->internal_bpp); -+ total_bpp += 4 * v3d_internal_bpp_words(bsurf->internal_bpp); - } - -- v3d_choose_tile_size(devinfo, max_cbuf_idx + 1, *max_bpp, -+ v3d_choose_tile_size(devinfo, max_cbuf_idx + 1, -+ *max_bpp, total_bpp, - is_msaa, double_buffer, - tile_width, tile_height); - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0125-v3dv-implement-depthBounds-support-for-v71.patch b/projects/RPi/devices/RPi5/patches/mesa/0125-v3dv-implement-depthBounds-support-for-v71.patch deleted file mode 100644 index c03e043b90..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0125-v3dv-implement-depthBounds-support-for-v71.patch +++ /dev/null @@ -1,241 +0,0 @@ -From 210338b6b1b030d36acaebad504ed2bec4a2cd74 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Fri, 19 Nov 2021 10:51:37 +0100 -Subject: [PATCH 125/142] v3dv: implement depthBounds support for v71 - -Just for for v71, as that feature is not supported by older hw. ---- - src/broadcom/vulkan/v3dv_cmd_buffer.c | 19 ++++++++++++--- - src/broadcom/vulkan/v3dv_device.c | 2 +- - src/broadcom/vulkan/v3dv_pipeline.c | 17 ++++++++------ - src/broadcom/vulkan/v3dv_private.h | 12 +++++++++- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 32 ++++++++++++++++++++++++++ - src/broadcom/vulkan/v3dvx_pipeline.c | 3 +++ - src/broadcom/vulkan/v3dvx_private.h | 3 +++ - 7 files changed, 76 insertions(+), 12 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c -index f65388c10ec..36bd7960985 100644 ---- a/src/broadcom/vulkan/v3dv_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c -@@ -2070,6 +2070,14 @@ cmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer, - } - } - -+ if (!(dynamic_mask & V3DV_DYNAMIC_DEPTH_BOUNDS)) { -+ if (memcmp(&dest->depth_bounds, &src->depth_bounds, -+ sizeof(src->depth_bounds))) { -+ memcpy(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds)); -+ dirty |= V3DV_CMD_DIRTY_DEPTH_BOUNDS; -+ } -+ } -+ - if (!(dynamic_mask & V3DV_DYNAMIC_LINE_WIDTH)) { - if (dest->line_width != src->line_width) { - dest->line_width = src->line_width; -@@ -2940,6 +2948,9 @@ v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer, - if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS)) - v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer); - -+ if (*dirty & V3DV_CMD_DIRTY_DEPTH_BOUNDS) -+ v3dv_X(device, cmd_buffer_emit_depth_bounds)(cmd_buffer); -+ - if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS)) - v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer); - -@@ -3369,9 +3380,11 @@ v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) - { -- /* We do not support depth bounds testing so we just ignore this. We are -- * already asserting that pipelines don't enable the feature anyway. -- */ -+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); -+ -+ cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; -+ cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; -+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BOUNDS; - } - - VKAPI_ATTR void VKAPI_CALL -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index 19e58542414..1de9b5ce683 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -227,7 +227,7 @@ get_features(const struct v3dv_physical_device *physical_device, - .depthClamp = false, /* Only available since V3D 4.5.1.1 */ - .depthBiasClamp = true, - .fillModeNonSolid = true, -- .depthBounds = false, /* Only available since V3D 4.3.16.2 */ -+ .depthBounds = physical_device->devinfo.ver >= 71, - .wideLines = true, - .largePoints = true, - .alphaToOne = true, -diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c -index 3bcdcc9a853..ba782b8268a 100644 ---- a/src/broadcom/vulkan/v3dv_pipeline.c -+++ b/src/broadcom/vulkan/v3dv_pipeline.c -@@ -2608,13 +2608,8 @@ v3dv_dynamic_state_mask(VkDynamicState state) - return V3DV_DYNAMIC_LINE_WIDTH; - case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: - return V3DV_DYNAMIC_COLOR_WRITE_ENABLE; -- -- /* Depth bounds testing is not available in in V3D 4.2 so here we are just -- * ignoring this dynamic state. We are already asserting at pipeline creation -- * time that depth bounds testing is not enabled. -- */ - case VK_DYNAMIC_STATE_DEPTH_BOUNDS: -- return 0; -+ return V3DV_DYNAMIC_DEPTH_BOUNDS; - - default: - unreachable("Unhandled dynamic state"); -@@ -2642,6 +2637,7 @@ pipeline_init_dynamic_state( - dynamic->line_width = 1.0f; - dynamic->color_write_enable = - (1ull << (4 * V3D_MAX_RENDER_TARGETS(devinfo->ver))) - 1; -+ dynamic->depth_bounds.max = 1.0f; - - /* Create a mask of enabled dynamic states */ - uint32_t dynamic_states = 0; -@@ -2694,6 +2690,11 @@ pipeline_init_dynamic_state( - dynamic->stencil_reference.front = pDepthStencilState->front.reference; - dynamic->stencil_reference.back = pDepthStencilState->back.reference; - } -+ -+ if (!(dynamic_states & V3DV_DYNAMIC_DEPTH_BOUNDS)) { -+ dynamic->depth_bounds.min = pDepthStencilState->minDepthBounds; -+ dynamic->depth_bounds.max = pDepthStencilState->maxDepthBounds; -+ } - } - - if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) { -@@ -2907,7 +2908,9 @@ pipeline_init(struct v3dv_pipeline *pipeline, - /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that - * feature and it shouldn't be used by any pipeline. - */ -- assert(!ds_info || !ds_info->depthBoundsTestEnable); -+ assert(device->devinfo.ver >= 71 || -+ !ds_info || !ds_info->depthBoundsTestEnable); -+ pipeline->depth_bounds_test_enabled = ds_info && ds_info->depthBoundsTestEnable; - - enable_depth_bias(pipeline, rs_info); - -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index 9375cdd58c0..a074e0a981c 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -1045,7 +1045,8 @@ enum v3dv_dynamic_state_bits { - V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6, - V3DV_DYNAMIC_LINE_WIDTH = 1 << 7, - V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8, -- V3DV_DYNAMIC_ALL = (1 << 9) - 1, -+ V3DV_DYNAMIC_DEPTH_BOUNDS = 1 << 9, -+ V3DV_DYNAMIC_ALL = (1 << 10) - 1, - }; - - /* Flags for dirty pipeline state. -@@ -1070,6 +1071,7 @@ enum v3dv_cmd_dirty_bits { - V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 16, - V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 17, - V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 18, -+ V3DV_CMD_DIRTY_DEPTH_BOUNDS = 1 << 19, - }; - - struct v3dv_dynamic_state { -@@ -1106,6 +1108,11 @@ struct v3dv_dynamic_state { - float slope_factor; - } depth_bias; - -+ struct { -+ float min; -+ float max; -+ } depth_bounds; -+ - float line_width; - - uint32_t color_write_enable; -@@ -2333,6 +2340,9 @@ struct v3dv_pipeline { - bool is_z16; - } depth_bias; - -+ /* Depth bounds */ -+ bool depth_bounds_test_enabled; -+ - struct { - void *mem_ctx; - struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */ -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index bf5e47018e8..9307a6e9d93 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1507,6 +1507,38 @@ v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer) - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS; - } - -+void -+v3dX(cmd_buffer_emit_depth_bounds)(struct v3dv_cmd_buffer *cmd_buffer) -+{ -+ /* No depthBounds support for v42, so this method is empty on that case. -+ * -+ * Note that this method is being called as v3dv_job_init flag all state as -+ * dirty. See FIXME note at v3dv_job_init. -+ */ -+ -+#if V3D_VERSION >= 71 -+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; -+ assert(pipeline); -+ -+ if (!pipeline->depth_bounds_test_enabled) -+ return; -+ -+ struct v3dv_job *job = cmd_buffer->state.job; -+ assert(job); -+ -+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_BOUNDS_TEST_LIMITS)); -+ v3dv_return_if_oom(cmd_buffer, NULL); -+ -+ struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; -+ cl_emit(&job->bcl, DEPTH_BOUNDS_TEST_LIMITS, bounds) { -+ bounds.lower_test_limit = dynamic->depth_bounds.min; -+ bounds.upper_test_limit = dynamic->depth_bounds.max; -+ } -+ -+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BOUNDS; -+#endif -+} -+ - void - v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer) - { -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index 7b1133f8173..83ab2f19e4f 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -259,6 +259,9 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline, - } else { - config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE; - } -+ -+ config.depth_bounds_test_enable = -+ ds_info && ds_info->depthBoundsTestEnable && has_ds_attachment; - #endif - }; - } -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index 709b129926f..1ce4789c5ac 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -54,6 +54,9 @@ v3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer); - void - v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer); - -+void -+v3dX(cmd_buffer_emit_depth_bounds)(struct v3dv_cmd_buffer *cmd_buffer); -+ - void - v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer); - --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0126-v3d-v3dv-propagate-NaNs-bits-in-shader-state-records.patch b/projects/RPi/devices/RPi5/patches/mesa/0126-v3d-v3dv-propagate-NaNs-bits-in-shader-state-records.patch deleted file mode 100644 index e59c0e1890..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0126-v3d-v3dv-propagate-NaNs-bits-in-shader-state-records.patch +++ /dev/null @@ -1,119 +0,0 @@ -From be6508ffef8c0e9fbc47175739db80a3eeff2cdb Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Fri, 3 Dec 2021 13:20:22 +0100 -Subject: [PATCH 126/142] v3d,v3dv: propagate NaNs bits in shader state records - are reserved in v7.x - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 4 ++++ - src/broadcom/vulkan/v3dvx_pipeline.c | 10 +++++----- - src/gallium/drivers/v3d/v3dx_draw.c | 14 +++++++++----- - 3 files changed, 18 insertions(+), 10 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 9307a6e9d93..580aeb8ba2b 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -2175,7 +2175,9 @@ emit_gs_shader_state_record(struct v3dv_job *job, - gs_bin->prog_data.gs->base.threads == 4; - shader.geometry_bin_mode_shader_start_in_final_thread_section = - gs_bin->prog_data.gs->base.single_seg; -+#if V3D_VERSION <= 42 - shader.geometry_bin_mode_shader_propagate_nans = true; -+#endif - shader.geometry_bin_mode_shader_uniforms_address = - gs_bin_uniforms; - -@@ -2185,7 +2187,9 @@ emit_gs_shader_state_record(struct v3dv_job *job, - gs->prog_data.gs->base.threads == 4; - shader.geometry_render_mode_shader_start_in_final_thread_section = - gs->prog_data.gs->base.single_seg; -+#if V3D_VERSION <= 42 - shader.geometry_render_mode_shader_propagate_nans = true; -+#endif - shader.geometry_render_mode_shader_uniforms_address = - gs_render_uniforms; - } -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index 83ab2f19e4f..c9b537f4b32 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -471,19 +471,19 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) - shader.number_of_varyings_in_fragment_shader = - prog_data_fs->num_inputs; - -- shader.coordinate_shader_propagate_nans = true; -- shader.vertex_shader_propagate_nans = true; -- shader.fragment_shader_propagate_nans = true; -- - /* Note: see previous note about addresses */ - /* shader.coordinate_shader_code_address */ - /* shader.vertex_shader_code_address */ - /* shader.fragment_shader_code_address */ - -+#if V3D_VERSION == 42 -+ shader.coordinate_shader_propagate_nans = true; -+ shader.vertex_shader_propagate_nans = true; -+ shader.fragment_shader_propagate_nans = true; -+ - /* FIXME: Use combined input/output size flag in the common case (also - * on v3d, see v3dx_draw). - */ --#if V3D_VERSION == 42 - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = - prog_data_vs_bin->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index 04cc3bc3ae1..e4b414b0676 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -396,7 +396,9 @@ v3d_emit_gs_state_record(struct v3d_job *job, - gs_bin->prog_data.gs->base.threads == 4; - shader.geometry_bin_mode_shader_start_in_final_thread_section = - gs_bin->prog_data.gs->base.single_seg; -+#if V3D_VERSION <= 42 - shader.geometry_bin_mode_shader_propagate_nans = true; -+#endif - shader.geometry_bin_mode_shader_uniforms_address = - gs_bin_uniforms; - -@@ -406,7 +408,9 @@ v3d_emit_gs_state_record(struct v3d_job *job, - gs->prog_data.gs->base.threads == 4; - shader.geometry_render_mode_shader_start_in_final_thread_section = - gs->prog_data.gs->base.single_seg; -+#if V3D_VERSION <= 42 - shader.geometry_render_mode_shader_propagate_nans = true; -+#endif - shader.geometry_render_mode_shader_uniforms_address = - gs_render_uniforms; - } -@@ -657,10 +661,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, - shader.number_of_varyings_in_fragment_shader = - v3d->prog.fs->prog_data.fs->num_inputs; - -- shader.coordinate_shader_propagate_nans = true; -- shader.vertex_shader_propagate_nans = true; -- shader.fragment_shader_propagate_nans = true; -- - shader.coordinate_shader_code_address = - cl_address(v3d_resource(v3d->prog.cs->resource)->bo, - v3d->prog.cs->offset); -@@ -671,10 +671,14 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, - cl_address(v3d_resource(v3d->prog.fs->resource)->bo, - v3d->prog.fs->offset); - -+#if V3D_VERSION <= 42 -+ shader.coordinate_shader_propagate_nans = true; -+ shader.vertex_shader_propagate_nans = true; -+ shader.fragment_shader_propagate_nans = true; -+ - /* XXX: Use combined input/output size flag in the common - * case. - */ --#if V3D_VERSION <= 42 - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = - v3d->prog.cs->prog_data.vs->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0127-v3dv-use-new-texture-shader-state-rb_swap-and-revers.patch b/projects/RPi/devices/RPi5/patches/mesa/0127-v3dv-use-new-texture-shader-state-rb_swap-and-revers.patch deleted file mode 100644 index 81357ea2f9..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0127-v3dv-use-new-texture-shader-state-rb_swap-and-revers.patch +++ /dev/null @@ -1,296 +0,0 @@ -From c74ba2b39e7b9fe6c5415c20c98cd231d2674df6 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Tue, 16 May 2023 00:38:40 +0200 -Subject: [PATCH 127/142] v3dv: use new texture shader state rb_swap and - reverse fields in v3d 7.x - -In v3d 4.x we handle formats that are reversed or R/B swapped by -applying a format swizzle. This doesn't work on border colors though, -and for that there is a specific bit to reverse the border color in -the texture shader state. - -In v3d 7.x we have new reverse and swap R/B bits and we no longer have -a bit to reverse the border color because the new reverse bit applies -to border texels too. Because of this, we absolutely need to use these -new bits in order to get correct border colors in all cases with these -formats. - -When we enable the reverse and/or swap R/B bits, we are effectively -applying the format swizzle through them, so in these cases we need to -make sure the swizzle we program in the texture shader state is the -view swizzle provided by the API and not the composition of the format -swizzle with the view swizzle like we do in 4.x for all formats. The -same applies to custom border colors: we must not apply the format -swizzle to them for formats that are reversed or R/B swapped, because -again, this format swizzle is already applied through these new bits. - -While we are doing this, we also fully adopt the texture shader state -spec from v3d 7.1.5 for v3d 7.x instead of using a description from -7.1.2 which is incompatible and required the driver to manually pack -some of the bits. ---- - src/broadcom/vulkan/v3dv_device.c | 2 +- - src/broadcom/vulkan/v3dv_image.c | 7 ++-- - src/broadcom/vulkan/v3dv_private.h | 13 ++++++- - src/broadcom/vulkan/v3dvx_device.c | 24 ++++++++++-- - src/broadcom/vulkan/v3dvx_image.c | 56 ++++++++++++++++++---------- - src/broadcom/vulkan/v3dvx_private.h | 3 +- - src/gallium/drivers/v3d/v3dx_state.c | 6 --- - 7 files changed, 75 insertions(+), 36 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index 1de9b5ce683..b520bfa0002 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -2989,7 +2989,7 @@ v3dv_CreateSampler(VkDevice _device, - } - } - -- v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info); -+ v3dv_X(device, pack_sampler_state)(device, sampler, pCreateInfo, bc_info); - - *pSampler = v3dv_sampler_to_handle(sampler); - -diff --git a/src/broadcom/vulkan/v3dv_image.c b/src/broadcom/vulkan/v3dv_image.c -index ebbd60e4c03..e01e2e1bd19 100644 ---- a/src/broadcom/vulkan/v3dv_image.c -+++ b/src/broadcom/vulkan/v3dv_image.c -@@ -671,7 +671,6 @@ create_image_view(struct v3dv_device *device, - * makes sense to implement swizzle composition using VkSwizzle directly. - */ - VkFormat format; -- uint8_t image_view_swizzle[4]; - if (pCreateInfo->format == VK_FORMAT_D24_UNORM_S8_UINT && - range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { - format = VK_FORMAT_R8G8B8A8_UINT; -@@ -682,11 +681,11 @@ create_image_view(struct v3dv_device *device, - vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle, view_swizzle); - - util_format_compose_swizzles(stencil_aspect_swizzle, view_swizzle, -- image_view_swizzle); -+ iview->view_swizzle); - } else { - format = pCreateInfo->format; - vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle, -- image_view_swizzle); -+ iview->view_swizzle); - } - - iview->vk.view_format = format; -@@ -711,7 +710,7 @@ create_image_view(struct v3dv_device *device, - - const uint8_t *format_swizzle = - v3dv_get_format_swizzle(device, format, plane); -- util_format_compose_swizzles(format_swizzle, image_view_swizzle, -+ util_format_compose_swizzles(format_swizzle, iview->view_swizzle, - iview->planes[plane].swizzle); - - iview->planes[plane].swap_rb = v3dv_format_swizzle_needs_rb_swap(format_swizzle); -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index a074e0a981c..8adb8873efd 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -776,6 +776,8 @@ struct v3dv_image_view { - - const struct v3dv_format *format; - -+ uint8_t view_swizzle[4]; -+ - uint8_t plane_count; - struct { - uint8_t image_plane; -@@ -786,8 +788,8 @@ struct v3dv_image_view { - uint32_t internal_type; - uint32_t offset; - -- /* Precomputed (composed from createinfo->components and formar swizzle) -- * swizzles to pass in to the shader key. -+ /* Precomputed swizzle (composed from the view swizzle and the format -+ * swizzle). - * - * This could be also included on the descriptor bo, but the shader state - * packet doesn't need it on a bo, so we can just avoid a memory copy -@@ -2358,6 +2360,13 @@ struct v3dv_pipeline { - uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH]; - }; - -+static inline bool -+v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device) -+{ -+ return device->devinfo.ver > 71 || -+ (device->devinfo.ver == 71 && device->devinfo.rev >= 5); -+} -+ - static inline VkPipelineBindPoint - v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline) - { -diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c -index 61ad98c1217..1b50d51e19f 100644 ---- a/src/broadcom/vulkan/v3dvx_device.c -+++ b/src/broadcom/vulkan/v3dvx_device.c -@@ -50,6 +50,7 @@ vk_to_v3d_compare_func[] = { - }; - - static union pipe_color_union encode_border_color( -+ const struct v3dv_device *device, - const VkSamplerCustomBorderColorCreateInfoEXT *bc_info) - { - const struct util_format_description *desc = -@@ -76,12 +77,28 @@ static union pipe_color_union encode_border_color( - * colors so we need to fix up the swizzle manually for this case. - */ - uint8_t swizzle[4]; -- if (v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle) && -+ const bool v3d_has_reverse_swap_rb_bits = -+ v3dv_texture_shader_state_has_rb_swap_reverse_bits(device); -+ if (!v3d_has_reverse_swap_rb_bits && -+ v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle) && - v3dv_format_swizzle_needs_rb_swap(format->planes[0].swizzle)) { - swizzle[0] = PIPE_SWIZZLE_W; - swizzle[1] = PIPE_SWIZZLE_X; - swizzle[2] = PIPE_SWIZZLE_Y; - swizzle[3] = PIPE_SWIZZLE_Z; -+ } -+ /* In v3d 7.x we no longer have a reverse flag for the border color. Instead -+ * we have to use the new reverse and swap_r/b flags in the texture shader -+ * state which will apply the format swizzle automatically when sampling -+ * the border color too and we should not apply it manually here. -+ */ -+ else if (v3d_has_reverse_swap_rb_bits && -+ (v3dv_format_swizzle_needs_rb_swap(format->planes[0].swizzle) || -+ v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle))) { -+ swizzle[0] = PIPE_SWIZZLE_X; -+ swizzle[1] = PIPE_SWIZZLE_Y; -+ swizzle[2] = PIPE_SWIZZLE_Z; -+ swizzle[3] = PIPE_SWIZZLE_W; - } else { - memcpy(swizzle, format->planes[0].swizzle, sizeof (swizzle)); - } -@@ -179,7 +196,8 @@ static union pipe_color_union encode_border_color( - } - - void --v3dX(pack_sampler_state)(struct v3dv_sampler *sampler, -+v3dX(pack_sampler_state)(const struct v3dv_device *device, -+ struct v3dv_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo, - const VkSamplerCustomBorderColorCreateInfoEXT *bc_info) - { -@@ -221,7 +239,7 @@ v3dX(pack_sampler_state)(struct v3dv_sampler *sampler, - s.border_color_mode = border_color_mode; - - if (s.border_color_mode == V3D_BORDER_COLOR_FOLLOWS) { -- union pipe_color_union border = encode_border_color(bc_info); -+ union pipe_color_union border = encode_border_color(device, bc_info); - - s.border_color_word_0 = border.ui[0]; - s.border_color_word_1 = border.ui[1]; -diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c -index ae6eaa88d0c..de984e81220 100644 ---- a/src/broadcom/vulkan/v3dvx_image.c -+++ b/src/broadcom/vulkan/v3dvx_image.c -@@ -108,25 +108,6 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - - tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64; - -- bool is_srgb = vk_format_is_srgb(image_view->vk.format); --#if V3D_VERSION == 42 -- tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse; --#endif -- --#if V3D_VERSION == 42 -- tex.srgb = is_srgb; --#endif --#if V3D_VERSION >= 71 -- tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE; -- -- /* V3D 7.1.5 has array stride starting one bit later than previous -- * V3D versions to make room for the new RB swap bit, but we don't -- * handle that in the CLE parser. -- */ -- if (device->devinfo.rev >= 5) -- tex.array_stride_64_byte_aligned <<= 1; --#endif -- - /* At this point we don't have the job. That's the reason the first - * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to - * add the bo to the job. This also means that we need to add manually -@@ -138,7 +119,44 @@ pack_texture_shader_state_helper(struct v3dv_device *device, - iplane); - tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); - -+ bool is_srgb = vk_format_is_srgb(image_view->vk.format); -+ -+ /* V3D 4.x doesn't have the reverse and swap_r/b bits, so we compose -+ * the reverse and/or swap_r/b swizzle from the format table with the -+ * image view swizzle. This, however, doesn't work for border colors, -+ * for that there is the reverse_standard_border_color. -+ * -+ * In v3d 7.x, however, there is no reverse_standard_border_color bit, -+ * since the reverse and swap_r/b bits also affect border colors. It is -+ * because of this that we absolutely need to use these bits with -+ * reversed and swpaped formats, since that's the only way to ensure -+ * correct border colors. In that case we don't want to program the -+ * swizzle to the composition of the format swizzle and the view -+ * swizzle like we do in v3d 4.x, since the format swizzle is applied -+ * via the reverse and swap_r/b bits. -+ */ -+#if V3D_VERSION == 42 -+ tex.srgb = is_srgb; -+ tex.reverse_standard_border_color = -+ image_view->planes[plane].channel_reverse; -+#endif - #if V3D_VERSION >= 71 -+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE; -+ -+ tex.reverse = image_view->planes[plane].channel_reverse; -+ tex.r_b_swap = image_view->planes[plane].swap_rb; -+ -+ if (tex.reverse || tex.r_b_swap) { -+ tex.swizzle_r = -+ v3d_translate_pipe_swizzle(image_view->view_swizzle[0]); -+ tex.swizzle_g = -+ v3d_translate_pipe_swizzle(image_view->view_swizzle[1]); -+ tex.swizzle_b = -+ v3d_translate_pipe_swizzle(image_view->view_swizzle[2]); -+ tex.swizzle_a = -+ v3d_translate_pipe_swizzle(image_view->view_swizzle[3]); -+ } -+ - tex.chroma_offset_x = 1; - tex.chroma_offset_y = 1; - /* See comment in XML field definition for rationale of the shifts */ -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index 1ce4789c5ac..27d6736c0e3 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -131,7 +131,8 @@ v3dX(get_hw_clear_color)(const VkClearColorValue *color, - /* Used at v3dv_device */ - - void --v3dX(pack_sampler_state)(struct v3dv_sampler *sampler, -+v3dX(pack_sampler_state)(const struct v3dv_device *device, -+ struct v3dv_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo, - const VkSamplerCustomBorderColorCreateInfoEXT *bc_info); - -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index 970a082aa85..8cca1a5030b 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -960,12 +960,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo, - /* See comment in XML field definition for rationale of the shifts */ - tex->texture_base_pointer_cb = base_offset >> 6; - tex->texture_base_pointer_cr = base_offset >> 6; -- -- /* V3D 7.1.5 has array stride start at bit 33 instead of bit 32 to -- * make room for the RB swap bit. -- */ -- if (devinfo->rev >= 5) -- tex->array_stride_64_byte_aligned <<= 1; - #endif - - /* Since other platform devices may produce UIF images even --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0128-v3dv-fix-color-write-mask-for-v3d-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0128-v3dv-fix-color-write-mask-for-v3d-7.x.patch deleted file mode 100644 index c991d19da5..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0128-v3dv-fix-color-write-mask-for-v3d-7.x.patch +++ /dev/null @@ -1,34 +0,0 @@ -From ef1159ad68e4969992a61b1fcdf9103409f689ca Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 8 Feb 2023 08:41:12 +0100 -Subject: [PATCH 128/142] v3dv: fix color write mask for v3d 7.x - ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 580aeb8ba2b..6827c829934 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1627,9 +1627,15 @@ v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer) - - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; -+ uint32_t color_write_mask = ~dynamic->color_write_enable | -+ pipeline->blend.color_write_masks; -+#if V3D_VERSION <= 42 -+ /* Only 4 RTs */ -+ color_write_mask &= 0xffff; -+#endif -+ - cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { -- mask.mask = (~dynamic->color_write_enable | -- pipeline->blend.color_write_masks) & 0xffff; -+ mask.mask = color_write_mask; - } - - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0129-v3d-v3dv-fix-depth-bias-for-v3d-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0129-v3d-v3dv-fix-depth-bias-for-v3d-7.x.patch deleted file mode 100644 index 61b2e9a859..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0129-v3d-v3dv-fix-depth-bias-for-v3d-7.x.patch +++ /dev/null @@ -1,68 +0,0 @@ -From aee0180b79a6a546d1e7263d89ef868016082687 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 8 Feb 2023 09:04:02 +0100 -Subject: [PATCH 129/142] v3d,v3dv: fix depth bias for v3d 7.x - -In v3d 7.x we don't need to scale up depth bias for D16 buffers. ---- - src/broadcom/vulkan/v3dvx_cmd_buffer.c | 2 ++ - src/gallium/drivers/v3d/v3dx_emit.c | 3 ++- - src/gallium/drivers/v3d/v3dx_state.c | 4 +++- - 3 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -index 6827c829934..1bd634f5027 100644 ---- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c -@@ -1499,8 +1499,10 @@ v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer) - cl_emit(&job->bcl, DEPTH_OFFSET, bias) { - bias.depth_offset_factor = dynamic->depth_bias.slope_factor; - bias.depth_offset_units = dynamic->depth_bias.constant_factor; -+#if V3D_VERSION <= 42 - if (pipeline->depth_bias.is_z16) - bias.depth_offset_units *= 256.0f; -+#endif - bias.limit = dynamic->depth_bias.depth_bias_clamp; - } - -diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c -index 87e75281dc9..82a45e44f82 100644 ---- a/src/gallium/drivers/v3d/v3dx_emit.c -+++ b/src/gallium/drivers/v3d/v3dx_emit.c -@@ -558,7 +558,8 @@ v3dX(emit_state)(struct pipe_context *pctx) - - if (v3d->dirty & V3D_DIRTY_RASTERIZER && - v3d->rasterizer->base.offset_tri) { -- if (job->zsbuf && -+ if (v3d->screen->devinfo.ver <= 42 && -+ job->zsbuf && - job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) { - cl_emit_prepacked_sized(&job->bcl, - v3d->rasterizer->depth_offset_z16, -diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c -index 8cca1a5030b..a7fad572a2d 100644 ---- a/src/gallium/drivers/v3d/v3dx_state.c -+++ b/src/gallium/drivers/v3d/v3dx_state.c -@@ -111,9 +111,10 @@ v3d_create_rasterizer_state(struct pipe_context *pctx, - #endif - } - -- /* The HW treats polygon offset units based on a Z24 buffer, so we -+ /* V3d 4.x treats polygon offset units based on a Z24 buffer, so we - * need to scale up offset_units if we're only Z16. - */ -+#if V3D_VERSION <= 42 - v3dx_pack(&so->depth_offset_z16, DEPTH_OFFSET, depth) { - depth.depth_offset_factor = cso->offset_scale; - depth.depth_offset_units = cso->offset_units * 256.0; -@@ -121,6 +122,7 @@ v3d_create_rasterizer_state(struct pipe_context *pctx, - depth.limit = cso->offset_clamp; - #endif - } -+#endif - - return so; - } --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0130-v3d-v3dv-fix-compute-for-V3D-7.1.6.patch b/projects/RPi/devices/RPi5/patches/mesa/0130-v3d-v3dv-fix-compute-for-V3D-7.1.6.patch deleted file mode 100644 index 2d0a54aa83..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0130-v3d-v3dv-fix-compute-for-V3D-7.1.6.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 221d4079c616752b249cefb352268fce5758b578 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Thu, 9 Mar 2023 19:05:19 +0100 -Subject: [PATCH 130/142] v3d,v3dv: fix compute for V3D 7.1.6+ - ---- - src/broadcom/vulkan/v3dv_cmd_buffer.c | 25 +++++++++++++++++++++---- - src/broadcom/vulkan/v3dv_private.h | 3 ++- - src/broadcom/vulkan/v3dv_queue.c | 2 +- - src/gallium/drivers/v3d/v3dx_draw.c | 14 +++++++++++--- - 4 files changed, 35 insertions(+), 9 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c -index 36bd7960985..609c7acfa8f 100644 ---- a/src/broadcom/vulkan/v3dv_cmd_buffer.c -+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c -@@ -3816,6 +3816,7 @@ cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer) - - void - v3dv_cmd_buffer_rewrite_indirect_csd_job( -+ struct v3dv_device *device, - struct v3dv_csd_indirect_cpu_job_info *info, - const uint32_t *wg_counts) - { -@@ -3835,8 +3836,15 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job( - submit->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - submit->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - -- submit->cfg[4] = DIV_ROUND_UP(info->wg_size, 16) * -- (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; -+ uint32_t num_batches = DIV_ROUND_UP(info->wg_size, 16) * -+ (wg_counts[0] * wg_counts[1] * wg_counts[2]); -+ /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ -+ if (device->devinfo.ver < 71 || -+ (device->devinfo.ver == 71 && device->devinfo.rev < 6)) { -+ submit->cfg[4] = num_batches - 1; -+ } else { -+ submit->cfg[4] = num_batches; -+ } - assert(submit->cfg[4] != ~0); - - if (info->needs_wg_uniform_rewrite) { -@@ -3869,6 +3877,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t **wg_uniform_offsets_out, - uint32_t *wg_size_out) - { -+ struct v3dv_device *device = cmd_buffer->device; - struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline; - assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); - struct v3dv_shader_variant *cs_variant = -@@ -3927,18 +3936,26 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, - if (wg_size_out) - *wg_size_out = wg_size; - -- submit->cfg[4] = num_batches - 1; -+ /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ -+ if (device->devinfo.ver < 71 || -+ (device->devinfo.ver == 71 && device->devinfo.rev < 6)) { -+ submit->cfg[4] = num_batches - 1; -+ } else { -+ submit->cfg[4] = num_batches; -+ } - assert(submit->cfg[4] != ~0); - - assert(pipeline->shared_data->assembly_bo); - struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo; - - submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset; -- submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; - if (cs_variant->prog_data.base->single_seg) - submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG; - if (cs_variant->prog_data.base->threads == 4) - submit->cfg[5] |= V3D_CSD_CFG5_THREADING; -+ /* V3D 7.x has made the PROPAGATE_NANS bit in CFG5 reserved */ -+ if (device->devinfo.ver < 71) -+ submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; - - if (cs_variant->prog_data.cs->shared_size > 0) { - job->csd.shared_memory = -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index 8adb8873efd..2f3ef185126 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -1818,7 +1818,8 @@ void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer, - void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, - struct drm_v3d_submit_tfu *tfu); - --void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info, -+void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device, -+ struct v3dv_csd_indirect_cpu_job_info *info, - const uint32_t *wg_counts); - - void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer, -diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c -index b4aae195180..429d14a9196 100644 ---- a/src/broadcom/vulkan/v3dv_queue.c -+++ b/src/broadcom/vulkan/v3dv_queue.c -@@ -408,7 +408,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue, - - if (memcmp(group_counts, info->csd_job->csd.wg_count, - sizeof(info->csd_job->csd.wg_count)) != 0) { -- v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts); -+ v3dv_cmd_buffer_rewrite_indirect_csd_job(queue->device, info, group_counts); - } - - return VK_SUCCESS; -diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c -index e4b414b0676..4e1af41d50e 100644 ---- a/src/gallium/drivers/v3d/v3dx_draw.c -+++ b/src/gallium/drivers/v3d/v3dx_draw.c -@@ -1473,8 +1473,15 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) - submit.cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT; - - -- /* Number of batches the dispatch will invoke (minus 1). */ -- submit.cfg[4] = num_batches - 1; -+ /* Number of batches the dispatch will invoke. -+ * V3D 7.1.6 and later don't subtract 1 from the number of batches -+ */ -+ if (v3d->screen->devinfo.ver < 71 || -+ (v3d->screen->devinfo.ver == 71 && v3d->screen->devinfo.rev < 6)) { -+ submit.cfg[4] = num_batches - 1; -+ } else { -+ submit.cfg[4] = num_batches; -+ } - - /* Make sure we didn't accidentally underflow. */ - assert(submit.cfg[4] != ~0); -@@ -1482,7 +1489,8 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) - v3d_job_add_bo(job, v3d_resource(v3d->prog.compute->resource)->bo); - submit.cfg[5] = (v3d_resource(v3d->prog.compute->resource)->bo->offset + - v3d->prog.compute->offset); -- submit.cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; -+ if (v3d->screen->devinfo.ver < 71) -+ submit.cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; - if (v3d->prog.compute->prog_data.base->single_seg) - submit.cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG; - if (v3d->prog.compute->prog_data.base->threads == 4) --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0131-broadcom-add-performance-counters-for-V3D-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0131-broadcom-add-performance-counters-for-V3D-7.x.patch deleted file mode 100644 index b4270672ec..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0131-broadcom-add-performance-counters-for-V3D-7.x.patch +++ /dev/null @@ -1,567 +0,0 @@ -From be6c7ba62dbdb9c5babd33a518a042dd554679d7 Mon Sep 17 00:00:00 2001 -From: "Juan A. Suarez Romero" -Date: Wed, 22 Feb 2023 09:43:40 +0100 -Subject: [PATCH 131/142] broadcom: add performance counters for V3D 7.x - -Some of the counters need to be defined correctly. - -v2: Remove references to extended performance counters. The hw does - not support them. - -Signed-off-by: Juan A. Suarez Romero ---- - .../common/v3d_performance_counters.h | 108 ++++++++++++++++++ - src/broadcom/simulator/v3d_simulator.c | 8 +- - src/broadcom/simulator/v3dx_simulator.c | 2 +- - src/broadcom/vulkan/meson.build | 1 + - src/broadcom/vulkan/v3dv_private.h | 7 +- - src/broadcom/vulkan/v3dv_query.c | 43 +------ - src/broadcom/vulkan/v3dvx_private.h | 6 + - src/broadcom/vulkan/v3dvx_query.c | 67 +++++++++++ - src/gallium/drivers/v3d/meson.build | 2 +- - src/gallium/drivers/v3d/v3d_query.c | 20 +++- - src/gallium/drivers/v3d/v3d_query.h | 6 - - src/gallium/drivers/v3d/v3dx_context.h | 10 ++ - ...d_query_perfcnt.c => v3dx_query_perfcnt.c} | 12 +- - 13 files changed, 233 insertions(+), 59 deletions(-) - create mode 100644 src/broadcom/vulkan/v3dvx_query.c - rename src/gallium/drivers/v3d/{v3d_query_perfcnt.c => v3dx_query_perfcnt.c} (94%) - -diff --git a/src/broadcom/common/v3d_performance_counters.h b/src/broadcom/common/v3d_performance_counters.h -index 08d750c2cbe..a8f0cff8784 100644 ---- a/src/broadcom/common/v3d_performance_counters.h -+++ b/src/broadcom/common/v3d_performance_counters.h -@@ -28,6 +28,110 @@ - #define V3D_PERFCNT_NAME 1 - #define V3D_PERFCNT_DESCRIPTION 2 - -+#ifndef V3D_VERSION -+# error "The V3D_VERSION macro must be defined" -+#endif -+ -+#if (V3D_VERSION >= 71) -+ -+static const char *v3d_performance_counters[][3] = { -+ {"CORE", "cycle-count", "[CORE] Cycle counter"}, -+ {"CORE", "core-active", "[CORE] Bin/Render/Compute active cycles"}, -+ {"CLE", "CLE-bin-thread-active-cycles", "[CLE] Bin thread active cycles"}, -+ {"CLE", "CLE-render-thread-active-cycles", "[CLE] Render thread active cycles"}, -+ {"CORE", "compute-active-cycles", "[CORE] Compute active cycles"}, -+ {"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"}, -+ {"FEP", "FEP-valid-primitives-rendered-pixels", "[FEP] Valid primitives for all rendered tiles (primitives may be counted in more than one tile)"}, -+ {"FEP", "FEP-clipped-quads", "[FEP] Early-Z/Near/Far clipped quads"}, -+ {"FEP", "FEP-valid-quads", "[FEP] Valid quads"}, -+ {"TLB", "TLB-quads-not-passing-stencil-test", "[TLB] Quads with no pixels passing the stencil test"}, -+ {"TLB", "TLB-quads-not-passing-z-and-stencil-test", "[TLB] Quads with no pixels passing the Z and stencil tests"}, -+ {"TLB", "TLB-quads-passing-z-and-stencil-test", "[TLB] Quads with any pixels passing the Z and stencil tests"}, -+ {"TLB", "TLB-quads-written-to-color-buffer", "[TLB] Quads with valid pixels written to colour buffer"}, -+ {"TLB", "TLB-partial-quads-written-to-color-buffer", "[TLB] Partial quads written to the colour buffer"}, -+ {"PTB", "PTB-primitives-need-clipping", "[PTB] Primitives that need clipping"}, -+ {"PTB", "PTB-primitives-discarded-outside-viewport", "[PTB] Primitives discarded by being outside the viewport"}, -+ {"PTB", "PTB-primitives-binned", "[PTB] Total primitives binned"}, -+ {"PTB", "PTB-primitives-discarded-reversed", "[PTB] Primitives that are discarded because they are reversed"}, -+ {"QPU", "QPU-total-instr-cache-hit", "[QPU] Total instruction cache hits for all slices"}, -+ {"QPU", "QPU-total-instr-cache-miss", "[QPU] Total instruction cache misses for all slices"}, -+ {"QPU", "QPU-total-uniform-cache-hit", "[QPU] Total uniforms cache hits for all slices"}, -+ {"QPU", "QPU-total-uniform-cache-miss", "[QPU] Total uniforms cache misses for all slices"}, -+ {"TMU", "TMU-active-cycles", "[TMU] Active cycles"}, -+ {"TMU", "TMU-stalled-cycles", "[TMU] Stalled cycles"}, -+ {"TMU", "TMU-total-text-quads-access", "[TMU] Total texture cache accesses"}, -+ {"TMU", "TMU-cache-x4-active-cycles", "[TMU] Cache active cycles for x4 access"}, -+ {"TMU", "TMU-cache-x4-stalled-cycles", "[TMU] Cache stalled cycles for x4 access"}, -+ {"TMU", "TMU-total-text-quads-x4-access", "[TMU] Total texture cache x4 access"}, -+ {"L2T", "L2T-total-cache-hit", "[L2T] Total Level 2 cache hits"}, -+ {"L2T", "L2T-total-cache-miss", "[L2T] Total Level 2 cache misses"}, -+ {"L2T", "L2T-local", "[L2T] Local mode access"}, -+ {"L2T", "L2T-writeback", "[L2T] Writeback"}, -+ {"L2T", "L2T-zero", "[L2T] Zero"}, -+ {"L2T", "L2T-merge", "[L2T] Merge"}, -+ {"L2T", "L2T-fill", "[L2T] Fill"}, -+ {"L2T", "L2T-stalls-no-wid", "[L2T] Stalls because no WID available"}, -+ {"L2T", "L2T-stalls-no-rid", "[L2T] Stalls because no RID available"}, -+ {"L2T", "L2T-stalls-queue-full", "[L2T] Stalls because internal queue full"}, -+ {"L2T", "L2T-stalls-wrightback", "[L2T] Stalls because writeback in flight"}, -+ {"L2T", "L2T-stalls-mem", "[L2T] Stalls because AXI blocks read"}, -+ {"L2T", "L2T-stalls-fill", "[L2T] Stalls because fill pending for victim cache-line"}, -+ {"L2T", "L2T-hitq", "[L2T] Sent request via hit queue"}, -+ {"L2T", "L2T-hitq-full", "[L2T] Sent request via main queue because hit queue is full"}, -+ {"L2T", "L2T-stalls-read-data", "[L2T] Stalls because waiting for data from SDRAM"}, -+ {"L2T", "L2T-TMU-read-hits", "[L2T] TMU read hits"}, -+ {"L2T", "L2T-TMU-read-miss", "[L2T] TMU read misses"}, -+ {"L2T", "L2T-VCD-read-hits", "[L2T] VCD read hits"}, -+ {"L2T", "L2T-VCD-read-miss", "[L2T] VCD read misses"}, -+ {"L2T", "L2T-SLC-read-hits", "[L2T] SLC read hits (all slices)"}, -+ {"L2T", "L2T-SLC-read-miss", "[L2T] SLC read misses (all slices)"}, -+ {"AXI", "AXI-writes-seen-watch-0", "[AXI] Writes seen by watch 0"}, -+ {"AXI", "AXI-reads-seen-watch-0", "[AXI] Reads seen by watch 0"}, -+ {"AXI", "AXI-writes-stalled-seen-watch-0", "[AXI] Write stalls seen by watch 0"}, -+ {"AXI", "AXI-reads-stalled-seen-watch-0", "[AXI] Read stalls seen by watch 0"}, -+ {"AXI", "AXI-write-bytes-seen-watch-0", "[AXI] Total bytes written seen by watch 0"}, -+ {"AXI", "AXI-read-bytes-seen-watch-0", "[AXI] Total bytes read seen by watch 0"}, -+ {"AXI", "AXI-writes-seen-watch-1", "[AXI] Writes seen by watch 1"}, -+ {"AXI", "AXI-reads-seen-watch-1", "[AXI] Reads seen by watch 1"}, -+ {"AXI", "AXI-writes-stalled-seen-watch-1", "[AXI] Write stalls seen by watch 1"}, -+ {"AXI", "AXI-reads-stalled-seen-watch-1", "[AXI] Read stalls seen by watch 1"}, -+ {"AXI", "AXI-write-bytes-seen-watch-1", "[AXI] Total bytes written seen by watch 1"}, -+ {"AXI", "AXI-read-bytes-seen-watch-1", "[AXI] Total bytes read seen by watch 1"}, -+ {"CORE", "core-memory-writes", "[CORE] Total memory writes"}, -+ {"L2T", "L2T-memory-writes", "[L2T] Total memory writes"}, -+ {"PTB", "PTB-memory-writes", "[PTB] Total memory writes"}, -+ {"TLB", "TLB-memory-writes", "[TLB] Total memory writes"}, -+ {"CORE", "core-memory-reads", "[CORE] Total memory reads"}, -+ {"L2T", "L2T-memory-reads", "[L2T] Total memory reads"}, -+ {"PTB", "PTB-memory-reads", "[PTB] Total memory reads"}, -+ {"PSE", "PSE-memory-reads", "[PSE] Total memory reads"}, -+ {"TLB", "TLB-memory-reads", "[TLB] Total memory reads"}, -+ {"PTB", "PTB-memory-words-writes", "[PTB] Total memory words written"}, -+ {"TLB", "TLB-memory-words-writes", "[TLB] Total memory words written"}, -+ {"PSE", "PSE-memory-words-reads", "[PSE] Total memory words read"}, -+ {"TLB", "TLB-memory-words-reads", "[TLB] Total memory words read"}, -+ {"AXI", "AXI-read-trans", "[AXI] Read transaction count"}, -+ {"AXI", "AXI-write-trans", "[AXI] Write transaction count"}, -+ {"AXI", "AXI-read-wait-cycles", "[AXI] Read total wait cycles"}, -+ {"AXI", "AXI-write-wait-cycles", "[AXI] Write total wait cycles"}, -+ {"AXI", "AXI-max-outstanding-reads", "[AXI] Maximium outstanding read transactions"}, -+ {"AXI", "AXI-max-outstanding-writes", "[AXI] Maximum outstanding write transactions"}, -+ {"QPU", "QPU-wait-bubble", "[QPU] Pipeline bubble in qcycles due all threads waiting"}, -+ {"QPU", "QPU-ic-miss-bubble", "[QPU] Pipeline bubble in qcycles due instruction-cache miss"}, -+ {"QPU", "QPU-active", "[QPU] Executed shader instruction"}, -+ {"QPU", "QPU-total-active-clk-cycles-fragment-shading", "[QPU] Total active clock cycles for all QPUs doing fragment shading (counts only when QPU is not stalled)"}, -+ {"QPU", "QPU-stalls", "[QPU] Stalled qcycles executing shader instruction"}, -+ {"QPU", "QPU-total-clk-cycles-waiting-fragment-shading", "[QPU] Total stalled clock cycles for all QPUs doing fragment shading"}, -+ {"QPU", "QPU-stalls-TMU", "[QPU] Stalled qcycles waiting for TMU"}, -+ {"QPU", "QPU-stalls-TLB", "[QPU] Stalled qcycles waiting for TLB"}, -+ {"QPU", "QPU-stalls-VPM", "[QPU] Stalled qcycles waiting for VPM"}, -+ {"QPU", "QPU-stalls-uniforms", "[QPU] Stalled qcycles waiting for uniforms"}, -+ {"QPU", "QPU-stalls-SFU", "[QPU] Stalled qcycles waiting for SFU"}, -+ {"QPU", "QPU-stalls-other", "[QPU] Stalled qcycles waiting for any other reason (vary/W/Z)"}, -+}; -+ -+#elif (V3D_VERSION >= 41) -+ - static const char *v3d_performance_counters[][3] = { - {"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"}, - {"FEP", "FEP-valid-primitives-rendered-pixels", "[FEP] Valid primitives for all rendered tiles (primitives may be counted in more than one tile)"}, -@@ -118,4 +222,8 @@ static const char *v3d_performance_counters[][3] = { - {"CORE", "compute-active-cycles", "[CORE] Compute active cycles"}, - }; - -+#else -+static const char *v3d_performance_counters[][3] = { }; -+#endif -+ - #endif -diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c -index 5cceb1a82cc..36e719296f4 100644 ---- a/src/broadcom/simulator/v3d_simulator.c -+++ b/src/broadcom/simulator/v3d_simulator.c -@@ -92,6 +92,9 @@ static struct v3d_simulator_state { - /** Last performance monitor ID. */ - uint32_t last_perfid; - -+ /** Total performance counters */ -+ uint32_t perfcnt_total; -+ - struct util_dynarray bin_oom; - int refcount; - } sim_state = { -@@ -751,7 +754,7 @@ v3d_simulator_perfmon_create_ioctl(int fd, struct drm_v3d_perfmon_create *args) - - perfmon->ncounters = args->ncounters; - for (int i = 0; i < args->ncounters; i++) { -- if (args->counters[i] >= V3D_PERFCNT_NUM) { -+ if (args->counters[i] >= sim_state.perfcnt_total) { - ralloc_free(perfmon); - return -EINVAL; - } else { -@@ -918,13 +921,16 @@ v3d_simulator_init_global() - switch(sim_state.ver) { - case 33: - v3d33_simulator_init_regs(sim_state.v3d); -+ sim_state.perfcnt_total = 0; - break; - case 41: - case 42: - v3d41_simulator_init_regs(sim_state.v3d); -+ sim_state.perfcnt_total = 87; - break; - case 71: - v3d71_simulator_init_regs(sim_state.v3d); -+ sim_state.perfcnt_total = 93; - break; - default: - unreachable("Not supported V3D version\n"); -diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c -index 4ea177c9bb7..4520fe75719 100644 ---- a/src/broadcom/simulator/v3dx_simulator.c -+++ b/src/broadcom/simulator/v3dx_simulator.c -@@ -50,7 +50,7 @@ - #include "libs/core/v3d/registers/7.1.5.1/v3d.h" - #else - #if V3D_VERSION == 41 || V3D_VERSION == 42 --#include "libs/core/v3d/registers/4.1.35.0/v3d.h" -+#include "libs/core/v3d/registers/4.2.14.0/v3d.h" - #else - #include "libs/core/v3d/registers/3.3.0.0/v3d.h" - #endif -diff --git a/src/broadcom/vulkan/meson.build b/src/broadcom/vulkan/meson.build -index 3da7364686f..182388a35b4 100644 ---- a/src/broadcom/vulkan/meson.build -+++ b/src/broadcom/vulkan/meson.build -@@ -65,6 +65,7 @@ files_per_version = files( - 'v3dvx_pipeline.c', - 'v3dvx_meta_common.c', - 'v3dvx_pipeline.c', -+ 'v3dvx_query.c', - 'v3dvx_queue.c', - ) - -diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h -index 2f3ef185126..89e2f1c7e5c 100644 ---- a/src/broadcom/vulkan/v3dv_private.h -+++ b/src/broadcom/vulkan/v3dv_private.h -@@ -123,6 +123,9 @@ struct v3d_simulator_file; - /* Minimum required by the Vulkan 1.1 spec */ - #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30) - -+/* Maximum performance counters number */ -+#define V3D_MAX_PERFCNT 93 -+ - struct v3dv_physical_device { - struct vk_physical_device vk; - -@@ -1210,7 +1213,7 @@ struct v3dv_timestamp_query_cpu_job_info { - }; - - /* Number of perfmons required to handle all supported performance counters */ --#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \ -+#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \ - DRM_V3D_MAX_PERF_COUNTERS) - - struct v3dv_perf_query { -@@ -1682,7 +1685,7 @@ struct v3dv_query_pool { - /* Only used with performance queries */ - struct { - uint32_t ncounters; -- uint8_t counters[V3D_PERFCNT_NUM]; -+ uint8_t counters[V3D_MAX_PERFCNT]; - - /* V3D has a limit on the number of counters we can track in a - * single performance monitor, so if too many counters are requested -diff --git a/src/broadcom/vulkan/v3dv_query.c b/src/broadcom/vulkan/v3dv_query.c -index 3284c467d74..deb7821f02b 100644 ---- a/src/broadcom/vulkan/v3dv_query.c -+++ b/src/broadcom/vulkan/v3dv_query.c -@@ -23,7 +23,6 @@ - - #include "v3dv_private.h" - --#include "common/v3d_performance_counters.h" - #include "util/timespec.h" - #include "compiler/nir/nir_builder.h" - -@@ -48,7 +47,7 @@ kperfmon_create(struct v3dv_device *device, - DRM_IOCTL_V3D_PERFMON_CREATE, - &req); - if (ret) -- fprintf(stderr, "Failed to create perfmon: %s\n", strerror(ret)); -+ fprintf(stderr, "Failed to create perfmon for query %d: %s\n", query, strerror(ret)); - - pool->queries[query].perf.kperfmon_ids[i] = req.id; - } -@@ -303,7 +302,6 @@ v3dv_CreateQueryPool(VkDevice _device, - QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR); - - assert(pq_info); -- assert(pq_info->counterIndexCount <= V3D_PERFCNT_NUM); - - pool->perfmon.ncounters = pq_info->counterIndexCount; - for (uint32_t i = 0; i < pq_info->counterIndexCount; i++) -@@ -592,7 +590,7 @@ write_performance_query_result(struct v3dv_device *device, - assert(pool && pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); - - struct v3dv_query *q = &pool->queries[query]; -- uint64_t counter_values[V3D_PERFCNT_NUM]; -+ uint64_t counter_values[V3D_MAX_PERFCNT]; - - for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) { - struct drm_v3d_perfmon_get_values req = { -@@ -1284,40 +1282,11 @@ v3dv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( - VkPerformanceCounterKHR *pCounters, - VkPerformanceCounterDescriptionKHR *pCounterDescriptions) - { -- uint32_t desc_count = *pCounterCount; -+ V3DV_FROM_HANDLE(v3dv_physical_device, pDevice, physicalDevice); - -- VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, -- out, pCounters, pCounterCount); -- VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, -- out_desc, pCounterDescriptions, &desc_count); -- -- for (int i = 0; i < ARRAY_SIZE(v3d_performance_counters); i++) { -- vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) { -- counter->unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR; -- counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR; -- counter->storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR; -- -- unsigned char sha1_result[20]; -- _mesa_sha1_compute(v3d_performance_counters[i][V3D_PERFCNT_NAME], -- strlen(v3d_performance_counters[i][V3D_PERFCNT_NAME]), -- sha1_result); -- -- memcpy(counter->uuid, sha1_result, sizeof(counter->uuid)); -- } -- -- vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, -- &out_desc, desc) { -- desc->flags = 0; -- snprintf(desc->name, sizeof(desc->name), "%s", -- v3d_performance_counters[i][V3D_PERFCNT_NAME]); -- snprintf(desc->category, sizeof(desc->category), "%s", -- v3d_performance_counters[i][V3D_PERFCNT_CATEGORY]); -- snprintf(desc->description, sizeof(desc->description), "%s", -- v3d_performance_counters[i][V3D_PERFCNT_DESCRIPTION]); -- } -- } -- -- return vk_outarray_status(&out); -+ return v3dv_X(pDevice, enumerate_performance_query_counters)(pCounterCount, -+ pCounters, -+ pCounterDescriptions); - } - - VKAPI_ATTR void VKAPI_CALL -diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h -index 27d6736c0e3..0f5887eab93 100644 ---- a/src/broadcom/vulkan/v3dvx_private.h -+++ b/src/broadcom/vulkan/v3dvx_private.h -@@ -324,6 +324,12 @@ v3dX(create_default_attribute_values)(struct v3dv_device *device, - void - v3dX(job_emit_noop)(struct v3dv_job *job); - -+/* Used at v3dv_query */ -+VkResult -+v3dX(enumerate_performance_query_counters)(uint32_t *pCounterCount, -+ VkPerformanceCounterKHR *pCounters, -+ VkPerformanceCounterDescriptionKHR *pCounterDescriptions); -+ - /* Used at v3dv_descriptor_set, and other descriptor set utils */ - uint32_t v3dX(descriptor_bo_size)(VkDescriptorType type); - -diff --git a/src/broadcom/vulkan/v3dvx_query.c b/src/broadcom/vulkan/v3dvx_query.c -new file mode 100644 -index 00000000000..e59a1e84ff6 ---- /dev/null -+++ b/src/broadcom/vulkan/v3dvx_query.c -@@ -0,0 +1,67 @@ -+/* -+ * Copyright © 2023 Raspberry Pi Ltd -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#include "v3dv_private.h" -+ -+#include "common/v3d_performance_counters.h" -+ -+VkResult -+v3dX(enumerate_performance_query_counters)(uint32_t *pCounterCount, -+ VkPerformanceCounterKHR *pCounters, -+ VkPerformanceCounterDescriptionKHR *pCounterDescriptions) -+{ -+ uint32_t desc_count = *pCounterCount; -+ -+ VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, -+ out, pCounters, pCounterCount); -+ VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, -+ out_desc, pCounterDescriptions, &desc_count); -+ -+ for (int i = 0; i < ARRAY_SIZE(v3d_performance_counters); i++) { -+ vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) { -+ counter->unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR; -+ counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR; -+ counter->storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR; -+ -+ unsigned char sha1_result[20]; -+ _mesa_sha1_compute(v3d_performance_counters[i][V3D_PERFCNT_NAME], -+ strlen(v3d_performance_counters[i][V3D_PERFCNT_NAME]), -+ sha1_result); -+ -+ memcpy(counter->uuid, sha1_result, sizeof(counter->uuid)); -+ } -+ -+ vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, -+ &out_desc, desc) { -+ desc->flags = 0; -+ snprintf(desc->name, sizeof(desc->name), "%s", -+ v3d_performance_counters[i][V3D_PERFCNT_NAME]); -+ snprintf(desc->category, sizeof(desc->category), "%s", -+ v3d_performance_counters[i][V3D_PERFCNT_CATEGORY]); -+ snprintf(desc->description, sizeof(desc->description), "%s", -+ v3d_performance_counters[i][V3D_PERFCNT_DESCRIPTION]); -+ } -+ } -+ -+ return vk_outarray_status(&out); -+} -diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build -index b2e748573b7..289473d2ca1 100644 ---- a/src/gallium/drivers/v3d/meson.build -+++ b/src/gallium/drivers/v3d/meson.build -@@ -34,7 +34,6 @@ files_libv3d = files( - 'v3d_query.c', - 'v3d_query.h', - 'v3d_query_pipe.c', -- 'v3d_query_perfcnt.c', - 'v3d_resource.c', - 'v3d_resource.h', - 'v3d_screen.c', -@@ -47,6 +46,7 @@ files_per_version = files( - 'v3dx_emit.c', - 'v3dx_format_table.c', - 'v3dx_job.c', -+ 'v3dx_query_perfcnt.c', - 'v3dx_rcl.c', - 'v3dx_state.c', - 'v3dx_tfu.c', -diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c -index db98c89625f..83f82e44a3d 100644 ---- a/src/gallium/drivers/v3d/v3d_query.c -+++ b/src/gallium/drivers/v3d/v3d_query.c -@@ -28,8 +28,11 @@ v3d_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index, - struct pipe_driver_query_group_info *info) - { - struct v3d_screen *screen = v3d_screen(pscreen); -+ struct v3d_device_info *devinfo = &screen->devinfo; - -- return v3d_get_driver_query_group_info_perfcnt(screen, index, info); -+ return v3d_X(devinfo, get_driver_query_group_info_perfcnt)(screen, -+ index, -+ info); - } - - int -@@ -37,8 +40,11 @@ v3d_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, - struct pipe_driver_query_info *info) - { - struct v3d_screen *screen = v3d_screen(pscreen); -+ struct v3d_device_info *devinfo = &screen->devinfo; - -- return v3d_get_driver_query_info_perfcnt(screen, index, info); -+ return v3d_X(devinfo, get_driver_query_info_perfcnt)(screen, -+ index, -+ info); - } - - static struct pipe_query * -@@ -53,9 +59,13 @@ static struct pipe_query * - v3d_create_batch_query(struct pipe_context *pctx, unsigned num_queries, - unsigned *query_types) - { -- return v3d_create_batch_query_perfcnt(v3d_context(pctx), -- num_queries, -- query_types); -+ struct v3d_context *v3d = v3d_context(pctx); -+ struct v3d_screen *screen = v3d->screen; -+ struct v3d_device_info *devinfo = &screen->devinfo; -+ -+ return v3d_X(devinfo, create_batch_query_perfcnt)(v3d_context(pctx), -+ num_queries, -+ query_types); - } - - static void -diff --git a/src/gallium/drivers/v3d/v3d_query.h b/src/gallium/drivers/v3d/v3d_query.h -index 3e1426b8d86..605ed1a12f9 100644 ---- a/src/gallium/drivers/v3d/v3d_query.h -+++ b/src/gallium/drivers/v3d/v3d_query.h -@@ -42,11 +42,5 @@ struct v3d_query - }; - - struct pipe_query *v3d_create_query_pipe(struct v3d_context *v3d, unsigned query_type, unsigned index); --struct pipe_query *v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries, -- unsigned *query_types); --int v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index, -- struct pipe_driver_query_group_info *info); --int v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index, -- struct pipe_driver_query_info *info); - - #endif /* V3D_QUERY_H */ -diff --git a/src/gallium/drivers/v3d/v3dx_context.h b/src/gallium/drivers/v3d/v3dx_context.h -index e0a5cbfb2f3..c487ac3b996 100644 ---- a/src/gallium/drivers/v3d/v3dx_context.h -+++ b/src/gallium/drivers/v3d/v3dx_context.h -@@ -61,3 +61,13 @@ bool v3dX(tfu)(struct pipe_context *pctx, - unsigned int src_layer, - unsigned int dst_layer, - bool for_mipmap); -+ -+int v3dX(get_driver_query_group_info_perfcnt)(struct v3d_screen *screen, -+ unsigned index, -+ struct pipe_driver_query_group_info *info); -+int v3dX(get_driver_query_info_perfcnt)(struct v3d_screen *screen, -+ unsigned index, -+ struct pipe_driver_query_info *info); -+struct pipe_query *v3dX(create_batch_query_perfcnt)(struct v3d_context *v3d, -+ unsigned num_queries, -+ unsigned *query_types); -diff --git a/src/gallium/drivers/v3d/v3d_query_perfcnt.c b/src/gallium/drivers/v3d/v3dx_query_perfcnt.c -similarity index 94% -rename from src/gallium/drivers/v3d/v3d_query_perfcnt.c -rename to src/gallium/drivers/v3d/v3dx_query_perfcnt.c -index e00d84e375f..431aad14b4f 100644 ---- a/src/gallium/drivers/v3d/v3d_query_perfcnt.c -+++ b/src/gallium/drivers/v3d/v3dx_query_perfcnt.c -@@ -52,8 +52,8 @@ kperfmon_destroy(struct v3d_context *v3d, struct v3d_perfmon_state *perfmon) - } - - int --v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index, -- struct pipe_driver_query_group_info *info) -+v3dX(get_driver_query_group_info_perfcnt)(struct v3d_screen *screen, unsigned index, -+ struct pipe_driver_query_group_info *info) - { - if (!screen->has_perfmon) - return 0; -@@ -72,8 +72,8 @@ v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned inde - } - - int --v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index, -- struct pipe_driver_query_info *info) -+v3dX(get_driver_query_info_perfcnt)(struct v3d_screen *screen, unsigned index, -+ struct pipe_driver_query_info *info) - { - if (!screen->has_perfmon) - return 0; -@@ -222,8 +222,8 @@ static const struct v3d_query_funcs perfcnt_query_funcs = { - }; - - struct pipe_query * --v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries, -- unsigned *query_types) -+v3dX(create_batch_query_perfcnt)(struct v3d_context *v3d, unsigned num_queries, -+ unsigned *query_types) - { - struct v3d_query_perfcnt *pquery = NULL; - struct v3d_query *query; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0132-broadcom-simulator-add-per-hw-version-calls.patch b/projects/RPi/devices/RPi5/patches/mesa/0132-broadcom-simulator-add-per-hw-version-calls.patch deleted file mode 100644 index 25d4e26ca4..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0132-broadcom-simulator-add-per-hw-version-calls.patch +++ /dev/null @@ -1,239 +0,0 @@ -From f7d5b57bca07eb9ba6fb292852e3b5057c0a8b8f Mon Sep 17 00:00:00 2001 -From: "Juan A. Suarez Romero" -Date: Mon, 20 Mar 2023 16:48:51 +0100 -Subject: [PATCH 132/142] broadcom/simulator: add per-hw version calls - -Add a wrapper to allow calling the right simulator function based on the -hardware under simulation. - -Signed-off-by: Juan A. Suarez Romero ---- - src/broadcom/simulator/v3d_simulator.c | 86 ++++--------------------- - src/broadcom/simulator/v3d_simulator.h | 21 ++++++ - src/broadcom/simulator/v3dx_simulator.c | 9 ++- - 3 files changed, 41 insertions(+), 75 deletions(-) - -diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c -index 36e719296f4..c4bbd61abc2 100644 ---- a/src/broadcom/simulator/v3d_simulator.c -+++ b/src/broadcom/simulator/v3d_simulator.c -@@ -439,15 +439,15 @@ v3d_simulator_perfmon_switch(int fd, uint32_t perfid) - - perfmon = v3d_get_simulator_perfmon(fd, file->active_perfid); - if (perfmon) -- v3d41_simulator_perfmon_stop(sim_state.v3d, -- perfmon->ncounters, -- perfmon->values); -+ v3d_X_simulator(perfmon_stop)(sim_state.v3d, -+ perfmon->ncounters, -+ perfmon->values); - - perfmon = v3d_get_simulator_perfmon(fd, perfid); - if (perfmon) -- v3d41_simulator_perfmon_start(sim_state.v3d, -- perfmon->ncounters, -- perfmon->counters); -+ v3d_X_simulator(perfmon_start)(sim_state.v3d, -+ perfmon->ncounters, -+ perfmon->counters); - - file->active_perfid = perfid; - } -@@ -492,21 +492,7 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit) - bin_fd = fd; - - v3d_simulator_perfmon_switch(fd, submit->perfmon_id); -- -- switch(sim_state.ver) { -- case 33: -- v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -- break; -- case 41: -- case 42: -- v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -- break; -- case 71: -- v3d71_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); -- break; -- default: -- unreachable("Unsupported V3D version\n"); -- } -+ v3d_X_simulator(submit_cl_ioctl)(sim_state.v3d, submit, file->gmp->ofs); - - util_dynarray_foreach(&sim_state.bin_oom, struct v3d_simulator_bo *, - sim_bo) { -@@ -645,22 +631,6 @@ v3d_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) - return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args); - } - --static int --v3d_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args) --{ -- switch(sim_state.ver) { -- case 33: -- return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); -- case 41: -- case 42: -- return v3d41_simulator_get_param_ioctl(sim_state.v3d, args); -- case 71: -- return v3d71_simulator_get_param_ioctl(sim_state.v3d, args); -- default: -- unreachable("Unsupported V3D version\n"); -- } --} -- - static int - v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args) - { -@@ -672,20 +642,7 @@ v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args) - v3d_simulator_copy_in_handle(file, args->bo_handles[2]); - v3d_simulator_copy_in_handle(file, args->bo_handles[3]); - -- switch(sim_state.ver) { -- case 33: -- ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args); -- break; -- case 41: -- case 42: -- ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args); -- break; -- case 71: -- ret = v3d71_simulator_submit_tfu_ioctl(sim_state.v3d, args); -- break; -- default: -- unreachable("Unsupported V3D version\n"); -- } -+ ret = v3d_X_simulator(submit_tfu_ioctl)(sim_state.v3d, args); - - v3d_simulator_copy_out_handle(file, args->bo_handles[0]); - -@@ -712,19 +669,8 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args) - - v3d_simulator_perfmon_switch(fd, args->perfmon_id); - -- switch(sim_state.ver) { -- case 41: -- case 42: -- ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args, -- file->gmp->ofs); -- break; -- case 71: -- ret = v3d71_simulator_submit_csd_ioctl(sim_state.v3d, args, -- file->gmp->ofs); -- break; -- default: -- ret = -1; -- } -+ ret = v3d_X_simulator(submit_csd_ioctl)(sim_state.v3d, args, -+ file->gmp->ofs); - - for (int i = 0; i < args->bo_handle_count; i++) - v3d_simulator_copy_out_handle(file, bo_handles[i]); -@@ -835,7 +781,7 @@ v3d_simulator_ioctl(int fd, unsigned long request, void *args) - return 0; - - case DRM_IOCTL_V3D_GET_PARAM: -- return v3d_simulator_get_param_ioctl(fd, args); -+ return v3d_X_simulator(get_param_ioctl)(sim_state.v3d, args); - - case DRM_IOCTL_GEM_CLOSE: - return v3d_simulator_gem_close_ioctl(fd, args); -@@ -918,22 +864,18 @@ v3d_simulator_init_global() - - util_dynarray_init(&sim_state.bin_oom, NULL); - -+ v3d_X_simulator(init_regs)(sim_state.v3d); -+ - switch(sim_state.ver) { -- case 33: -- v3d33_simulator_init_regs(sim_state.v3d); -- sim_state.perfcnt_total = 0; -- break; - case 41: - case 42: -- v3d41_simulator_init_regs(sim_state.v3d); - sim_state.perfcnt_total = 87; - break; - case 71: -- v3d71_simulator_init_regs(sim_state.v3d); - sim_state.perfcnt_total = 93; - break; - default: -- unreachable("Not supported V3D version\n"); -+ sim_state.perfcnt_total = 0; - } - } - -diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h -index 1472c313a03..92305634468 100644 ---- a/src/broadcom/simulator/v3d_simulator.h -+++ b/src/broadcom/simulator/v3d_simulator.h -@@ -59,4 +59,25 @@ uint32_t v3d_simulator_get_mem_free(void); - - #endif - -+/* Helper to call simulator ver specific functions */ -+#define v3d_X_simulator(thing) ({ \ -+ __typeof(&v3d33_simulator_##thing) v3d_X_sim_thing;\ -+ switch (sim_state.ver) { \ -+ case 33: \ -+ case 40: \ -+ v3d_X_sim_thing = &v3d33_simulator_##thing; \ -+ break; \ -+ case 41: \ -+ case 42: \ -+ v3d_X_sim_thing = &v3d41_simulator_##thing; \ -+ break; \ -+ case 71: \ -+ v3d_X_sim_thing = &v3d71_simulator_##thing; \ -+ break; \ -+ default: \ -+ unreachable("Unsupported hardware generation"); \ -+ } \ -+ v3d_X_sim_thing; \ -+}) -+ - #endif -diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c -index 4520fe75719..01cf6b22663 100644 ---- a/src/broadcom/simulator/v3dx_simulator.c -+++ b/src/broadcom/simulator/v3dx_simulator.c -@@ -218,12 +218,12 @@ v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, - return 0; - } - --#if V3D_VERSION >= 41 - int - v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, - struct drm_v3d_submit_csd *args, - uint32_t gmp_ofs) - { -+#if V3D_VERSION >= 41 - int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) & - V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET); - g_gmp_ofs = gmp_ofs; -@@ -256,8 +256,10 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, - v3d_flush_caches(v3d); - - return 0; --} -+#else -+ return -1; - #endif -+} - - int - v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, -@@ -545,7 +547,8 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, - #define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x)) - #define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8) - #define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \ -- V3D_PCTR_0_SRC_N_SHIFT(x) + 6)) -+ V3D_PCTR_0_SRC_N_SHIFT(x) + \ -+ V3D_PCTR_0_SRC_0_3_PCTRS0_MSB)) - #endif - - void --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0133-v3dv-expose-fullDrawIndexUint32-in-V3D-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0133-v3dv-expose-fullDrawIndexUint32-in-V3D-7.x.patch deleted file mode 100644 index 8b238d4963..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0133-v3dv-expose-fullDrawIndexUint32-in-V3D-7.x.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 151c13365703631f88ad77ba07afbd2ba9fa172c Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 31 May 2023 09:23:51 +0200 -Subject: [PATCH 133/142] v3dv: expose fullDrawIndexUint32 in V3D 7.x - ---- - src/broadcom/vulkan/v3dv_device.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index b520bfa0002..ca5f676b6f7 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -214,7 +214,7 @@ get_features(const struct v3dv_physical_device *physical_device, - *features = (struct vk_features) { - /* Vulkan 1.0 */ - .robustBufferAccess = true, /* This feature is mandatory */ -- .fullDrawIndexUint32 = false, /* Only available since V3D 4.4.9.1 */ -+ .fullDrawIndexUint32 = physical_device->devinfo.ver >= 71, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, -@@ -1451,7 +1451,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - .subPixelPrecisionBits = V3D_COORD_SHIFT, - .subTexelPrecisionBits = 8, - .mipmapPrecisionBits = 8, -- .maxDrawIndexedIndexValue = 0x00ffffff, -+ .maxDrawIndexedIndexValue = pdevice->devinfo.ver >= 71 ? -+ 0xffffffff : 0x00ffffff, - .maxDrawIndirectCount = 0x7fffffff, - .maxSamplerLodBias = 14.0f, - .maxSamplerAnisotropy = 16.0f, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0134-v3dv-expose-depthClamp-in-V3D-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0134-v3dv-expose-depthClamp-in-V3D-7.x.patch deleted file mode 100644 index 6f906ff11d..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0134-v3dv-expose-depthClamp-in-V3D-7.x.patch +++ /dev/null @@ -1,56 +0,0 @@ -From aec0c613e651984e577f580aedceb3561d6a3b19 Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 31 May 2023 10:38:59 +0200 -Subject: [PATCH 134/142] v3dv: expose depthClamp in V3D 7.x - ---- - src/broadcom/vulkan/v3dv_device.c | 2 +- - src/broadcom/vulkan/v3dvx_pipeline.c | 5 ++++- - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index ca5f676b6f7..30a9894789b 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -224,7 +224,7 @@ get_features(const struct v3dv_physical_device *physical_device, - .logicOp = true, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = true, -- .depthClamp = false, /* Only available since V3D 4.5.1.1 */ -+ .depthClamp = physical_device->devinfo.ver >= 71, - .depthBiasClamp = true, - .fillModeNonSolid = true, - .depthBounds = physical_device->devinfo.ver >= 71, -diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c -index c9b537f4b32..ad22add155d 100644 ---- a/src/broadcom/vulkan/v3dvx_pipeline.c -+++ b/src/broadcom/vulkan/v3dvx_pipeline.c -@@ -243,6 +243,7 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline, - * supported in the driver yet, so in practice we are always enabling Z - * clipping for now. - */ -+ bool z_clamp_enable = rs_info && rs_info->depthClampEnable; - bool z_clip_enable = false; - const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info = - ds_info ? vk_find_struct_const(ds_info->pNext, -@@ -250,7 +251,7 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline, - NULL; - if (clip_info) - z_clip_enable = clip_info->depthClipEnable; -- else if (!(rs_info && rs_info->depthClampEnable)) -+ else if (!z_clamp_enable) - z_clip_enable = true; - - if (z_clip_enable) { -@@ -260,6 +261,8 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline, - config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE; - } - -+ config.z_clamp_mode = z_clamp_enable; -+ - config.depth_bounds_test_enable = - ds_info && ds_info->depthBoundsTestEnable && has_ds_attachment; - #endif --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0135-v3dv-temporary-disable-EXT_acquire_drm_display.patch b/projects/RPi/devices/RPi5/patches/mesa/0135-v3dv-temporary-disable-EXT_acquire_drm_display.patch deleted file mode 100644 index 831de83810..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0135-v3dv-temporary-disable-EXT_acquire_drm_display.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 6bd92fecf57b5b1ae3f1f665726c4a0c43d3d90e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= -Date: Tue, 11 Apr 2023 13:11:39 +0200 -Subject: [PATCH 135/142] v3dv/temporary: disable EXT_acquire_drm_display - -So we could made a conformance run, without the need to include the -CTS patch for this issue: - -https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4377 ---- - src/broadcom/vulkan/v3dv_device.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index 30a9894789b..c0ffc05750f 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -91,7 +91,7 @@ static const struct vk_instance_extension_table instance_extensions = { - .KHR_display = true, - .KHR_get_display_properties2 = true, - .EXT_direct_mode_display = true, -- .EXT_acquire_drm_display = true, -+ .EXT_acquire_drm_display = false, - #endif - .KHR_external_fence_capabilities = true, - .KHR_external_memory_capabilities = true, --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0136-v3dv-expose-scalarBlockLayout-on-V3D-7.x.patch b/projects/RPi/devices/RPi5/patches/mesa/0136-v3dv-expose-scalarBlockLayout-on-V3D-7.x.patch deleted file mode 100644 index 402eb77074..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0136-v3dv-expose-scalarBlockLayout-on-V3D-7.x.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 7960516490008ab42ab31e921369b1ffb8f67bde Mon Sep 17 00:00:00 2001 -From: Iago Toral Quiroga -Date: Wed, 21 Jun 2023 10:29:07 +0200 -Subject: [PATCH 136/142] v3dv: expose scalarBlockLayout on V3D 7.x - -This version of V3D doesn't have the restriction that vector accesses -must not cross 16-byte boundaries. ---- - src/broadcom/vulkan/v3dv_device.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c -index c0ffc05750f..8f8102ae46e 100644 ---- a/src/broadcom/vulkan/v3dv_device.c -+++ b/src/broadcom/vulkan/v3dv_device.c -@@ -304,7 +304,7 @@ get_features(const struct v3dv_physical_device *physical_device, - * problematic, we would always have to scalarize. Overall, this would - * not lead to best performance so let's just not support it. - */ -- .scalarBlockLayout = false, -+ .scalarBlockLayout = physical_device->devinfo.ver >= 71, - /* This tells applications 2 things: - * - * 1. If they can select just one aspect for barriers. For us barriers --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0137-dri-Limit-the-max_num_back-to-2-on-COMPLETE_MODE_FLI.patch b/projects/RPi/devices/RPi5/patches/mesa/0137-dri-Limit-the-max_num_back-to-2-on-COMPLETE_MODE_FLI.patch deleted file mode 100644 index 5ff628c96d..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0137-dri-Limit-the-max_num_back-to-2-on-COMPLETE_MODE_FLI.patch +++ /dev/null @@ -1,42 +0,0 @@ -From b58e1d7fd1c315e6ada0ad9ec4961b65c88f0c2a Mon Sep 17 00:00:00 2001 -From: Jose Maria Casanova Crespo -Date: Mon, 4 Oct 2021 14:30:30 +0200 -Subject: [PATCH 137/142] dri: Limit the max_num_back to 2 on - COMPLETE_MODE_FLIP present mode - -This is limiting the number of back buffers that mesa can allocate, so -this avoids triple buffering, although that is desirable in some cases. - -To get this to upstream, we could convert it to a DRI option -and enable it only in the case of using mutter. -It seems to be feasible to limit this to some kind of configuration, as -we have access to the size of the back-buffer allocated. For example, -only limit for 4k-dual screen setup. - -With this Raspberry OS start-up CMA usage is 210Mb with 4k-dual screen -setup instead of 276Mb. - -The correct approach would be to check if we can make Mutter to wait -for buffer swaps before starting a new frame. - -https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7033 ---- - src/loader/loader_dri3_helper.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c -index 32135770e9d..2534c817dcc 100644 ---- a/src/loader/loader_dri3_helper.c -+++ b/src/loader/loader_dri3_helper.c -@@ -275,7 +275,7 @@ dri3_update_max_num_back(struct loader_dri3_drawable *draw) - if (draw->swap_interval == 0) - draw->max_num_back = 4; - else -- draw->max_num_back = 3; -+ draw->max_num_back = 2; - - assert(draw->max_num_back <= LOADER_DRI3_MAX_BACK); - break; --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0138-v3d-Ignore-SCANOUT-usage-flags-when-not-needed-under.patch b/projects/RPi/devices/RPi5/patches/mesa/0138-v3d-Ignore-SCANOUT-usage-flags-when-not-needed-under.patch deleted file mode 100644 index d1504ba496..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0138-v3d-Ignore-SCANOUT-usage-flags-when-not-needed-under.patch +++ /dev/null @@ -1,369 +0,0 @@ -From d0f2a99045fa9835fea822ada58a344e2fdc1b13 Mon Sep 17 00:00:00 2001 -From: Jose Maria Casanova Crespo -Date: Thu, 21 Oct 2021 22:04:57 +0200 -Subject: [PATCH 138/142] v3d: Ignore SCANOUT usage flags when not needed under - X - -These downstream patches force the usage of tiled formats -when possible, they have been tested for the Rasbperry Pi OS -desktop enviroment using Mutter+Xserver. - -It includes the following 3 patches: - - v3d: Add driconf options to rewrite SCANOUT usages - - v3d: Check if are under X session - - v3d: enable options to ignore SCANOUT flag on resource creation - -v3d: Add driconf options to rewrite SCANOUT usages - -We create a new eviroment variable V3D_IGNORE_SCANOUT_USAGES -that will affect v3d_resource_create_with_modifiers so -SCANOUT usages can be ignored. It can be enabled under X11 -with a compositor so applications are forces to use tiled render -buffers instead of the default behaviour that uses SCANOUT and -consume the limited CMA memory in the RPi4. - -The two new driconf options modulate the effect on two applications -Xorg and mutter. - -"v3d_maintain_ignorable_scanout": is enabled in mutter, could be used -in other compositors, the objective is that the enviroment has enable -the V3D_IGNORE_SCANOUT_USAGES, they aren't ignored in the compositor. - -"v3d_is_xserver_process": is used to handle a particular case -to avoid checking if an Xserver connection is available using XCB -as in some cases the call stalls the Xserver on boot. - -Following patches will use this configuration options to ignore or not -the SCANOUT usage on v3d_resource_allocation with modifiers. - -Upstreaming this patch need to review the effects of: - ad50b47a14e9 ("gbm: assume USE_SCANOUT in create_with_modifiers") - -v2: driconf for v3d_is_xserver_process is needed under XWayland - to avoid XCB connections in the XWayland process. - -v3d: Check if are under X session - -If we are using Wayland + XWayland, this is considered *not* being under -X session. - -v3d: enable options to ignore SCANOUT flag on resource creation - -This is a downstream patch for enabling the usage of more tiled -buffers in Raspberry OS under an enviroment using mutter and Xorg. - -This patch enables the following behaviour in order to reduce the -number of CMA usage and use tiled layouts because we ignore -the possible SCANOUT usage of the resource. - -This patch makes mutter to not ignore SCANOUT flags because as -compositor it should allocate linear render buffers suitable for display. - -Then if the Xserver has enabled the dmabuf_capable option, the -buffers backing the windows pixmaps will allocate using modifiers, -in the patched Xserver downstream making pixmaps exportable will use -gbm_gbm_bo_create_with_modifiers2 that does not add the SCANOUT flag -for exporting pixmaps. With the Mutter compositor we didn't find a -situation were this pixmaps needed to be SCANOUT. But this is not sure, -but it allows us to not use CMA for every window opened, and having them -in tiled format saves all linear->tiled conversion for sampling. - -Finally to take advantage of using Tiled render buffers for applications -we can enable in the enviroment V3D_IGNORE_SCANOUT_USAGES so all render -targes use the tiled UIF format without CMA memory instead of a linear one. -As the compositor mutter will composite the final surface for display we -aren't going to use the SCANOUT flag. This only applies if we are under -an X11 session. - -v2: v3d: ignore V3D_IGNORE_SCANOUT if only LINEAR modifier available - This is a fixup for the behaviour of ignoring SCANOUT flags - so we don't allocate CMA memory on V3D for render targets under - X11 as UIF isn't included and only LINEAR is a valid modifier - when Xserver is using msdri3. So we cannot ignore the SCANOUT flag. - As the Xserver in this situation is limiting the available modifiers - to linear, we can identify this case just not ignoring the SCANOUT - flag when we can only allocate linear resources. ---- - src/gallium/drivers/v3d/driinfo_v3d.h | 2 + - src/gallium/drivers/v3d/meson.build | 17 +++++--- - src/gallium/drivers/v3d/v3d_resource.c | 31 ++++++++++++-- - src/gallium/drivers/v3d/v3d_screen.c | 59 ++++++++++++++++++++++++++ - src/gallium/drivers/v3d/v3d_screen.h | 6 +++ - src/util/00-mesa-defaults.conf | 3 ++ - src/util/driconf.h | 8 ++++ - 7 files changed, 117 insertions(+), 9 deletions(-) - -diff --git a/src/gallium/drivers/v3d/driinfo_v3d.h b/src/gallium/drivers/v3d/driinfo_v3d.h -index 147ad0b49bd..8f989e8aa57 100644 ---- a/src/gallium/drivers/v3d/driinfo_v3d.h -+++ b/src/gallium/drivers/v3d/driinfo_v3d.h -@@ -2,4 +2,6 @@ - - DRI_CONF_SECTION_MISCELLANEOUS - DRI_CONF_V3D_NONMSAA_TEXTURE_SIZE_LIMIT(false) -+ DRI_CONF_V3D_MAINTAIN_IGNORABLE_SCANOUT(false) -+ DRI_CONF_V3D_IS_XSERVER_PROCESS(false) - DRI_CONF_SECTION_END -diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build -index 289473d2ca1..e47682db1aa 100644 ---- a/src/gallium/drivers/v3d/meson.build -+++ b/src/gallium/drivers/v3d/meson.build -@@ -61,6 +61,16 @@ endif - - v3d_versions = ['33', '42', '71'] - -+v3d_deps = [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers] -+ -+if with_platform_x11 -+ v3d_deps += dep_xcb -+endif -+ -+if with_platform_wayland -+ v3d_deps += dep_wayland_client -+endif -+ - per_version_libs = [] - foreach ver : v3d_versions - per_version_libs += static_library( -@@ -72,7 +82,7 @@ foreach ver : v3d_versions - ], - c_args : [v3d_args, '-DV3D_VERSION=' + ver], - gnu_symbol_visibility : 'hidden', -- dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers], -+ dependencies : v3d_deps, - ) - - endforeach -@@ -95,10 +105,7 @@ libv3d = static_library( - c_args : [v3d_args], - cpp_args : [v3d_args], - gnu_symbol_visibility : 'hidden', -- dependencies : [ -- dep_v3dv3, dep_libdrm, dep_valgrind, -- idep_nir_headers, idep_mesautil, -- ], -+ dependencies : v3d_deps + idep_mesautil, - link_with: [per_version_libs], - ) - -diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c -index a0a210ccad5..46de1b16ae0 100644 ---- a/src/gallium/drivers/v3d/v3d_resource.c -+++ b/src/gallium/drivers/v3d/v3d_resource.c -@@ -439,7 +439,7 @@ v3d_resource_get_handle(struct pipe_screen *pscreen, - case WINSYS_HANDLE_TYPE_SHARED: - return v3d_bo_flink(bo, &whandle->handle); - case WINSYS_HANDLE_TYPE_KMS: -- if (screen->ro) { -+ if (screen->ro && rsc->scanout) { - if (renderonly_get_handle(rsc->scanout, whandle)) { - whandle->stride = rsc->slices[0].stride; - return true; -@@ -785,6 +785,27 @@ v3d_resource_setup(struct pipe_screen *pscreen, - return rsc; - } - -+static bool -+v3d_resource_should_scanout(struct pipe_screen *pscreen, -+ const struct pipe_resource *tmpl, -+ const uint64_t *modifiers, -+ int count) -+{ -+ struct v3d_screen *screen = v3d_screen(pscreen); -+ -+ if (tmpl->bind & PIPE_BIND_SCANOUT) { -+ if (screen->maintain_ignorable_scanout) -+ return true; -+ if (screen->has_x_session && screen->ignore_scanout_usages) { -+ if (drm_find_modifier(DRM_FORMAT_MOD_BROADCOM_UIF, -+ modifiers, count)) -+ return false; -+ } -+ return true; -+ } -+ return false; -+} -+ - static struct pipe_resource * - v3d_resource_create_with_modifiers(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl, -@@ -798,6 +819,8 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen, - struct pipe_resource *prsc = &rsc->base; - /* Use a tiled layout if we can, for better 3D performance. */ - bool should_tile = true; -+ bool should_scanout = v3d_resource_should_scanout(pscreen, tmpl, -+ modifiers, count); - - assert(tmpl->target != PIPE_BUFFER || - (tmpl->format == PIPE_FORMAT_NONE || -@@ -827,7 +850,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen, - /* If using the old-school SCANOUT flag, we don't know what the screen - * might support other than linear. Just force linear. - */ -- if (tmpl->bind & PIPE_BIND_SCANOUT) -+ if ((tmpl->bind & PIPE_BIND_SCANOUT) && should_scanout) - should_tile = false; - - /* No user-specified modifier; determine our own. */ -@@ -849,7 +872,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen, - - v3d_setup_slices(rsc, 0, tmpl->bind & PIPE_BIND_SHARED); - -- if (screen->ro && (tmpl->bind & PIPE_BIND_SCANOUT)) { -+ if (screen->ro && should_scanout) { - struct winsys_handle handle; - struct pipe_resource scanout_tmpl = { - .target = prsc->target, -@@ -979,7 +1002,7 @@ v3d_resource_from_handle(struct pipe_screen *pscreen, - } - } - -- if (screen->ro) { -+ if (screen->ro && !rsc->tiled) { - /* Make sure that renderonly has a handle to our buffer in the - * display's fd, so that a later renderonly_get_handle() - * returns correct handles or GEM names. -diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c -index 2225edf85bd..1d4f619d710 100644 ---- a/src/gallium/drivers/v3d/v3d_screen.c -+++ b/src/gallium/drivers/v3d/v3d_screen.c -@@ -47,6 +47,42 @@ - #include "compiler/v3d_compiler.h" - #include "drm-uapi/drm_fourcc.h" - -+#ifdef HAVE_WAYLAND_PLATFORM -+#include -+#endif -+ -+#ifdef HAVE_X11_PLATFORM -+#include -+#endif -+ -+static bool -+check_x_session() -+{ -+ bool xcb_connection = false; -+ -+#ifdef HAVE_WAYLAND_PLATFORM -+ struct wl_display *display; -+ -+ display = wl_display_connect(NULL); -+ -+ if (display) { -+ wl_display_disconnect(display); -+ return xcb_connection; -+ } -+#endif -+ -+#ifdef HAVE_X11_PLATFORM -+ xcb_connection_t *conn; -+ -+ conn = xcb_connect(NULL, NULL); -+ -+ if (!xcb_connection_has_error(conn)) -+ xcb_connection = true; -+ xcb_disconnect(conn); -+#endif -+ return xcb_connection; -+} -+ - static const char * - v3d_screen_get_name(struct pipe_screen *pscreen) - { -@@ -945,6 +981,29 @@ v3d_screen_create(int fd, const struct pipe_screen_config *config, - v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH); - screen->has_perfmon = v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_PERFMON); - -+ screen->ignore_scanout_usages = getenv("V3D_IGNORE_SCANOUT_USAGES"); -+ -+ const char *is_xserver_process = -+ "v3d_is_xserver_process"; -+ screen->is_xserver_process = -+ driCheckOption(config->options, -+ is_xserver_process, -+ DRI_BOOL) && -+ driQueryOptionb(config->options, -+ is_xserver_process); -+ -+ const char *maintain_ignorable_scanout_name = -+ "v3d_maintain_ignorable_scanout"; -+ screen->maintain_ignorable_scanout = -+ driCheckOption(config->options, -+ maintain_ignorable_scanout_name, -+ DRI_BOOL) && -+ driQueryOptionb(config->options, -+ maintain_ignorable_scanout_name); -+ -+ screen->has_x_session = !screen->is_xserver_process && -+ check_x_session(); -+ - v3d_fence_init(screen); - - v3d_process_debug_variable(); -diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h -index 1da9b83c965..c0f22707075 100644 ---- a/src/gallium/drivers/v3d/v3d_screen.h -+++ b/src/gallium/drivers/v3d/v3d_screen.h -@@ -83,6 +83,12 @@ struct v3d_screen { - bool has_cache_flush; - bool has_perfmon; - bool nonmsaa_texture_size_limit; -+ bool ignore_scanout_usages; -+ bool is_xserver_process; -+ bool maintain_ignorable_scanout; -+ -+ /* Are we running in an X session? */ -+ bool has_x_session; - - struct v3d_simulator_file *sim_file; - -diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf -index 948c1ef78ba..2de7505521c 100644 ---- a/src/util/00-mesa-defaults.conf -+++ b/src/util/00-mesa-defaults.conf -@@ -77,6 +77,7 @@ TODO: document the other workarounds. - - - - - -@@ -767,6 +768,7 @@ TODO: document the other workarounds. - - - - - - - - -diff --git a/src/util/driconf.h b/src/util/driconf.h -index 042ee27d9a3..56511f6615e 100644 ---- a/src/util/driconf.h -+++ b/src/util/driconf.h -@@ -521,6 +521,14 @@ - DRI_CONF_OPT_B(v3d_nonmsaa_texture_size_limit, def, \ - "Report the non-MSAA-only texture size limit") - -+#define DRI_CONF_V3D_IS_XSERVER_PROCESS(def) \ -+ DRI_CONF_OPT_B(v3d_is_xserver_process, def, \ -+ "Identifies if the application is the Xserver.") -+ -+#define DRI_CONF_V3D_MAINTAIN_IGNORABLE_SCANOUT(def) \ -+ DRI_CONF_OPT_B(v3d_maintain_ignorable_scanout, def, \ -+ "Maintain SCANOUT usage on resource allocations when the environment allows ignoring SCANOUT usage.") -+ - /** - * \brief virgl specific configuration options - */ --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0139-Add-a-hack-to-avoid-the-shadow-tex-update-for-import.patch b/projects/RPi/devices/RPi5/patches/mesa/0139-Add-a-hack-to-avoid-the-shadow-tex-update-for-import.patch deleted file mode 100644 index a453a83892..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0139-Add-a-hack-to-avoid-the-shadow-tex-update-for-import.patch +++ /dev/null @@ -1,117 +0,0 @@ -From fc1fe85f01a67ef6e5758f1022950ad79b1b305a Mon Sep 17 00:00:00 2001 -From: Neil Roberts -Date: Mon, 5 Jul 2021 20:19:06 +0200 -Subject: [PATCH 139/142] Add a hack to avoid the shadow tex update for - imported linear texs - -This adds a hacky interface so that an application can override the -mechanism used to detect when to update the shadow texture which is used -when importing a linear texture. The application can enable this by -calling: - -glTexParameteri(GL_TEXTURE_2D, GL_SYNC_CONDITION, 1); - -And then whenever it determines that the shadow texture should be -updated it can call: - -glTexParameteri(GL_TEXTURE_2D, GL_SYNC_STATUS, 1); - -(cherry picked from commit 1269e2cfbfa876fdc85037b9435085174d76ad57) ---- - src/gallium/drivers/v3d/v3d_resource.c | 5 ++++- - src/gallium/include/pipe/p_state.h | 4 ++++ - src/mesa/main/mtypes.h | 3 +++ - src/mesa/main/texparam.c | 18 ++++++++++++++++++ - 4 files changed, 29 insertions(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c -index 46de1b16ae0..8e31acb0ff0 100644 ---- a/src/gallium/drivers/v3d/v3d_resource.c -+++ b/src/gallium/drivers/v3d/v3d_resource.c -@@ -1048,7 +1048,9 @@ v3d_update_shadow_texture(struct pipe_context *pctx, - - assert(view->texture != pview->texture); - -- if (shadow->writes == orig->writes && orig->bo->private) -+ if (shadow->writes == orig->writes && -+ orig->base.sync_status == 0 && -+ (orig->bo->private || orig->base.sync_condition)) - return; - - perf_debug("Updating %dx%d@%d shadow for linear texture\n", -@@ -1091,6 +1093,7 @@ v3d_update_shadow_texture(struct pipe_context *pctx, - } - - shadow->writes = orig->writes; -+ orig->base.sync_status = 0; - } - - static struct pipe_surface * -diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h -index 549e4d21c05..abc58552544 100644 ---- a/src/gallium/include/pipe/p_state.h -+++ b/src/gallium/include/pipe/p_state.h -@@ -610,6 +610,10 @@ struct pipe_resource - unsigned bind; /**< bitmask of PIPE_BIND_x */ - unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */ - -+ /* Hack for avoiding sync on v3d */ -+ unsigned sync_condition; -+ unsigned sync_status; -+ - /** - * For planar images, ie. YUV EGLImage external, etc, pointer to the - * next plane. -diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h -index 77c38bf48d5..1eb2dac8018 100644 ---- a/src/mesa/main/mtypes.h -+++ b/src/mesa/main/mtypes.h -@@ -1058,6 +1058,9 @@ struct gl_texture_object - * the pipe_resource *pt above. - */ - bool needs_validation; -+ -+ /* Hack for avoiding sync on v3d */ -+ GLboolean SyncCondition; - }; - - -diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c -index 001cc185722..139db3ce3e2 100644 ---- a/src/mesa/main/texparam.c -+++ b/src/mesa/main/texparam.c -@@ -274,6 +274,13 @@ set_tex_parameteri(struct gl_context *ctx, - } - - switch (pname) { -+ case GL_SYNC_CONDITION: -+ if (!!texObj->SyncCondition == !!params[0]) -+ return GL_FALSE; -+ texObj->SyncCondition = !!params[0]; -+ return GL_TRUE; -+ case GL_SYNC_STATUS: -+ return GL_TRUE; - case GL_TEXTURE_MIN_FILTER: - if (!_mesa_target_allows_setting_sampler_parameters(texObj->Target)) - goto invalid_dsa; -@@ -931,6 +938,17 @@ _mesa_texture_parameter_invalidate(struct gl_context *ctx, - { - if (texparam_invalidates_sampler_views(pname)) - st_texture_release_all_sampler_views(st_context(ctx), texObj); -+ -+ switch (pname) { -+ case GL_SYNC_CONDITION: -+ texObj->pt->sync_condition = texObj->SyncCondition; -+ break; -+ case GL_SYNC_STATUS: -+ texObj->pt->sync_status = 1; -+ break; -+ default: -+ ; /* nothing */ -+ } - } - - void --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0140-vc4-Fix-mask-RGBA-validation-at-YUV-blit.patch b/projects/RPi/devices/RPi5/patches/mesa/0140-vc4-Fix-mask-RGBA-validation-at-YUV-blit.patch deleted file mode 100644 index 1336841a6a..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0140-vc4-Fix-mask-RGBA-validation-at-YUV-blit.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 270deb428f1de371492a5e6185fe410c4329eab4 Mon Sep 17 00:00:00 2001 -From: Jose Maria Casanova Crespo -Date: Mon, 25 Sep 2023 21:16:59 +0200 -Subject: [PATCH 140/142] vc4: Fix mask RGBA validation at YUV blit - -Solves regression on video players using GPU for -video decoding that just displays the video in green. - -Fixes: d13da7782cd80 ("vc4: call blit paths in chain") ---- - src/gallium/drivers/vc4/vc4_blit.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c -index 2cf65b5f585..87b2369b7ad 100644 ---- a/src/gallium/drivers/vc4/vc4_blit.c -+++ b/src/gallium/drivers/vc4/vc4_blit.c -@@ -347,7 +347,7 @@ vc4_yuv_blit(struct pipe_context *pctx, struct pipe_blit_info *info) - struct vc4_resource *dst = vc4_resource(info->dst.resource); - bool ok; - -- if (info->mask & PIPE_MASK_RGBA) -+ if (!(info->mask & PIPE_MASK_RGBA)) - return; - - if (src->tiled) --- -2.39.2 - diff --git a/projects/RPi/devices/RPi5/patches/mesa/0141-vc4-mark-buffers-as-initialized-at-vc4_texture_subda.patch b/projects/RPi/devices/RPi5/patches/mesa/0141-vc4-mark-buffers-as-initialized-at-vc4_texture_subda.patch deleted file mode 100644 index e969ec933b..0000000000 --- a/projects/RPi/devices/RPi5/patches/mesa/0141-vc4-mark-buffers-as-initialized-at-vc4_texture_subda.patch +++ /dev/null @@ -1,175 +0,0 @@ -From f843fbceb381f8c82074e8b68583fbfe57c48a6e Mon Sep 17 00:00:00 2001 -From: Jose Maria Casanova Crespo -Date: Thu, 8 Jun 2023 00:57:15 +0200 -Subject: [PATCH 141/142] vc4: mark buffers as initialized at - vc4_texture_subdata - -This fixes several tests when the initially uploaded buffer -from CPU was being ignored because vc4_texture_subdata was not -marking the resource as written/initialized. - -The usage flags management available at vc4_resource_transfer_map -is generalized into vc4_map_usage_prep and reused at -vc4_resource_transfer_map. This makes vc4 implementation more similar -to v3d. - -This fixes 7 text in the following subgroups: - -dEQP-GLES2.functional.fbo.render.texsubimage.* - -dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.* - -spec@arb_clear_texture@arb_clear_texture-* - -Cc: mesa-stable -Reviewed-by: Juan A. Suarez -Reviewed-by: Emma Anholt -Part-of: ---- - src/broadcom/ci/broadcom-rpi3-fails.txt | 11 ---- - src/gallium/drivers/vc4/vc4_resource.c | 71 +++++++++++++++---------- - 2 files changed, 44 insertions(+), 38 deletions(-) - -diff --git a/src/broadcom/ci/broadcom-rpi3-fails.txt b/src/broadcom/ci/broadcom-rpi3-fails.txt -index 5522310d91a..e49e77b1436 100644 ---- a/src/broadcom/ci/broadcom-rpi3-fails.txt -+++ b/src/broadcom/ci/broadcom-rpi3-fails.txt -@@ -18,11 +18,6 @@ dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail - - dEQP-GLES2.functional.depth_stencil_clear.depth_stencil_masked,Fail - --# A glTexImage, glDraw, glTexSubImage sequence into a texture is missing what looks like the drawing. --dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgba,Fail --# A glTexImage, glDraw, glTexSubImage, glDraw sequence into a texture is missing what looks like the first drawing. --dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgba,Fail -- - # Sampling grid slightly off in test 2? - dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_linear_linear_mirror_rgba8888,Fail - dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_linear_linear_repeat_rgba8888,Fail -@@ -38,12 +33,6 @@ dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_clamp_non_square,Fa - dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_mirror_non_square,Fail - dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_repeat_non_square,Fail - --# Sequence of glTexImage, glDraw, glCopyTexSubImage. --# background red/green checkerboard on the left side is incorrectly white. --dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.2d_rgba,Fail --# Maybe it was copied as RGB instead of RGBA? --dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.cube_rgba,Fail -- - # One of the pixels on the left edge near the bottom is wrong for both min and - # mag. Also a line of pixels through the image in minification. - dEQP-GLES2.functional.texture.wrap.clamp_clamp_nearest_npot_etc1,Fail -diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c -index ad2791aa972..0a3a435a46c 100644 ---- a/src/gallium/drivers/vc4/vc4_resource.c -+++ b/src/gallium/drivers/vc4/vc4_resource.c -@@ -95,34 +95,13 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx, - slab_free(&vc4->transfer_pool, ptrans); - } - --static void * --vc4_resource_transfer_map(struct pipe_context *pctx, -- struct pipe_resource *prsc, -- unsigned level, unsigned usage, -- const struct pipe_box *box, -- struct pipe_transfer **pptrans) -+static void -+vc4_map_usage_prep(struct pipe_context *pctx, -+ struct pipe_resource *prsc, -+ unsigned usage) - { - struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_resource *rsc = vc4_resource(prsc); -- struct vc4_transfer *trans; -- struct pipe_transfer *ptrans; -- enum pipe_format format = prsc->format; -- char *buf; -- -- /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is -- * being mapped. -- */ -- if ((usage & PIPE_MAP_DISCARD_RANGE) && -- !(usage & PIPE_MAP_UNSYNCHRONIZED) && -- !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && -- prsc->last_level == 0 && -- prsc->width0 == box->width && -- prsc->height0 == box->height && -- prsc->depth0 == box->depth && -- prsc->array_size == 1 && -- rsc->bo->private) { -- usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; -- } - - if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) { - if (vc4_resource_bo_alloc(rsc)) { -@@ -131,6 +110,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx, - */ - if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) - vc4->dirty |= VC4_DIRTY_VTXBUF; -+ if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER) -+ vc4->dirty |= VC4_DIRTY_CONSTBUF; - } else { - /* If we failed to reallocate, flush users so that we - * don't violate any syncing requirements. -@@ -139,7 +120,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx, - } - } else if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { - /* If we're writing and the buffer is being used by the CL, we -- * have to flush the CL first. If we're only reading, we need -+ * have to flush the CL first. If we're only reading, we need - * to flush if the CL has written our buffer. - */ - if (usage & PIPE_MAP_WRITE) -@@ -152,6 +133,38 @@ vc4_resource_transfer_map(struct pipe_context *pctx, - rsc->writes++; - rsc->initialized_buffers = ~0; - } -+} -+ -+static void * -+vc4_resource_transfer_map(struct pipe_context *pctx, -+ struct pipe_resource *prsc, -+ unsigned level, unsigned usage, -+ const struct pipe_box *box, -+ struct pipe_transfer **pptrans) -+{ -+ struct vc4_context *vc4 = vc4_context(pctx); -+ struct vc4_resource *rsc = vc4_resource(prsc); -+ struct vc4_transfer *trans; -+ struct pipe_transfer *ptrans; -+ enum pipe_format format = prsc->format; -+ char *buf; -+ -+ /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is -+ * being mapped. -+ */ -+ if ((usage & PIPE_MAP_DISCARD_RANGE) && -+ !(usage & PIPE_MAP_UNSYNCHRONIZED) && -+ !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && -+ prsc->last_level == 0 && -+ prsc->width0 == box->width && -+ prsc->height0 == box->height && -+ prsc->depth0 == box->depth && -+ prsc->array_size == 1 && -+ rsc->bo->private) { -+ usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; -+ } -+ -+ vc4_map_usage_prep(pctx, prsc, usage); - - trans = slab_zalloc(&vc4->transfer_pool); - if (!trans) -@@ -240,8 +253,12 @@ vc4_texture_subdata(struct pipe_context *pctx, - } - - /* Otherwise, map and store the texture data directly into the tiled -- * texture. -+ * texture. Note that gallium's texture_subdata may be called with -+ * obvious usage flags missing! - */ -+ vc4_map_usage_prep(pctx, prsc, usage | (PIPE_MAP_WRITE | -+ PIPE_MAP_DISCARD_RANGE)); -+ - void *buf; - if (usage & PIPE_MAP_UNSYNCHRONIZED) - buf = vc4_bo_map_unsynchronized(rsc->bo); --- -2.39.2 -