mirror of
https://github.com/LibreELEC/LibreELEC.tv.git
synced 2025-07-24 11:16:51 +00:00
commit
80f59f03b9
@ -3,8 +3,8 @@
|
||||
# Copyright (C) 2018-present Team LibreELEC (https://libreelec.tv)
|
||||
|
||||
PKG_NAME="mesa"
|
||||
PKG_VERSION="23.2.1"
|
||||
PKG_SHA256="64de0616fc2d801f929ab1ac2a4f16b3e2783c4309a724c8a259b20df8bbc1cc"
|
||||
PKG_VERSION="23.3.0"
|
||||
PKG_SHA256="50f729dd60ed6335b989095baad81ef5edf7cfdd4b4b48b9b955917cb07d69c5"
|
||||
PKG_LICENSE="OSS"
|
||||
PKG_SITE="http://www.mesa3d.org/"
|
||||
PKG_URL="https://mesa.freedesktop.org/archive/mesa-${PKG_VERSION}.tar.xz"
|
||||
@ -13,6 +13,10 @@ PKG_LONGDESC="Mesa is a 3-D graphics library with an API."
|
||||
|
||||
get_graphicdrivers
|
||||
|
||||
if [ "${DEVICE}" = "Dragonboard" ]; then
|
||||
PKG_DEPENDS_TARGET+=" libarchive libxml2 lua54"
|
||||
fi
|
||||
|
||||
PKG_MESON_OPTS_TARGET="-Dgallium-drivers=${GALLIUM_DRIVERS// /,} \
|
||||
-Dgallium-extra-hud=false \
|
||||
-Dgallium-omx=disabled \
|
||||
|
@ -1,332 +0,0 @@
|
||||
From f62aa2640f92796ff5216da0a5d3c8f46a2855b4 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Mon, 26 Apr 2021 00:02:21 +0200
|
||||
Subject: [PATCH 001/142] broadcom(cle,clif,common,simulator): add 7.1 version
|
||||
on the list of versions to build
|
||||
|
||||
This adds 7.1 to the list of available V3D_VERSION, and first changes
|
||||
on the simulator needed to get it working.
|
||||
|
||||
Note that we needed to touch all those 4 codebases because it is
|
||||
needed if we want to use V3D_DEBUG=clif with the simulator, which is
|
||||
the easier way to see which packets a vulkan program is using.
|
||||
|
||||
About the simulator, this commit only handles the rename of some
|
||||
registers. Any additional changes needed to get a proper support for
|
||||
v71 will be handled in following commits.
|
||||
---
|
||||
src/broadcom/cle/meson.build | 3 +-
|
||||
src/broadcom/cle/v3dx_pack.h | 2 +
|
||||
src/broadcom/clif/clif_private.h | 2 +
|
||||
src/broadcom/common/v3d_device_info.c | 1 +
|
||||
src/broadcom/common/v3d_macros.h | 3 +
|
||||
src/broadcom/meson.build | 2 +-
|
||||
src/broadcom/simulator/v3d_simulator.c | 81 +++++++++++++++++++------
|
||||
src/broadcom/simulator/v3d_simulator.h | 5 ++
|
||||
src/broadcom/simulator/v3dx_simulator.c | 31 ++++++++--
|
||||
9 files changed, 106 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build
|
||||
index 31a0d5bfa94..8ac32b313e4 100644
|
||||
--- a/src/broadcom/cle/meson.build
|
||||
+++ b/src/broadcom/cle/meson.build
|
||||
@@ -23,7 +23,8 @@ v3d_versions = [
|
||||
[21, 21],
|
||||
[33, 33],
|
||||
[41, 33],
|
||||
- [42, 33]
|
||||
+ [42, 33],
|
||||
+ [71, 33]
|
||||
]
|
||||
|
||||
v3d_xml_files = []
|
||||
diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h
|
||||
index 5762e5aaa70..e5a1eb26698 100644
|
||||
--- a/src/broadcom/cle/v3dx_pack.h
|
||||
+++ b/src/broadcom/cle/v3dx_pack.h
|
||||
@@ -37,6 +37,8 @@
|
||||
# include "cle/v3d_packet_v41_pack.h"
|
||||
#elif (V3D_VERSION == 42)
|
||||
# include "cle/v3d_packet_v42_pack.h"
|
||||
+#elif (V3D_VERSION == 71)
|
||||
+# include "cle/v3d_packet_v71_pack.h"
|
||||
#else
|
||||
# error "Need to add a pack header include for this v3d version"
|
||||
#endif
|
||||
diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h
|
||||
index 6ace62b0310..cda407a00bf 100644
|
||||
--- a/src/broadcom/clif/clif_private.h
|
||||
+++ b/src/broadcom/clif/clif_private.h
|
||||
@@ -101,6 +101,8 @@ bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
+bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
+ const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
|
||||
static inline void
|
||||
out(struct clif_dump *clif, const char *fmt, ...)
|
||||
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
|
||||
index 272190eb2e5..7e0862f1f02 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.c
|
||||
+++ b/src/broadcom/common/v3d_device_info.c
|
||||
@@ -66,6 +66,7 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
case 33:
|
||||
case 41:
|
||||
case 42:
|
||||
+ case 71:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,
|
||||
diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h
|
||||
index fe89398208a..b4291fb5350 100644
|
||||
--- a/src/broadcom/common/v3d_macros.h
|
||||
+++ b/src/broadcom/common/v3d_macros.h
|
||||
@@ -41,6 +41,9 @@
|
||||
#elif (V3D_VERSION == 42)
|
||||
# define V3DX(x) V3D42_##x
|
||||
# define v3dX(x) v3d42_##x
|
||||
+#elif (V3D_VERSION == 71)
|
||||
+# define V3DX(x) V3D71_##x
|
||||
+# define v3dX(x) v3d71_##x
|
||||
#else
|
||||
# error "Need to add prefixing macros for this v3d version"
|
||||
#endif
|
||||
diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build
|
||||
index 2c10e46b188..73cb7aa0575 100644
|
||||
--- a/src/broadcom/meson.build
|
||||
+++ b/src/broadcom/meson.build
|
||||
@@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle')
|
||||
|
||||
subdir('cle')
|
||||
|
||||
-v3d_versions = ['33', '41', '42']
|
||||
+v3d_versions = ['33', '41', '42', '71']
|
||||
v3d_libs = []
|
||||
|
||||
if with_gallium_v3d or with_broadcom_vk
|
||||
diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c
|
||||
index eea5d3f050e..5cceb1a82cc 100644
|
||||
--- a/src/broadcom/simulator/v3d_simulator.c
|
||||
+++ b/src/broadcom/simulator/v3d_simulator.c
|
||||
@@ -490,10 +490,20 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
|
||||
|
||||
v3d_simulator_perfmon_switch(fd, submit->perfmon_id);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
- else
|
||||
- v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
+ v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ break;
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ v3d71_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ break;
|
||||
+ default:
|
||||
+ unreachable("Unsupported V3D version\n");
|
||||
+ }
|
||||
|
||||
util_dynarray_foreach(&sim_state.bin_oom, struct v3d_simulator_bo *,
|
||||
sim_bo) {
|
||||
@@ -635,10 +645,17 @@ v3d_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
|
||||
static int
|
||||
v3d_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args)
|
||||
{
|
||||
- if (sim_state.ver >= 41)
|
||||
- return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
- else
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
return v3d33_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
+ case 71:
|
||||
+ return v3d71_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
+ default:
|
||||
+ unreachable("Unsupported V3D version\n");
|
||||
+ }
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -652,10 +669,20 @@ v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args)
|
||||
v3d_simulator_copy_in_handle(file, args->bo_handles[2]);
|
||||
v3d_simulator_copy_in_handle(file, args->bo_handles[3]);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
- else
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
+ break;
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ ret = v3d71_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
+ break;
|
||||
+ default:
|
||||
+ unreachable("Unsupported V3D version\n");
|
||||
+ }
|
||||
|
||||
v3d_simulator_copy_out_handle(file, args->bo_handles[0]);
|
||||
|
||||
@@ -682,11 +709,19 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
|
||||
|
||||
v3d_simulator_perfmon_switch(fd, args->perfmon_id);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
|
||||
- file->gmp->ofs);
|
||||
- else
|
||||
- ret = -1;
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
|
||||
+ file->gmp->ofs);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ ret = v3d71_simulator_submit_csd_ioctl(sim_state.v3d, args,
|
||||
+ file->gmp->ofs);
|
||||
+ break;
|
||||
+ default:
|
||||
+ ret = -1;
|
||||
+ }
|
||||
|
||||
for (int i = 0; i < args->bo_handle_count; i++)
|
||||
v3d_simulator_copy_out_handle(file, bo_handles[i]);
|
||||
@@ -880,10 +915,20 @@ v3d_simulator_init_global()
|
||||
|
||||
util_dynarray_init(&sim_state.bin_oom, NULL);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- v3d41_simulator_init_regs(sim_state.v3d);
|
||||
- else
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
v3d33_simulator_init_regs(sim_state.v3d);
|
||||
+ break;
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ v3d41_simulator_init_regs(sim_state.v3d);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ v3d71_simulator_init_regs(sim_state.v3d);
|
||||
+ break;
|
||||
+ default:
|
||||
+ unreachable("Not supported V3D version\n");
|
||||
+ }
|
||||
}
|
||||
|
||||
struct v3d_simulator_file *
|
||||
diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h
|
||||
index ddb079c1455..1472c313a03 100644
|
||||
--- a/src/broadcom/simulator/v3d_simulator.h
|
||||
+++ b/src/broadcom/simulator/v3d_simulator.h
|
||||
@@ -52,6 +52,11 @@ uint32_t v3d_simulator_get_mem_free(void);
|
||||
# define v3dX(x) v3d41_##x
|
||||
# include "v3dx_simulator.h"
|
||||
# undef v3dX
|
||||
+
|
||||
+# define v3dX(x) v3d71_##x
|
||||
+# include "v3dx_simulator.h"
|
||||
+# undef v3dX
|
||||
+
|
||||
#endif
|
||||
|
||||
#endif
|
||||
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
|
||||
index c9322f0397b..723796b16c9 100644
|
||||
--- a/src/broadcom/simulator/v3dx_simulator.c
|
||||
+++ b/src/broadcom/simulator/v3dx_simulator.c
|
||||
@@ -46,11 +46,15 @@
|
||||
|
||||
#define HW_REGISTER_RO(x) (x)
|
||||
#define HW_REGISTER_RW(x) (x)
|
||||
-#if V3D_VERSION >= 41
|
||||
+#if V3D_VERSION == 71
|
||||
+#include "libs/core/v3d/registers/7.1.5.1/v3d.h"
|
||||
+#else
|
||||
+#if V3D_VERSION == 41 || V3D_VERSION == 42
|
||||
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
|
||||
#else
|
||||
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
|
||||
#endif
|
||||
+#endif
|
||||
|
||||
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
|
||||
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
|
||||
@@ -310,16 +314,17 @@ v3d_isr_core(struct v3d_hw *v3d,
|
||||
return;
|
||||
}
|
||||
|
||||
+#if V3D_VERSION <= 42
|
||||
if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
|
||||
fprintf(stderr, "GMP violation at 0x%08x\n",
|
||||
V3D_READ(V3D_GMP_VIO_ADDR));
|
||||
- abort();
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Unexpected ISR with core status 0x%08x\n",
|
||||
core_status);
|
||||
}
|
||||
abort();
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -396,6 +401,18 @@ v3d_isr_hub(struct v3d_hw *v3d)
|
||||
}
|
||||
|
||||
handle_mmu_interruptions(v3d, hub_status);
|
||||
+
|
||||
+#if V3D_VERSION == 71
|
||||
+ if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
|
||||
+ fprintf(stderr, "GMP violation at 0x%08x\n",
|
||||
+ V3D_READ(V3D_GMP_VIO_ADDR));
|
||||
+ } else {
|
||||
+ fprintf(stderr,
|
||||
+ "Unexpected ISR with status 0x%08x\n",
|
||||
+ hub_status);
|
||||
+ }
|
||||
+ abort();
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -436,8 +453,11 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
|
||||
* for tracing. Perhaps we should evaluate to do the same here and add
|
||||
* some debug options.
|
||||
*/
|
||||
- uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
|
||||
- V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
|
||||
+ uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
|
||||
+#if V3D_VERSION <= 42
|
||||
+ core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
|
||||
+#endif
|
||||
+
|
||||
V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
|
||||
V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
|
||||
|
||||
@@ -447,6 +467,9 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
|
||||
V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */
|
||||
V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
|
||||
|
||||
+#if V3D_VERSION == 71
|
||||
+ hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
|
||||
+#endif
|
||||
V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
|
||||
V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,7 +1,7 @@
|
||||
From 3322c102282cf726ae575b122358060abd5b24db Mon Sep 17 00:00:00 2001
|
||||
From 54cc206be2d48916862d7e264e886f58b27dd653 Mon Sep 17 00:00:00 2001
|
||||
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
|
||||
Date: Thu, 5 Oct 2023 19:32:10 +0100
|
||||
Subject: [PATCH 142/142] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC
|
||||
Subject: [PATCH 1/3] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC
|
||||
devices
|
||||
|
||||
Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
|
||||
@ -11,7 +11,7 @@ Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build
|
||||
index fbec1da957b..59daf3b6fb6 100644
|
||||
index 66619bba0db..443923772e8 100644
|
||||
--- a/src/gallium/targets/dri/meson.build
|
||||
+++ b/src/gallium/targets/dri/meson.build
|
||||
@@ -68,6 +68,9 @@ libgallium_dri = shared_library(
|
||||
@ -22,10 +22,10 @@ index fbec1da957b..59daf3b6fb6 100644
|
||||
+ 'drm-rp1-dsi_dri.so',
|
||||
+ 'drm-rp1-vec_dri.so',
|
||||
'exynos_dri.so',
|
||||
'hdlcd_dri.so',
|
||||
'hx8357d_dri.so',
|
||||
'ili9225_dri.so',
|
||||
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
|
||||
index d506869cbb4..ecb25edd03b 100644
|
||||
index 9d3069eb004..79f60a7224a 100644
|
||||
--- a/src/gallium/targets/dri/target.c
|
||||
+++ b/src/gallium/targets/dri/target.c
|
||||
@@ -98,6 +98,9 @@ DEFINE_LOADER_DRM_ENTRYPOINT(tegra);
|
||||
@ -36,8 +36,8 @@ index d506869cbb4..ecb25edd03b 100644
|
||||
+DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_dsi)
|
||||
+DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_vec)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(exynos)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(hdlcd)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(hx8357d)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(ili9225)
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,30 +0,0 @@
|
||||
From 9e85edd1b347b0e779b393f463f42044a720bcff Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 13:16:49 +0200
|
||||
Subject: [PATCH 002/142] broadcom/simulator: reset CFG7 for compute dispatch
|
||||
in v71
|
||||
|
||||
This register is new in 7.x, it doesn't seem that we need to
|
||||
do anything specific for now, but let's make sure it is reset
|
||||
every time.
|
||||
---
|
||||
src/broadcom/simulator/v3dx_simulator.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
|
||||
index 723796b16c9..f23b0538de3 100644
|
||||
--- a/src/broadcom/simulator/v3dx_simulator.c
|
||||
+++ b/src/broadcom/simulator/v3dx_simulator.c
|
||||
@@ -227,6 +227,9 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
|
||||
+#if V3D_VERSION >= 71
|
||||
+ V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
|
||||
+#endif
|
||||
/* CFG0 kicks off the job */
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,8 +1,8 @@
|
||||
From 4f33de7771621e15aae3e3c60c09fd5a2f29bdac Mon Sep 17 00:00:00 2001
|
||||
From 80050d6960a688d061eac9798c6f5f1b0eb3e960 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 30 Nov 2021 02:39:20 +0100
|
||||
Subject: [PATCH 066/142] nir: add new opcodes to map new v71
|
||||
packing/conversion instructions
|
||||
Subject: [PATCH 2/3] nir: add new opcodes to map new v71 packing/conversion
|
||||
instructions
|
||||
|
||||
Since v71, broadcom hw includes specific packing/conversion
|
||||
instructions, so this commit adds opcodes to be able to make use of
|
||||
@ -28,17 +28,14 @@ integer.
|
||||
Interestingly broadcom also defines a similar one that packs the
|
||||
higher halfword. Not used yet.
|
||||
|
||||
FIXME: vftounorm10lo/hi constant expression implementation is somewhat
|
||||
convoluted. It is likely that it could be implemented in a more easy
|
||||
way. But it works (passing the tests added with CTS issue #3372,
|
||||
created with this change in mind).
|
||||
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
---
|
||||
src/compiler/nir/nir_constant_expressions.py | 106 +++++++++++++++++++
|
||||
src/compiler/nir/nir_opcodes.py | 44 ++++++++
|
||||
2 files changed, 150 insertions(+)
|
||||
src/compiler/nir/nir_constant_expressions.py | 94 ++++++++++++++++++++
|
||||
src/compiler/nir/nir_opcodes.py | 52 +++++++++++
|
||||
2 files changed, 146 insertions(+)
|
||||
|
||||
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
|
||||
index e6383b67737..46395d79a89 100644
|
||||
index e6383b67737..0d0797526a9 100644
|
||||
--- a/src/compiler/nir/nir_constant_expressions.py
|
||||
+++ b/src/compiler/nir/nir_constant_expressions.py
|
||||
@@ -62,6 +62,8 @@ template = """\
|
||||
@ -50,7 +47,7 @@ index e6383b67737..46395d79a89 100644
|
||||
#include "nir_constant_expressions.h"
|
||||
|
||||
/**
|
||||
@@ -277,6 +279,110 @@ unpack_half_1x16(uint16_t u)
|
||||
@@ -277,6 +279,98 @@ unpack_half_1x16(uint16_t u)
|
||||
return _mesa_half_to_float(u);
|
||||
}
|
||||
|
||||
@ -61,24 +58,22 @@ index e6383b67737..46395d79a89 100644
|
||||
+static uint32_t v11fpack_v3d(const uint32_t src0,
|
||||
+ const uint32_t src1)
|
||||
+{
|
||||
+ float rgb[3];
|
||||
+
|
||||
+ rgb[0] = unpack_half_1x16((src0 & 0xffff));
|
||||
+ rgb[1] = unpack_half_1x16((src0 >> 16));
|
||||
+ rgb[2] = unpack_half_1x16((src1 & 0xffff));
|
||||
+ float rgb[3] = {
|
||||
+ unpack_half_1x16((src0 & 0xffff)),
|
||||
+ unpack_half_1x16((src0 >> 16)),
|
||||
+ unpack_half_1x16((src1 & 0xffff)),
|
||||
+ };
|
||||
+
|
||||
+ return float3_to_r11g11b10f(rgb);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * The three methods below are basically wrappers over pack_s/unorm_1x8/1x16,
|
||||
+ * as it receives a uint16_t val instead of a float
|
||||
+ * as they receives a uint16_t val instead of a float
|
||||
+ */
|
||||
+static uint8_t _mesa_half_to_snorm8(uint16_t val)
|
||||
+static inline uint8_t _mesa_half_to_snorm8(uint16_t val)
|
||||
+{
|
||||
+ float x = _mesa_half_to_float(val);
|
||||
+
|
||||
+ return pack_snorm_1x8(x);
|
||||
+ return pack_snorm_1x8(_mesa_half_to_float(val));
|
||||
+}
|
||||
+
|
||||
+static uint16_t _mesa_float_to_snorm16(uint32_t val)
|
||||
@ -95,51 +90,42 @@ index e6383b67737..46395d79a89 100644
|
||||
+ return pack_unorm_1x16(aux.f);
|
||||
+}
|
||||
+
|
||||
+/* FIXME: the implementation below of vftounorm10hi/lo is somewhat too
|
||||
+ * verbose. It is likely that there would be a simpler way to implement
|
||||
+ * it.
|
||||
+ */
|
||||
+static uint32_t float_pack16_v3d(uint32_t f32)
|
||||
+static inline uint32_t float_pack16_v3d(uint32_t f32)
|
||||
+{
|
||||
+ float f = uif(f32);
|
||||
+ return _mesa_float_to_half(f);
|
||||
+ return _mesa_float_to_half(uif(f32));
|
||||
+}
|
||||
+
|
||||
+static uint32_t float_unpack16_v3d(uint32_t f16)
|
||||
+static inline uint32_t float_unpack16_v3d(uint32_t f16)
|
||||
+{
|
||||
+ float f = _mesa_half_to_float(f16);
|
||||
+ return fui(f);
|
||||
+ return fui(_mesa_half_to_float(f16));
|
||||
+}
|
||||
+
|
||||
+static uint32_t vfpack_v3d(uint32_t a, uint32_t b)
|
||||
+static inline uint32_t vfpack_v3d(uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ return float_pack16_v3d(b) << 16 | float_pack16_v3d(a);
|
||||
+}
|
||||
+
|
||||
+static uint32_t vfsat_v3d(uint32_t a)
|
||||
+static inline uint32_t vfsat_v3d(uint32_t a)
|
||||
+{
|
||||
+ return vfpack_v3d(
|
||||
+ fui(SATURATE(_mesa_half_to_float(a & 0xffff))),
|
||||
+ fui(SATURATE(_mesa_half_to_float(a >> 16))));
|
||||
+ const uint32_t low = fui(SATURATE(_mesa_half_to_float(a & 0xffff)));
|
||||
+ const uint32_t high = fui(SATURATE(_mesa_half_to_float(a >> 16)));
|
||||
+
|
||||
+ return vfpack_v3d(low, high);
|
||||
+}
|
||||
+
|
||||
+static uint32_t fmul_v3d(uint32_t a, uint32_t b)
|
||||
+static inline uint32_t fmul_v3d(uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ float f = uif(a);
|
||||
+ float g = uif(b);
|
||||
+
|
||||
+ float x = f * g;
|
||||
+
|
||||
+ return fui(x);
|
||||
+ return fui(uif(a) * uif(b));
|
||||
+}
|
||||
+
|
||||
+#define L(x) float_unpack16_v3d((x) & 0xffff)
|
||||
+#define H(x) float_unpack16_v3d((x) >> 16)
|
||||
+#define V(f,a,b) vfpack_v3d(f(L(a), L(b)), f(H(a), H(b)))
|
||||
+
|
||||
+static uint32_t vfmul_v3d(uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ return V(fmul_v3d, a, b);
|
||||
+ const uint32_t low = fmul_v3d(float_unpack16_v3d(a & 0xffff),
|
||||
+ float_unpack16_v3d(b & 0xffff));
|
||||
+ const uint32_t high = fmul_v3d(float_unpack16_v3d(a >> 16),
|
||||
+ float_unpack16_v3d(b >> 16));
|
||||
+
|
||||
+ return vfpack_v3d(low, high);
|
||||
+}
|
||||
+
|
||||
+/* Convert 2x16-bit floating point to 2x10-bit unorm */
|
||||
@ -156,34 +142,41 @@ index e6383b67737..46395d79a89 100644
|
||||
+{
|
||||
+ return vfmul_v3d(vfsat_v3d(src0), 0x000303ff);
|
||||
+}
|
||||
+
|
||||
+
|
||||
/* Some typed vector structures to make things like src0.y work */
|
||||
typedef int8_t int1_t;
|
||||
typedef uint8_t uint1_t;
|
||||
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
|
||||
index e4d87aa6126..63aa7cfa315 100644
|
||||
index 0f81328f441..b70d9567cd6 100644
|
||||
--- a/src/compiler/nir/nir_opcodes.py
|
||||
+++ b/src/compiler/nir/nir_opcodes.py
|
||||
@@ -1393,6 +1393,50 @@ for (int i = 0; i < 32; i += 8) {
|
||||
@@ -1413,6 +1413,58 @@ for (int i = 0; i < 32; i += 8) {
|
||||
}
|
||||
""")
|
||||
|
||||
+# v3d-specific opcodes
|
||||
+
|
||||
+# v3d-specific (v71) instruction that packs parts of 2 2x16 floating point into
|
||||
+# r11g11b10 bits, rounding to nearest even
|
||||
+# v3d-specific (v71) instruction that packs bits of 2 2x16 floating point into
|
||||
+# r11g11b10 bits, rounding to nearest even, so
|
||||
+# dst[10:0] = float16_to_float11 (src0[15:0])
|
||||
+# dst[21:11] = float16_to_float11 (src0[31:16])
|
||||
+# dst[31:22] = float16_to_float10 (src1[15:0])
|
||||
+binop_convert("v11fpack_v3d", tuint32, tuint32, "",
|
||||
+ "v11fpack_v3d(src0, src1)")
|
||||
+
|
||||
+# v3d-specific (v71) instruction that packs 2x32 bit to 2x16 bit integer. The
|
||||
+# difference with pack_32_2x16_split is that the sources are 32bit too. So it
|
||||
+# receives 2 32-bit integer, and pack the lower halfword as 2x16 on a 32-bit
|
||||
+# pack.
|
||||
+# receives 2 32-bit integer, and packs the lower halfword as 2x16 on a 32-bit
|
||||
+# integer.
|
||||
+binop_horiz("vpack_v3d", 1, tuint32, 1, tuint32, 1, tuint32,
|
||||
+ "(src0.x & 0xffff) | (src1.x << 16)")
|
||||
+
|
||||
+# v3d-specific (v71) instruction that packs parts of 2 2x16 integers into r10g10b10a2
|
||||
+# v3d-specific (v71) instruction that packs bits of 2 2x16 integers into
|
||||
+# r10g10b10a2:
|
||||
+# dst[9:0] = src0[9:0]
|
||||
+# dst[19:10] = src0[25:16]
|
||||
+# dst[29:20] = src1[9:0]
|
||||
+# dst[31:30] = src1[17:16]
|
||||
+binop_convert("v10pack_v3d", tuint32, tuint32, "",
|
||||
+ "(src0 & 0x3ff) | ((src0 >> 16) & 0x3ff) << 10 | (src1 & 0x3ff) << 20 | ((src1 >> 16) & 0x3ff) << 30")
|
||||
+
|
@ -1,712 +0,0 @@
|
||||
From 6f744bc4bec98f9769486d427e8e2d4e314ae056 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 29 Jun 2021 12:03:24 +0200
|
||||
Subject: [PATCH 003/142] broadcom/cle: update the packet definitions for new
|
||||
generation v71
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Using as reference the spec for 7.1.5. This includes totally new
|
||||
packets, and redefines some that already existed on v42.
|
||||
|
||||
Full list:
|
||||
* Add Depth Bounds Test Limits
|
||||
* Redefine Tile Binning Mode Cfg
|
||||
* Redefine Cfg Bits. There are some changes on the fields:
|
||||
* Line Rasterization is now 1 bit size
|
||||
* Depth Bounds Enable (that takes one of the bits of Line Rasterization)
|
||||
* Early-Z/Early-Z updates enable bits (16-17) figure now as reserved.
|
||||
* New Z-Clipping mode field
|
||||
* Redefine Tile Rendering Mode Cfg (Common). Changes with respect to v42:
|
||||
* New log2 tile height/width fields starting at bit 52/55
|
||||
* Due to those two new fields, the end pad is smaller
|
||||
* sub-id has now a size of 3. Bit 4 is reserved.
|
||||
* Number of render targets: this field max value is now 7 (not
|
||||
reflected on the xml).
|
||||
* Maximum BPP is removed on v71 (now bits 40-41 are reserved)
|
||||
* Depth Buffer disable: on bit 44
|
||||
* Update Store Tile Buffer General
|
||||
* Adding Cfg Render Target Part1/2/3 packets: they replace v4X "Tile
|
||||
Rendering Mode Cfg (Color)" (real name "Rendering Configuration
|
||||
(Render Targets Config)"), "Tile Rendering Mode Cfg (Clear Colors
|
||||
Part1)", "Tile Rendering Mode Cfg (Clear Colors Part2)", and "Tile
|
||||
Rendering Mode Cfg (Clear Colors Part3)". On those old versions,
|
||||
the first packet is used to configure 4 render targets. Now that 8
|
||||
are supported, individual per-render-target packets are used.
|
||||
* Update ZS clear values packet.
|
||||
* Add new v71 output formats
|
||||
* Define Clear Render Targets (Replaces Clear Tile Buffers from v42)
|
||||
* Redefine GL Shader State Record. Changes compared with v42:
|
||||
* Fields removed:
|
||||
* "Coordinate shader has separate input and output VPM blocks"
|
||||
(reserved bit now)
|
||||
* "Vertex shader has separate input and output VPM blocks"
|
||||
(reserved bit now)
|
||||
* "Address of table of default attribute Values." (we needed to
|
||||
change the start position for all the following fields)
|
||||
* New field:
|
||||
* "Never defer FEP depth writes to fragment shader auto Z writes
|
||||
on scoreboard conflict"
|
||||
* Redefine clipper xy scaling: Now it uses 1/64ths of pixels, instead
|
||||
of 1/256ths
|
||||
* Update texture shader state.
|
||||
* Notice we don't use an address type for these fields in the XML
|
||||
description. This is because the addresses are 64-bit aligned
|
||||
(even though the PRM doesn't say it) which means the 6 LSB bits
|
||||
are implicitly 0, but the fields are encoded before the 6th bit
|
||||
of their starting byte, so we can't use the usual trick we do
|
||||
with address types where the first 6 bits in the byte are
|
||||
implicitly overwritten by other fields and we have to encode this
|
||||
manually as a uint field. This would mean that if we had an
|
||||
actual BO we would also need to add it manually to the job's
|
||||
list, but since we don't have one, we don't have to do anything
|
||||
about it.
|
||||
* Add new RB_Swap field for texture shader state
|
||||
* Document Cb/Cr addresses as uint fields in texture shader state
|
||||
* Fixup Blend Config description: we now support 8 RTs.
|
||||
* TMU config parameter 2 has new fields
|
||||
* Add new clipper Z without guardband packet in v71
|
||||
* Add enums for the Z clip modes accepted in v71
|
||||
* Fix texture state array stride packing for V3D 7.1.5
|
||||
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
|
||||
broadcom/cle: rb_swap
|
||||
---
|
||||
src/broadcom/cle/v3d_packet_v33.xml | 386 ++++++++++++++++++++++++++--
|
||||
1 file changed, 368 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml
|
||||
index a0242b5f1c2..624353ca2bf 100644
|
||||
--- a/src/broadcom/cle/v3d_packet_v33.xml
|
||||
+++ b/src/broadcom/cle/v3d_packet_v33.xml
|
||||
@@ -1,4 +1,4 @@
|
||||
-<vcxml gen="3.3" min_ver="33" max_ver="42">
|
||||
+<vcxml gen="3.3" min_ver="33" max_ver="71">
|
||||
|
||||
<enum name="Compare Function" prefix="V3D_COMPARE_FUNC">
|
||||
<value name="NEVER" value="0"/>
|
||||
@@ -167,13 +167,36 @@
|
||||
<value name="depth_16" value="2"/>
|
||||
</enum>
|
||||
|
||||
- <enum name="Render Target Clamp" prefix="V3D_RENDER_TARGET_CLAMP" min_ver="41">
|
||||
+ <enum name="Render Target Clamp" prefix="V3D_RENDER_TARGET_CLAMP" min_ver="41" max_ver="42">
|
||||
<value name="none" value="0"/> <!-- no clamping -->
|
||||
<value name="norm" value="1"/> <!-- [0,1] for f16 -->
|
||||
<value name="pos" value="2"/> <!-- [0, for f16 -->
|
||||
<value name="int" value="3" min_ver="42"/> <!-- clamp to integer RT's range -->
|
||||
</enum>
|
||||
|
||||
+ <enum name="Render Target Type Clamp" prefix="V3D_RENDER_TARGET_TYPE_CLAMP" min_ver="71">
|
||||
+ <value name="8i" value="0"/> <!-- no clamping -->
|
||||
+ <value name="16i" value="1"/> <!-- no clamping -->
|
||||
+ <value name="32i" value="2"/> <!-- no clamping -->
|
||||
+ <value name="8ui" value="4"/> <!-- no clamping -->
|
||||
+ <value name="16ui" value="5"/> <!-- no clamping -->
|
||||
+ <value name="32ui" value="6"/> <!-- no clamping -->
|
||||
+ <value name="8" value="8"/> <!-- no clamping -->
|
||||
+ <value name="16f" value="9"/> <!-- no clamping -->
|
||||
+ <value name="32f" value="10"/> <!-- no clamping -->
|
||||
+ <value name="8i_clamped" value="16"/> <!-- clamp to integer RT's range -->
|
||||
+ <value name="16i_clamped" value="17"/> <!-- clamp to integer RT's range -->
|
||||
+ <value name="32i_clamped" value="18"/> <!-- clamp to integer RT's range -->
|
||||
+ <value name="8ui_clamped" value="20"/> <!-- clamp to integer RT's range -->
|
||||
+ <value name="16ui_clamped" value="21"/> <!-- clamp to integer RT's range -->
|
||||
+ <value name="32ui_clamped" value="22"/> <!-- clamp to integer RT's range -->
|
||||
+ <value name="16f_clamp_norm" value="24"/> <!-- [0,1] for f16 -->
|
||||
+ <value name="16f_clamp_pos" value="25"/> <!-- [0, for f16 -->
|
||||
+ <value name="16f_clamp_pq" value="26"/> <!-- PQ lin range, colour to [0, 125], alpha to [0, 1] for f16 -->
|
||||
+ <value name="16f_clamp_hlg" value="27"/> <!-- HLG lin range, colour to [0, 12], alpha to [0, 1] for f16 -->
|
||||
+ <value name="invalid" value="32"/>
|
||||
+ </enum>
|
||||
+
|
||||
<!---
|
||||
CL cache flush commands are not fully documented and subject to a
|
||||
number of hardware issues that make them unreliable. Specifically:
|
||||
@@ -263,13 +286,27 @@
|
||||
<value name="r8ui" value="36"/>
|
||||
<value name="srgbx8" value="37" max_ver="33"/>
|
||||
<value name="rgbx8" value="38" max_ver="33"/>
|
||||
- <value name="bstc" value="39" min_ver="41"/>
|
||||
+ <value name="bstc8" value="39" min_ver="41"/>
|
||||
<value name="d32f" value="40" min_ver="41"/>
|
||||
<value name="d24" value="41" min_ver="41"/>
|
||||
<value name="d16" value="42" min_ver="41"/>
|
||||
<value name="d24s8" value="43" min_ver="41"/>
|
||||
<value name="s8" value="44" min_ver="41"/>
|
||||
<value name="rgba5551" value="45" min_ver="41"/>
|
||||
+ <value name="bstc8_srgb" value="46" min_ver="71"/>
|
||||
+ <value name="bstc10" value="47" min_ver="71"/>
|
||||
+ <value name="bstc10_srgb" value="48" min_ver="71"/>
|
||||
+ <value name="bstc10_pq" value="49" min_ver="71"/>
|
||||
+ <value name="rgba10x6" value="50" min_ver="71"/>
|
||||
+ <value name="bstc10_hlg" value="55" min_ver="71"/>
|
||||
+ <value name="rgba10x6_hlg" value="56" min_ver="71"/>
|
||||
+ <value name="rgb10_a2_hlg" value="57" min_ver="71"/>
|
||||
+ <value name="bstc10_pq_bt1886" value="58" min_ver="71"/>
|
||||
+ <value name="rgba10x6_pq_bt1886" value="59" min_ver="71"/>
|
||||
+ <value name="rgb10_a2_pq_bt1886" value="60" min_ver="71"/>
|
||||
+ <value name="bstc10_hlg_bt1886" value="61" min_ver="71"/>
|
||||
+ <value name="rgba10x6_hlg_bt1886" value="62" min_ver="71"/>
|
||||
+ <value name="rgb10_a2_hlg_bt1886" value="63" min_ver="71"/>
|
||||
</enum>
|
||||
|
||||
<enum name="Z/S Output Image Format" prefix="V3D_OUTPUT_IMAGE_FORMAT_ZS" max_ver="33">
|
||||
@@ -314,6 +351,12 @@
|
||||
<value name="perp end caps" value="1"/>
|
||||
</enum>
|
||||
|
||||
+ <enum name="Z Clip Mode" prefix="V3D_Z_CLIP_MODE">
|
||||
+ <value name="NONE" value="0"/>
|
||||
+ <value name="MIN_ONE_TO_ONE" value="1"/>
|
||||
+ <value name="ZERO_TO_ONE" value="2"/>
|
||||
+ </enum>
|
||||
+
|
||||
<packet code="0" name="Halt"/>
|
||||
<packet code="1" name="NOP"/>
|
||||
<packet code="4" name="Flush"/>
|
||||
@@ -381,11 +424,13 @@
|
||||
<field name="Last Tile of Frame" size="1" start="0" type="bool"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="25" shortname="clear" name="Clear Tile Buffers" cl="R" min_ver="41">
|
||||
+ <packet code="25" shortname="clear" name="Clear Tile Buffers" cl="R" min_ver="41" max_ver="42">
|
||||
<field name="Clear Z/Stencil Buffer" size="1" start="1" type="bool"/>
|
||||
<field name="Clear all Render Targets" size="1" start="0" type="bool"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="25" shortname="clear_rt" name="Clear Render Targets" cl="R" min_ver="71"/>
|
||||
+
|
||||
<packet code="26" shortname="load" name="Reload Tile Color Buffer" cl="R" max_ver="33">
|
||||
<field name="Disable Color Buffer load" size="8" start="8" type="uint"/>
|
||||
<field name="Enable Z load" size="1" start="7" type="bool"/>
|
||||
@@ -443,6 +488,10 @@
|
||||
<value name="Render target 1" value="1"/>
|
||||
<value name="Render target 2" value="2"/>
|
||||
<value name="Render target 3" value="3"/>
|
||||
+ <value name="Render target 4" value="4" min_ver="71"/>
|
||||
+ <value name="Render target 5" value="5" min_ver="71"/>
|
||||
+ <value name="Render target 6" value="6" min_ver="71"/>
|
||||
+ <value name="Render target 7" value="7" min_ver="71"/>
|
||||
<value name="None" value="8"/>
|
||||
<value name="Z" value="9"/>
|
||||
<value name="Stencil" value="10"/>
|
||||
@@ -789,7 +838,7 @@
|
||||
<field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="84" name="Blend Cfg" min_ver="41">
|
||||
+ <packet code="84" name="Blend Cfg" min_ver="41" max_ver="42">
|
||||
<field name="Render Target Mask" size="4" start="24" type="uint"/>
|
||||
<field name="Color blend dst factor" size="4" start="20" type="Blend Factor"/>
|
||||
<field name="Color blend src factor" size="4" start="16" type="Blend Factor"/>
|
||||
@@ -799,6 +848,16 @@
|
||||
<field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="84" name="Blend Cfg" min_ver="71">
|
||||
+ <field name="Render Target Mask" size="8" start="24" type="uint"/>
|
||||
+ <field name="Color blend dst factor" size="4" start="20" type="Blend Factor"/>
|
||||
+ <field name="Color blend src factor" size="4" start="16" type="Blend Factor"/>
|
||||
+ <field name="Color blend mode" size="4" start="12" type="Blend Mode"/>
|
||||
+ <field name="Alpha blend dst factor" size="4" start="8" type="Blend Factor"/>
|
||||
+ <field name="Alpha blend src factor" size="4" start="4" type="Blend Factor"/>
|
||||
+ <field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="86" shortname="blend_ccolor" name="Blend Constant Color">
|
||||
<field name="Alpha (F16)" size="16" start="48" type="uint"/>
|
||||
<field name="Blue (F16)" size="16" start="32" type="uint"/>
|
||||
@@ -828,7 +887,12 @@
|
||||
<field name="address" size="32" start="0" type="address"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="96" name="Cfg Bits">
|
||||
+ <packet code="93" name="Depth Bounds Test Limits" min_ver="71">
|
||||
+ <field name="Lower Test Limit" size="32" start="0" type="float"/>
|
||||
+ <field name="Upper Test Limit" size="32" start="32" type="float"/>
|
||||
+ </packet>
|
||||
+
|
||||
+ <packet code="96" name="Cfg Bits" max_ver="42">
|
||||
<field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
|
||||
<field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
|
||||
<field name="Blend enable" size="1" start="19" type="bool"/>
|
||||
@@ -846,6 +910,25 @@
|
||||
<field name="Enable Forward Facing Primitive" size="1" start="0" type="bool"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="96" name="Cfg Bits" min_ver="71">
|
||||
+ <field name="Z Clipping mode" size="2" start="22" type="Z Clip Mode"/>
|
||||
+ <field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
|
||||
+ <field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
|
||||
+ <field name="Blend enable" size="1" start="19" type="bool"/>
|
||||
+ <field name="Stencil enable" size="1" start="18" type="bool"/>
|
||||
+ <field name="Z updates enable" size="1" start="15" type="bool"/>
|
||||
+ <field name="Depth-Test Function" size="3" start="12" type="Compare Function"/>
|
||||
+ <field name="Direct3D Wireframe triangles mode" size="1" start="11" type="bool"/>
|
||||
+ <field name="Z Clamp Mode" size="1" start="10" type="bool"/>
|
||||
+ <field name="Rasterizer Oversample Mode" size="2" start="6" type="uint"/>
|
||||
+ <field name="Depth Bounds Test Enable" size="1" start="5" type="bool"/>
|
||||
+ <field name="Line Rasterization" size="1" start="4" type="uint"/>
|
||||
+ <field name="Enable Depth Offset" size="1" start="3" type="bool"/>
|
||||
+ <field name="Clockwise Primitives" size="1" start="2" type="bool"/>
|
||||
+ <field name="Enable Reverse Facing Primitive" size="1" start="1" type="bool"/>
|
||||
+ <field name="Enable Forward Facing Primitive" size="1" start="0" type="bool"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="97" shortname="zero_all_flatshade_flags" name="Zero All Flat Shade Flags"/>
|
||||
|
||||
<packet code="98" shortname="flatshade_flags" name="Flat Shade Flags">
|
||||
@@ -907,16 +990,26 @@
|
||||
<field name="Minimum Zw" size="32" start="0" type="float"/>
|
||||
</packet>
|
||||
|
||||
- <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B">
|
||||
+ <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B" max_ver="42">
|
||||
<field name="Viewport Half-Height in 1/256th of pixel" size="32" start="32" type="float"/>
|
||||
<field name="Viewport Half-Width in 1/256th of pixel" size="32" start="0" type="float"/>
|
||||
</packet>
|
||||
|
||||
+ <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B" min_ver="71">
|
||||
+ <field name="Viewport Half-Height in 1/64th of pixel" size="32" start="32" type="float"/>
|
||||
+ <field name="Viewport Half-Width in 1/64th of pixel" size="32" start="0" type="float"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet shortname="clipper_z" name="Clipper Z Scale and Offset" code="111" cl="B">
|
||||
<field name="Viewport Z Offset (Zc to Zs)" size="32" start="32" type="float"/>
|
||||
<field name="Viewport Z Scale (Zc to Zs)" size="32" start="0" type="float"/>
|
||||
</packet>
|
||||
|
||||
+ <packet shortname="clipper_z_no_guardband" name="Clipper Z Scale and Offset no guardband" code="112" cl="B" min_ver="71">
|
||||
+ <field name="Viewport Z Offset (Zc to Zs)" size="32" start="32" type="float"/>
|
||||
+ <field name="Viewport Z Scale (Zc to Zs)" size="32" start="0" type="float"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet name="Number of Layers" code="119" min_ver="41">
|
||||
<field name="Number of Layers" size="8" start="0" type="uint" minus_one="true"/>
|
||||
</packet>
|
||||
@@ -947,7 +1040,7 @@
|
||||
<field name="sub-id" size="1" start="0" type="uint" default="0"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="120" name="Tile Binning Mode Cfg" min_ver="41">
|
||||
+ <packet code="120" name="Tile Binning Mode Cfg" min_ver="41" max_ver="42">
|
||||
|
||||
<field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
|
||||
<field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
|
||||
@@ -971,6 +1064,35 @@
|
||||
</field>
|
||||
</packet>
|
||||
|
||||
+ <packet code="120" name="Tile Binning Mode Cfg" min_ver="71">
|
||||
+ <field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
|
||||
+ <field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
|
||||
+
|
||||
+ <field name="Log2 Tile Height" size="3" start="11" type="uint">
|
||||
+ <value name="tile height 8 pixels" value="0"/>
|
||||
+ <value name="tile height 16 pixels" value="1"/>
|
||||
+ <value name="tile height 32 pixels" value="2"/>
|
||||
+ <value name="tile height 64 pixels" value="3"/>
|
||||
+ </field>
|
||||
+ <field name="Log2 Tile Width" size="3" start="8" type="uint">
|
||||
+ <value name="tile width 8 pixels" value="0"/>
|
||||
+ <value name="tile width 16 pixels" value="1"/>
|
||||
+ <value name="tile width 32 pixels" value="2"/>
|
||||
+ <value name="tile width 64 pixels" value="3"/>
|
||||
+ </field>
|
||||
+
|
||||
+ <field name="tile allocation block size" size="2" start="4" type="uint">
|
||||
+ <value name="tile allocation block size 64b" value="0"/>
|
||||
+ <value name="tile allocation block size 128b" value="1"/>
|
||||
+ <value name="tile allocation block size 256b" value="2"/>
|
||||
+ </field>
|
||||
+ <field name="tile allocation initial block size" size="2" start="2" type="uint">
|
||||
+ <value name="tile allocation initial block size 64b" value="0"/>
|
||||
+ <value name="tile allocation initial block size 128b" value="1"/>
|
||||
+ <value name="tile allocation initial block size 256b" value="2"/>
|
||||
+ </field>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="120" name="Tile Binning Mode Cfg (Part2)" cl="B" max_ver="33">
|
||||
<field name="Tile Allocation Memory Address" size="32" start="32" type="address"/>
|
||||
<field name="Tile Allocation Memory Size" size="32" start="0" type="uint"/>
|
||||
@@ -1002,7 +1124,7 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="0"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="41">
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="41" max_ver="42">
|
||||
<field name="Pad" size="12" start="52" type="uint"/>
|
||||
|
||||
<field name="Early Depth/Stencil Clear" size="1" start="51" type="bool"/>
|
||||
@@ -1018,7 +1140,11 @@
|
||||
<field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
|
||||
<field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
|
||||
|
||||
- <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP"/>
|
||||
+ <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP">
|
||||
+ <value name="Render target maximum 32bpp" value="0"/>
|
||||
+ <value name="Render target maximum 64bpp" value="1"/>
|
||||
+ <value name="Render target maximum 128bpp" value="2"/>
|
||||
+ </field>
|
||||
|
||||
<field name="Image Height (pixels)" size="16" start="24" type="uint"/>
|
||||
<field name="Image Width (pixels)" size="16" start="8" type="uint"/>
|
||||
@@ -1027,6 +1153,43 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="0"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="71">
|
||||
+ <field name="Pad" size="6" start="58" type="uint"/>
|
||||
+
|
||||
+ <field name="Log2 Tile Height" size="3" start="55" type="uint">
|
||||
+ <value name="tile height 8 pixels" value="0"/>
|
||||
+ <value name="tile height 16 pixels" value="1"/>
|
||||
+ <value name="tile height 32 pixels" value="2"/>
|
||||
+ <value name="tile height 64 pixels" value="3"/>
|
||||
+ </field>
|
||||
+ <field name="Log2 Tile Width" size="3" start="52" type="uint">
|
||||
+ <value name="tile width 8 pixels" value="0"/>
|
||||
+ <value name="tile width 16 pixels" value="1"/>
|
||||
+ <value name="tile width 32 pixels" value="2"/>
|
||||
+ <value name="tile width 64 pixels" value="3"/>
|
||||
+ </field>
|
||||
+
|
||||
+ <field name="Early Depth/Stencil Clear" size="1" start="51" type="bool"/>
|
||||
+ <field name="Internal Depth Type" size="4" start="47" type="Internal Depth Type"/>
|
||||
+
|
||||
+ <field name="Early-Z disable" size="1" start="46" type="bool"/>
|
||||
+
|
||||
+ <field name="Early-Z Test and Update Direction" size="1" start="45" type="uint">
|
||||
+ <value name="Early-Z direction LT/LE" value="0"/>
|
||||
+ <value name="Early-Z direction GT/GE" value="1"/>
|
||||
+ </field>
|
||||
+
|
||||
+ <field name="Depth-buffer disable" size="1" start="44" type="bool"/>
|
||||
+ <field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
|
||||
+ <field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
|
||||
+
|
||||
+ <field name="Image Height (pixels)" size="16" start="24" type="uint"/>
|
||||
+ <field name="Image Width (pixels)" size="16" start="8" type="uint"/>
|
||||
+ <field name="Number of Render Targets" size="4" start="4" type="uint" minus_one="true"/>
|
||||
+
|
||||
+ <field name="sub-id" size="3" start="0" type="uint" default="0"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" max_ver="33">
|
||||
<field name="Address" size="32" start="32" type="address"/>
|
||||
|
||||
@@ -1048,7 +1211,8 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="2"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" min_ver="41">
|
||||
+ <!-- On 4.1 the real name would be "Tile Rendering Mode Cfg (Render Target Configs)" -->
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" min_ver="41" max_ver="42">
|
||||
|
||||
<field name="Pad" size="28" start="36" type="uint"/>
|
||||
|
||||
@@ -1099,7 +1263,7 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="3"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="41">
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="41" max_ver="42">
|
||||
<field name="unused" size="16" start="48" type="uint"/>
|
||||
|
||||
<field name="Z Clear Value" size="32" start="16" type="float"/>
|
||||
@@ -1108,6 +1272,15 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="2"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="71">
|
||||
+ <field name="unused" size="16" start="48" type="uint"/>
|
||||
+
|
||||
+ <field name="Z Clear Value" size="32" start="16" type="float"/>
|
||||
+
|
||||
+ <field name="Stencil Clear Value" size="8" start="8" type="uint"/>
|
||||
+ <field name="sub-id" size="4" start="0" type="uint" default="1"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" max_ver="33">
|
||||
<!-- Express this as a 56-bit field? -->
|
||||
<field name="Clear Color next 24 bits" size="24" start="40" type="uint"/>
|
||||
@@ -1117,7 +1290,7 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="4"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" min_ver="41">
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" min_ver="41" max_ver="42">
|
||||
<!-- Express this as a 56-bit field? -->
|
||||
<field name="Clear Color next 24 bits" size="24" start="40" type="uint"/>
|
||||
<field name="Clear Color low 32 bits" size="32" start="8" type="uint"/>
|
||||
@@ -1126,6 +1299,19 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="3"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part1)" cl="R" min_ver="71">
|
||||
+
|
||||
+ <field name="Clear Color low bits" size="32" start="32" type="uint"/>
|
||||
+ <field name="Internal Type and Clamping" size="5" start="27" type="Render Target Type Clamp"/>
|
||||
+ <field name="Internal BPP" size="2" start="25" type="Internal BPP"/>
|
||||
+
|
||||
+ <field name="Stride" size="7" start="18" type="uint" minus_one="true"/>
|
||||
+ <!-- In multiples of 512 bits -->
|
||||
+ <field name="Base Address" size="11" start="7" type="uint"/>
|
||||
+ <field name="Render Target number" size="3" start="3" type="uint"/>
|
||||
+ <field name="sub-id" size="3" start="0" type="uint" default="2"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" max_ver="33">
|
||||
<!-- Express this as a 56-bit field? -->
|
||||
<field name="Clear Color mid-high 24 bits" size="24" start="40" type="uint"/>
|
||||
@@ -1135,7 +1321,7 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="5"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" min_ver="41">
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" min_ver="41" max_ver="42">
|
||||
<!-- Express this as a 56-bit field? -->
|
||||
<field name="Clear Color mid-high 24 bits" size="24" start="40" type="uint"/>
|
||||
<field name="Clear Color mid-low 32 bits" size="32" start="8" type="uint"/>
|
||||
@@ -1144,6 +1330,13 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="4"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part2)" cl="R" min_ver="71">
|
||||
+ <field name="Clear Color mid bits" size="40" start="24" type="uint"/>
|
||||
+
|
||||
+ <field name="Render Target number" size="3" start="3" type="uint"/>
|
||||
+ <field name="sub-id" size="3" start="0" type="uint" default="3"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" max_ver="33">
|
||||
<field name="pad" size="11" start="53" type="uint"/>
|
||||
<field name="UIF padded height in UIF blocks" size="13" start="40" type="uint"/>
|
||||
@@ -1155,7 +1348,7 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="6"/>
|
||||
</packet>
|
||||
|
||||
- <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" min_ver="41">
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" min_ver="41" max_ver="42">
|
||||
<field name="pad" size="11" start="53" type="uint"/>
|
||||
<field name="UIF padded height in UIF blocks" size="13" start="40" type="uint"/>
|
||||
<!-- image height is for Y flipping -->
|
||||
@@ -1166,6 +1359,13 @@
|
||||
<field name="sub-id" size="4" start="0" type="uint" default="5"/>
|
||||
</packet>
|
||||
|
||||
+ <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part3)" cl="R" min_ver="71">
|
||||
+ <field name="Clear Color top bits" size="56" start="8" type="uint"/>
|
||||
+
|
||||
+ <field name="Render Target number" size="3" start="3" type="uint"/>
|
||||
+ <field name="sub-id" size="3" start="0" type="uint" default="4"/>
|
||||
+ </packet>
|
||||
+
|
||||
<packet code="124" shortname="tile_coords" name="Tile Coordinates">
|
||||
<field name="tile row number" size="12" start="12" type="uint"/>
|
||||
<field name="tile column number" size="12" start="0" type="uint"/>
|
||||
@@ -1240,7 +1440,7 @@
|
||||
<field name="Coordinate Shader Uniforms Address" size="32" start="32b" type="address"/>
|
||||
</struct>
|
||||
|
||||
- <struct name="GL Shader State Record" min_ver="41">
|
||||
+ <struct name="GL Shader State Record" min_ver="41" max_ver="42">
|
||||
<field name="Point size in shaded vertex data" size="1" start="0" type="bool"/>
|
||||
<field name="Enable clipping" size="1" start="1" type="bool"/>
|
||||
|
||||
@@ -1299,6 +1499,63 @@
|
||||
<field name="Coordinate Shader Uniforms Address" size="32" start="32b" type="address"/>
|
||||
</struct>
|
||||
|
||||
+ <struct name="GL Shader State Record" min_ver="71">
|
||||
+ <field name="Point size in shaded vertex data" size="1" start="0" type="bool"/>
|
||||
+ <field name="Enable clipping" size="1" start="1" type="bool"/>
|
||||
+
|
||||
+ <field name="Vertex ID read by coordinate shader" size="1" start="2" type="bool"/>
|
||||
+ <field name="Instance ID read by coordinate shader" size="1" start="3" type="bool"/>
|
||||
+ <field name="Base Instance ID read by coordinate shader" size="1" start="4" type="bool"/>
|
||||
+ <field name="Vertex ID read by vertex shader" size="1" start="5" type="bool"/>
|
||||
+ <field name="Instance ID read by vertex shader" size="1" start="6" type="bool"/>
|
||||
+ <field name="Base Instance ID read by vertex shader" size="1" start="7" type="bool"/>
|
||||
+
|
||||
+ <field name="Fragment shader does Z writes" size="1" start="8" type="bool"/>
|
||||
+ <field name="Turn off early-z test" size="1" start="9" type="bool"/>
|
||||
+
|
||||
+ <field name="Fragment shader uses real pixel centre W in addition to centroid W2" size="1" start="12" type="bool"/>
|
||||
+ <field name="Enable Sample Rate Shading" size="1" start="13" type="bool"/>
|
||||
+ <field name="Any shader reads hardware-written Primitive ID" size="1" start="14" type="bool"/>
|
||||
+ <field name="Insert Primitive ID as first varying to fragment shader" size="1" start="15" type="bool"/>
|
||||
+ <field name="Turn off scoreboard" size="1" start="16" type="bool"/>
|
||||
+ <field name="Do scoreboard wait on first thread switch" size="1" start="17" type="bool"/>
|
||||
+ <field name="Disable implicit point/line varyings" size="1" start="18" type="bool"/>
|
||||
+ <field name="No prim pack" size="1" start="19" type="bool"/>
|
||||
+ <field name="Never defer FEP depth writes" size="1" start="20" type="bool"/>
|
||||
+
|
||||
+ <field name="Number of varyings in Fragment Shader" size="8" start="3b" type="uint"/>
|
||||
+
|
||||
+ <field name="Coordinate Shader output VPM segment size" size="4" start="4b" type="uint"/>
|
||||
+ <field name="Min Coord Shader output segments required in play in addition to VCM cache size" size="4" start="36" type="uint"/>
|
||||
+
|
||||
+ <field name="Coordinate Shader input VPM segment size" size="4" start="5b" type="uint"/>
|
||||
+ <field name="Min Coord Shader input segments required in play" size="4" start="44" type="uint" minus_one="true"/>
|
||||
+
|
||||
+ <field name="Vertex Shader output VPM segment size" size="4" start="6b" type="uint"/>
|
||||
+ <field name="Min Vertex Shader output segments required in play in addition to VCM cache size" size="4" start="52" type="uint"/>
|
||||
+
|
||||
+ <field name="Vertex Shader input VPM segment size" size="4" start="7b" type="uint"/>
|
||||
+ <field name="Min Vertex Shader input segments required in play" size="4" start="60" type="uint" minus_one="true"/>
|
||||
+
|
||||
+ <field name="Fragment Shader Code Address" size="29" start="67" type="address"/>
|
||||
+ <field name="Fragment Shader 4-way threadable" size="1" start="64" type="bool"/>
|
||||
+ <field name="Fragment Shader start in final thread section" size="1" start="65" type="bool"/>
|
||||
+ <field name="Fragment Shader Propagate NaNs" size="1" start="66" type="bool"/>
|
||||
+ <field name="Fragment Shader Uniforms Address" size="32" start="12b" type="address"/>
|
||||
+
|
||||
+ <field name="Vertex Shader Code Address" size="29" start="131" type="address"/>
|
||||
+ <field name="Vertex Shader 4-way threadable" size="1" start="128" type="bool"/>
|
||||
+ <field name="Vertex Shader start in final thread section" size="1" start="129" type="bool"/>
|
||||
+ <field name="Vertex Shader Propagate NaNs" size="1" start="130" type="bool"/>
|
||||
+ <field name="Vertex Shader Uniforms Address" size="32" start="20b" type="address"/>
|
||||
+
|
||||
+ <field name="Coordinate Shader Code Address" size="29" start="195" type="address"/>
|
||||
+ <field name="Coordinate Shader 4-way threadable" size="1" start="192" type="bool"/>
|
||||
+ <field name="Coordinate Shader start in final thread section" size="1" start="193" type="bool"/>
|
||||
+ <field name="Coordinate Shader Propagate NaNs" size="1" start="194" type="bool"/>
|
||||
+ <field name="Coordinate Shader Uniforms Address" size="32" start="28b" type="address"/>
|
||||
+ </struct>
|
||||
+
|
||||
<struct name="Geometry Shader State Record" min_ver="41">
|
||||
<field name="Geometry Bin Mode Shader Code Address" size="29" start="3" type="address"/>
|
||||
<field name="Geometry Bin Mode Shader 4-way threadable" size="1" start="0" type="bool"/>
|
||||
@@ -1543,7 +1800,7 @@
|
||||
<field name="Offset Format 8" size="1" start="0" type="bool"/>
|
||||
</struct>
|
||||
|
||||
- <struct name="TMU Config Parameter 2" min_ver="42">
|
||||
+ <struct name="TMU Config Parameter 2" min_ver="42" max_ver="42">
|
||||
<field name="Pad" size="7" start="25" type="uint"/>
|
||||
<field name="LOD Query" size="1" start="24" type="bool"/>
|
||||
<field name="Op" size="4" start="20" type="TMU Op"/>
|
||||
@@ -1558,6 +1815,23 @@
|
||||
<field name="Offset Format 8" size="1" start="0" type="bool"/>
|
||||
</struct>
|
||||
|
||||
+ <struct name="TMU Config Parameter 2" min_ver="71">
|
||||
+ <field name="Pad" size="5" start="27" type="uint"/>
|
||||
+ <field name="Write conversion" size="1" start="26" type="bool"/>
|
||||
+ <field name="DIM query" size="1" start="25" type="bool"/>
|
||||
+ <field name="LOD Query" size="1" start="24" type="bool"/>
|
||||
+ <field name="Op" size="4" start="20" type="TMU Op"/>
|
||||
+ <field name="Offset R" size="4" start="16" type="int"/>
|
||||
+ <field name="Offset T" size="4" start="12" type="int"/>
|
||||
+ <field name="Offset S" size="4" start="8" type="int"/>
|
||||
+ <field name="Gather Mode" size="1" start="7" type="bool"/>
|
||||
+ <field name="Gather Component" size="2" start="5" type="uint"/>
|
||||
+ <field name="Coefficient Mode" size="1" start="4" type="bool"/>
|
||||
+ <field name="Sample Number" size="2" start="2" type="uint"/>
|
||||
+ <field name="Disable AutoLOD" size="1" start="1" type="bool"/>
|
||||
+ <field name="Offset Format 8" size="1" start="0" type="bool"/>
|
||||
+ </struct>
|
||||
+
|
||||
<struct name="Texture Shader State" max_ver="33">
|
||||
<field name="UIF XOR disable" size="1" start="255" type="bool"/>
|
||||
<field name="Level 0 is strictly UIF" size="1" start="254" type="bool"/>
|
||||
@@ -1611,7 +1885,7 @@
|
||||
<field name="Filter" size="4" start="0" type="TMU Filter"/>
|
||||
</struct>
|
||||
|
||||
- <struct name="Texture Shader State" min_ver="41">
|
||||
+ <struct name="Texture Shader State" min_ver="41" max_ver="42">
|
||||
<field name="Pad" size="56" start="136" type="uint"/>
|
||||
<field name="UIF XOR disable" size="1" start="135" type="bool"/>
|
||||
<field name="Level 0 is strictly UIF" size="1" start="134" type="bool"/>
|
||||
@@ -1652,6 +1926,82 @@
|
||||
<field name="Flip texture X Axis" size="1" start="0" type="bool"/>
|
||||
</struct>
|
||||
|
||||
+ <struct name="Texture Shader State" min_ver="71">
|
||||
+ <field name="Pad" size="2" start="190" type="uint"/>
|
||||
+ <!-- When we use an address type, there is an implicit requirement
|
||||
+ that the address is a 32-bit value that is encoded starting at a 32-bit
|
||||
+ aligned bit offset into the packet. If the address field has less than
|
||||
+ 32 bits, it is assumed that the address is aligned. For example, a
|
||||
+ 26-bit address field is expected to be 64-byte aligned (6 lsb bits
|
||||
+ are 0) and that this will be encoded into a packet starting at bit
|
||||
+ offset 6 into a 32-bit dword (since bits 0..5 of the address are
|
||||
+ implicitly 0 and don't need to be explicitly encoded).
|
||||
+
|
||||
+ Unfortunately, the CB address below doesn't match this requirement:
|
||||
+ it starts at bit 138, which is 10 bits into a 32-bit dword, but it
|
||||
+ represents a 64-byte aligned address (6 lsb bits are 0), so we cannot
|
||||
+ encode it as an address type. To fix this we encode these addresses
|
||||
+ as uint types which has two implications:
|
||||
+ 1. the driver is responsible for manually adding the buffer objects
|
||||
+ for these addresses to the job BO list.
|
||||
+ 2. the driver needs to pass an actual 26-bit address value by manually
|
||||
+ shifting the 6 lsb bits (that are implicitly 0).
|
||||
+ -->
|
||||
+ <field name="texture_base pointer_Cr" size="26" start="164" type="uint"/>
|
||||
+ <field name="texture base pointer Cb" size="26" start="138" type="uint"/>
|
||||
+ <field name="Chroma offset y" size="1" start="137" type="uint"/>
|
||||
+ <field name="Chroma offset x" size="1" start="136" type="uint"/>
|
||||
+
|
||||
+ <field name="UIF XOR disable" size="1" start="135" type="bool"/>
|
||||
+ <field name="Level 0 is strictly UIF" size="1" start="134" type="bool"/>
|
||||
+ <field name="Level 0 XOR enable" size="1" start="132" type="bool"/>
|
||||
+ <field name="Level 0 UB_PAD" size="4" start="128" type="uint"/>
|
||||
+
|
||||
+ <field name="Base Level" size="4" start="124" type="uint"/>
|
||||
+ <field name="Max Level" size="4" start="120" type="uint"/>
|
||||
+
|
||||
+ <field name="Swizzle A" size="3" start="117" type="uint">
|
||||
+ <value name="Swizzle Zero" value="0"/>
|
||||
+ <value name="Swizzle One" value="1"/>
|
||||
+ <value name="Swizzle Red" value="2"/>
|
||||
+ <value name="Swizzle Green" value="3"/>
|
||||
+ <value name="Swizzle Blue" value="4"/>
|
||||
+ <value name="Swizzle Alpha" value="5"/>
|
||||
+ </field>
|
||||
+
|
||||
+ <field name="Swizzle B" size="3" start="114" type="uint"/>
|
||||
+ <field name="Swizzle G" size="3" start="111" type="uint"/>
|
||||
+ <field name="Swizzle R" size="3" start="108" type="uint"/>
|
||||
+ <field name="Extended" size="1" start="107" type="bool"/>
|
||||
+
|
||||
+ <field name="Texture type" size="7" start="100" type="uint"/>
|
||||
+ <field name="Image Depth" size="14" start="86" type="uint"/>
|
||||
+ <field name="Image Height" size="14" start="72" type="uint"/>
|
||||
+ <field name="Image Width" size="14" start="58" type="uint"/>
|
||||
+
|
||||
+ <!-- V3D 7.1.2 doesn't have the RB swap bit and has Array Stride starting
|
||||
+ at bit 32. However, 7.1.5 included the RB swap bit at bit 32 and has
|
||||
+ Array Stride starting at 33, which is backwards incompatible.
|
||||
+ We use the definition from 7.1.5.
|
||||
+ -->
|
||||
+ <field name="Array Stride (64-byte aligned)" size="24" start="33" type="uint"/>
|
||||
+ <field name="R/B swap" size="1" start="32" type="bool"/>
|
||||
+
|
||||
+ <field name="Texture base pointer" size="32" start="0" type="address"/>
|
||||
+
|
||||
+ <field name="Reverse" size="1" start="5" type="bool"/>
|
||||
+ <field name="Transfer func" size="3" start="2" type="uint">
|
||||
+ <value name="Transfer Func None" value="0"/>
|
||||
+ <value name="Transfer Func sRGB" value="1"/>
|
||||
+ <value name="Transfer Func PQ" value="2"/>
|
||||
+ <value name="Transfer Func HLG" value="3"/>
|
||||
+ <value name="Transfer Func PQ BT1886" value="4"/>
|
||||
+ <value name="Transfer Func HLG BT1886" value="5"/>
|
||||
+ </field>
|
||||
+ <field name="Flip texture Y Axis" size="1" start="1" type="bool"/>
|
||||
+ <field name="Flip texture X Axis" size="1" start="0" type="bool"/>
|
||||
+ </struct>
|
||||
+
|
||||
<struct name="Sampler State" min_ver="41">
|
||||
<field name="Border color word 3" size="32" start="160" type="uint"/>
|
||||
<field name="Border color word 2" size="32" start="128" type="uint"/>
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,8 +1,8 @@
|
||||
From 381c29e3ff5237c89380cc53eb2271d1985f4e34 Mon Sep 17 00:00:00 2001
|
||||
From 7e151fd3a213848c8022c9f48e10f2aec76c3e4d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 2 Dec 2021 13:26:43 +0100
|
||||
Subject: [PATCH 067/142] broadcom/compiler: update image store lowering to use
|
||||
v71 new packing/conversion instructions
|
||||
Subject: [PATCH 3/3] broadcom/compiler: update image store lowering to use v71
|
||||
new packing/conversion instructions
|
||||
|
||||
Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*:
|
||||
total instructions in shared programs: 35993 -> 33245 (-7.63%)
|
||||
@ -31,18 +31,20 @@ Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*:
|
||||
|
||||
FWIW, that one HURT on the instructions count is for just one
|
||||
instruction.
|
||||
|
||||
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
---
|
||||
src/broadcom/compiler/nir_to_vir.c | 39 +++
|
||||
src/broadcom/compiler/nir_to_vir.c | 40 +++
|
||||
src/broadcom/compiler/v3d_compiler.h | 16 +-
|
||||
.../compiler/v3d_nir_lower_image_load_store.c | 246 +++++++++++++++++-
|
||||
.../compiler/v3d_nir_lower_image_load_store.c | 239 +++++++++++++++++-
|
||||
src/broadcom/compiler/vir.c | 2 +-
|
||||
4 files changed, 294 insertions(+), 9 deletions(-)
|
||||
4 files changed, 288 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
|
||||
index 90fe1d1e7f0..a8cf02dd386 100644
|
||||
index 220c864a056..4329d4c85f6 100644
|
||||
--- a/src/broadcom/compiler/nir_to_vir.c
|
||||
+++ b/src/broadcom/compiler/nir_to_vir.c
|
||||
@@ -1689,6 +1689,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
@@ -1688,6 +1688,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
result = vir_VFPACK(c, src[0], src[1]);
|
||||
break;
|
||||
|
||||
@ -65,10 +67,10 @@ index 90fe1d1e7f0..a8cf02dd386 100644
|
||||
case nir_op_unpack_half_2x16_split_x:
|
||||
result = vir_FMOV(c, src[0]);
|
||||
vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_L);
|
||||
@@ -1719,6 +1735,29 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
result = vir_FMOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, tmp, zero));
|
||||
@@ -1698,6 +1714,30 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_H);
|
||||
break;
|
||||
}
|
||||
|
||||
+ case nir_op_vftounorm8_v3d:
|
||||
+ result = vir_VFTOUNORM8(c, src[0]);
|
||||
+ break;
|
||||
@ -92,14 +94,15 @@ index 90fe1d1e7f0..a8cf02dd386 100644
|
||||
+ case nir_op_ftosnorm16_v3d:
|
||||
+ result = vir_FTOSNORM16(c, src[0]);
|
||||
+ break;
|
||||
|
||||
+
|
||||
default:
|
||||
fprintf(stderr, "unknown NIR ALU inst: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
|
||||
index 36adf8830b5..425ab0cdf9d 100644
|
||||
index 095b33c03b8..5714e85d2b8 100644
|
||||
--- a/src/broadcom/compiler/v3d_compiler.h
|
||||
+++ b/src/broadcom/compiler/v3d_compiler.h
|
||||
@@ -1186,7 +1186,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader);
|
||||
@@ -1180,7 +1180,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader);
|
||||
bool v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
|
||||
bool v3d_nir_lower_scratch(nir_shader *s);
|
||||
bool v3d_nir_lower_txf_ms(nir_shader *s);
|
||||
@ -108,7 +111,7 @@ index 36adf8830b5..425ab0cdf9d 100644
|
||||
bool v3d_nir_lower_load_store_bitsize(nir_shader *s);
|
||||
|
||||
void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
|
||||
@@ -1427,6 +1427,20 @@ VIR_SFU(LOG)
|
||||
@@ -1421,6 +1421,20 @@ VIR_SFU(LOG)
|
||||
VIR_SFU(SIN)
|
||||
VIR_SFU(RSQRT2)
|
||||
|
||||
@ -130,7 +133,7 @@ index 36adf8830b5..425ab0cdf9d 100644
|
||||
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
|
||||
struct qreg dest, struct qreg src)
|
||||
diff --git a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
|
||||
index 2900a29817f..bbb55be4a14 100644
|
||||
index 5f8363377cb..ec43f834897 100644
|
||||
--- a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
|
||||
+++ b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
|
||||
@@ -40,6 +40,10 @@
|
||||
@ -151,9 +154,9 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ *
|
||||
+ * This is the generic helper, using all common nir operations.
|
||||
*/
|
||||
static nir_ssa_def *
|
||||
pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
|
||||
@@ -91,8 +97,185 @@ pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
|
||||
static nir_def *
|
||||
pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
|
||||
@@ -91,8 +97,180 @@ pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
|
||||
return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
|
||||
}
|
||||
|
||||
@ -161,46 +164,42 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ * just easier to read vfpack in the code, especially while using the PRM as
|
||||
+ * reference
|
||||
+ */
|
||||
+static nir_ssa_def *
|
||||
+nir_vfpack(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
|
||||
+static inline nir_def *
|
||||
+nir_vfpack(nir_builder *b, nir_def *p1, nir_def *p2)
|
||||
+{
|
||||
+ return nir_pack_half_2x16_split(b, p1, p2);
|
||||
+}
|
||||
+
|
||||
+static inline nir_ssa_def *
|
||||
+pack_11f11f10f(nir_builder *b, nir_ssa_def *color)
|
||||
+static inline nir_def *
|
||||
+pack_11f11f10f(nir_builder *b, nir_def *color)
|
||||
+{
|
||||
+ nir_ssa_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
|
||||
+ nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
|
||||
+ nir_channel(b, color, 1));
|
||||
+ /* FIXME: we noted that we could just use p2 again as the second
|
||||
+ * element to pack, and CTS tests still work. Using undef is just
|
||||
+ * slightly more correct
|
||||
+ */
|
||||
+ nir_ssa_def *undef = nir_ssa_undef(b, 1, color->bit_size);
|
||||
+ nir_ssa_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef);
|
||||
+ nir_def *undef = nir_undef(b, 1, color->bit_size);
|
||||
+ nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef);
|
||||
+
|
||||
+ return nir_v11fpack_v3d(b, p1, p2);
|
||||
+}
|
||||
+
|
||||
+static inline nir_ssa_def *
|
||||
+pack_r10g10b10a2_uint(nir_builder *b, nir_ssa_def *color)
|
||||
+static inline nir_def *
|
||||
+pack_r10g10b10a2_uint(nir_builder *b, nir_def *color)
|
||||
+{
|
||||
+ nir_ssa_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
|
||||
+ nir_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
|
||||
+ nir_channel(b, color, 1));
|
||||
+ nir_ssa_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2),
|
||||
+ nir_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2),
|
||||
+ nir_channel(b, color, 3));
|
||||
+
|
||||
+ return nir_v10pack_v3d(b, p1, p2);
|
||||
+}
|
||||
+
|
||||
+static inline nir_ssa_def *
|
||||
+pack_r10g10b10a2_unorm(nir_builder *b, nir_ssa_def *color)
|
||||
+static inline nir_def *
|
||||
+pack_r10g10b10a2_unorm(nir_builder *b, nir_def *color)
|
||||
+{
|
||||
+ nir_ssa_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
|
||||
+ nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
|
||||
+ nir_channel(b, color, 1));
|
||||
+ p1 = nir_vftounorm10lo_v3d(b, p1);
|
||||
+
|
||||
+ nir_ssa_def *p2 = nir_vfpack(b, nir_channel(b, color, 2),
|
||||
+ nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2),
|
||||
+ nir_channel(b, color, 3));
|
||||
+ p2 = nir_vftounorm10hi_v3d(b, p2);
|
||||
+
|
||||
@ -213,8 +212,8 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ TO_UNORM
|
||||
+};
|
||||
+
|
||||
+static inline nir_ssa_def *
|
||||
+pack_8bit(nir_builder *b, nir_ssa_def *color,
|
||||
+static inline nir_def *
|
||||
+pack_8bit(nir_builder *b, nir_def *color,
|
||||
+ unsigned num_components,
|
||||
+ enum hw_conversion conversion)
|
||||
+{
|
||||
@ -223,8 +222,8 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ * conversion. But we support also that case, and let the caller
|
||||
+ * decide which method to use.
|
||||
+ */
|
||||
+ nir_ssa_def *p1;
|
||||
+ nir_ssa_def *p2;
|
||||
+ nir_def *p1;
|
||||
+ nir_def *p2;
|
||||
+
|
||||
+ if (conversion == NONE) {
|
||||
+ p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
|
||||
@ -246,10 +245,9 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ nir_vftounorm8_v3d(b, p2) : nir_vftosnorm8_v3d(b, p2);
|
||||
+ }
|
||||
+ } else {
|
||||
+ /* As mentioned on the comment before, using an undef here
|
||||
+ * would be more correct. But for this case we are getting
|
||||
+ * worse values, and in fact even some worse instruction count
|
||||
+ * with some CTS tests, so we just reuse the first packing
|
||||
+ /* Using an undef here would be more correct. But for this
|
||||
+ * case we are getting worse shader-db values with some CTS
|
||||
+ * tests, so we just reuse the first packing.
|
||||
+ */
|
||||
+ p2 = p1;
|
||||
+ }
|
||||
@ -257,13 +255,13 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ return nir_v8pack_v3d(b, p1, p2);
|
||||
+}
|
||||
+
|
||||
+static inline nir_ssa_def *
|
||||
+pack_16bit(nir_builder *b, nir_ssa_def *color,
|
||||
+static inline nir_def *
|
||||
+pack_16bit(nir_builder *b, nir_def *color,
|
||||
+ unsigned num_components,
|
||||
+ enum hw_conversion conversion)
|
||||
+{
|
||||
+ nir_ssa_def *results[2];
|
||||
+ nir_ssa_def *channels[4];
|
||||
+ nir_def *results[2];
|
||||
+ nir_def *channels[4];
|
||||
+
|
||||
+ /* Note that usually you should not use this method (that relies on
|
||||
+ * custom packing) if we are not doing any conversion. But we support
|
||||
@ -299,8 +297,8 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ return nir_vec(b, results, DIV_ROUND_UP(num_components, 2));
|
||||
+}
|
||||
+
|
||||
+static inline nir_ssa_def *
|
||||
+pack_xbit(nir_builder *b, nir_ssa_def *color,
|
||||
+static inline nir_def *
|
||||
+pack_xbit(nir_builder *b, nir_def *color,
|
||||
+ unsigned num_components,
|
||||
+ const struct util_format_channel_description *r_chan)
|
||||
+{
|
||||
@ -340,7 +338,7 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
{
|
||||
enum pipe_format format = nir_intrinsic_format(instr);
|
||||
assert(format != PIPE_FORMAT_NONE);
|
||||
@@ -118,9 +301,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
@@ -118,9 +296,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
*/
|
||||
formatted = color;
|
||||
} else {
|
||||
@ -350,7 +348,7 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
const unsigned *bits;
|
||||
|
||||
switch (r_chan->size) {
|
||||
@@ -171,6 +351,52 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
@@ -170,6 +345,50 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -366,10 +364,9 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ unsigned num_components = util_format_get_nr_components(format);
|
||||
+ b->cursor = nir_before_instr(&instr->instr);
|
||||
+
|
||||
+ nir_ssa_def *color = nir_channels(b,
|
||||
+ nir_ssa_for_src(b, instr->src[3], 4),
|
||||
+ (1 << num_components) - 1);
|
||||
+ nir_ssa_def *formatted = NULL;
|
||||
+ nir_def *color =
|
||||
+ nir_trim_vector(b, instr->src[3].ssa, num_components);
|
||||
+ nir_def *formatted = NULL;
|
||||
+ if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
|
||||
+ formatted = nir_format_pack_r9g9b9e5(b, color);
|
||||
+ } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
|
||||
@ -393,8 +390,7 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
+ formatted = pack_xbit(b, color, num_components, r_chan);
|
||||
+ }
|
||||
+
|
||||
+ nir_instr_rewrite_src(&instr->instr, &instr->src[3],
|
||||
+ nir_src_for_ssa(formatted));
|
||||
+ nir_src_rewrite(&instr->src[3], formatted);
|
||||
+ instr->num_components = formatted->num_components;
|
||||
+
|
||||
+ return true;
|
||||
@ -403,10 +399,10 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
static bool
|
||||
v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
@@ -215,11 +441,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
|
||||
nir_intrinsic_instr *intr =
|
||||
nir_instr_as_intrinsic(instr);
|
||||
|
||||
@@ -207,11 +426,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
|
||||
nir_intrinsic_instr *intr,
|
||||
void *_state)
|
||||
{
|
||||
+ struct v3d_compile *c = (struct v3d_compile *) _state;
|
||||
+
|
||||
switch (intr->intrinsic) {
|
||||
@ -422,23 +418,24 @@ index 2900a29817f..bbb55be4a14 100644
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -228,9 +460,9 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
|
||||
@@ -220,10 +445,10 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
|
||||
}
|
||||
|
||||
bool
|
||||
-v3d_nir_lower_image_load_store(nir_shader *s)
|
||||
+v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c)
|
||||
{
|
||||
return nir_shader_instructions_pass(s, v3d_nir_lower_image_load_store_cb,
|
||||
return nir_shader_intrinsics_pass(s,
|
||||
v3d_nir_lower_image_load_store_cb,
|
||||
nir_metadata_block_index |
|
||||
- nir_metadata_dominance, NULL);
|
||||
+ nir_metadata_dominance, c);
|
||||
}
|
||||
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
|
||||
index aea113f050e..7612eed7130 100644
|
||||
index 8c536b8fbcc..acb13a6cbf9 100644
|
||||
--- a/src/broadcom/compiler/vir.c
|
||||
+++ b/src/broadcom/compiler/vir.c
|
||||
@@ -1576,7 +1576,7 @@ v3d_attempt_compile(struct v3d_compile *c)
|
||||
@@ -1599,7 +1599,7 @@ v3d_attempt_compile(struct v3d_compile *c)
|
||||
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_io, c);
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_txf_ms);
|
@ -1,65 +0,0 @@
|
||||
From 569cbe4229df737ce5915c4be2cad534707fb4f7 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 9 Nov 2021 08:50:51 +0100
|
||||
Subject: [PATCH 004/142] broadcom/common: retrieve V3D revision number
|
||||
|
||||
The subrev field from the hub ident3 register is bumped with every
|
||||
hardware revision doing backwards incompatible changes so we want to
|
||||
keep track of this.
|
||||
|
||||
Instead of modifying the 'ver' field info to accommodate subrev info,
|
||||
which would require a lot of changes, simply add a new 'rev' field in
|
||||
devinfo that we can use when we need to make changes based on the
|
||||
revision number of a hardware release.
|
||||
---
|
||||
src/broadcom/common/v3d_device_info.c | 14 +++++++++++++-
|
||||
src/broadcom/common/v3d_device_info.h | 3 +++
|
||||
2 files changed, 16 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
|
||||
index 7e0862f1f02..7512fe3a06b 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.c
|
||||
+++ b/src/broadcom/common/v3d_device_info.c
|
||||
@@ -36,6 +36,9 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
struct drm_v3d_get_param ident1 = {
|
||||
.param = DRM_V3D_PARAM_V3D_CORE0_IDENT1,
|
||||
};
|
||||
+ struct drm_v3d_get_param hub_ident3 = {
|
||||
+ .param = DRM_V3D_PARAM_V3D_HUB_IDENT3,
|
||||
+ };
|
||||
int ret;
|
||||
|
||||
ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &ident0);
|
||||
@@ -76,5 +79,14 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
return false;
|
||||
}
|
||||
|
||||
- return true;
|
||||
+ ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &hub_ident3);
|
||||
+ if (ret != 0) {
|
||||
+ fprintf(stderr, "Couldn't get V3D core HUB IDENT3: %s\n",
|
||||
+ strerror(errno));
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ devinfo->rev = (hub_ident3.value >> 8) & 0xff;
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
|
||||
index 97abd9b8d9f..32cb65cf81f 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.h
|
||||
+++ b/src/broadcom/common/v3d_device_info.h
|
||||
@@ -34,6 +34,9 @@ struct v3d_device_info {
|
||||
/** Simple V3D version: major * 10 + minor */
|
||||
uint8_t ver;
|
||||
|
||||
+ /** V3D revision number */
|
||||
+ uint8_t rev;
|
||||
+
|
||||
/** Size of the VPM, in bytes. */
|
||||
int vpm_size;
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,91 +0,0 @@
|
||||
From c260843c882d25bd31e308566b45d4517fda0fa2 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 17 Nov 2021 14:40:47 +0100
|
||||
Subject: [PATCH 005/142] broadcom/common: add some common v71 helpers
|
||||
|
||||
---
|
||||
src/broadcom/common/v3d_util.c | 27 +++++++++++++++++++++++++++
|
||||
src/broadcom/common/v3d_util.h | 27 +++++++++++++++++++++++++++
|
||||
2 files changed, 54 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
|
||||
index 57872a923d3..26f5c6b336f 100644
|
||||
--- a/src/broadcom/common/v3d_util.c
|
||||
+++ b/src/broadcom/common/v3d_util.c
|
||||
@@ -170,3 +170,30 @@ v3d_hw_prim_type(enum mesa_prim prim_type)
|
||||
unreachable("Unsupported primitive type");
|
||||
}
|
||||
}
|
||||
+
|
||||
+uint32_t
|
||||
+v3d_internal_bpp_words(uint32_t internal_bpp)
|
||||
+{
|
||||
+ switch (internal_bpp) {
|
||||
+ case 0 /* V3D_INTERNAL_BPP_32 */:
|
||||
+ return 1;
|
||||
+ case 1 /* V3D_INTERNAL_BPP_64 */:
|
||||
+ return 2;
|
||||
+ case 2 /* V3D_INTERNAL_BPP_128 */:
|
||||
+ return 4;
|
||||
+ default:
|
||||
+ unreachable("Unsupported internal BPP");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+uint32_t
|
||||
+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
|
||||
+ uint32_t bpp)
|
||||
+{
|
||||
+ /* stride in multiples of 128 bits, and covers 2 rows. This is the
|
||||
+ * reason we divide by 2 instead of 4, as we divide number of 32-bit
|
||||
+ * words per row by 2.
|
||||
+ */
|
||||
+
|
||||
+ return (tile_width * bpp) / 2;
|
||||
+}
|
||||
diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h
|
||||
index eb802b77f67..864fc949ffa 100644
|
||||
--- a/src/broadcom/common/v3d_util.h
|
||||
+++ b/src/broadcom/common/v3d_util.h
|
||||
@@ -24,6 +24,7 @@
|
||||
#ifndef V3D_UTIL_H
|
||||
#define V3D_UTIL_H
|
||||
|
||||
+#include "util/macros.h"
|
||||
#include "common/v3d_device_info.h"
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
@@ -46,4 +47,30 @@ v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle);
|
||||
uint32_t
|
||||
v3d_hw_prim_type(enum mesa_prim prim_type);
|
||||
|
||||
+uint32_t
|
||||
+v3d_internal_bpp_words(uint32_t internal_bpp);
|
||||
+
|
||||
+/* Some configuration packets want the size in log2, but starting at 0 for
|
||||
+ * size 8.
|
||||
+ */
|
||||
+static inline uint8_t
|
||||
+log2_tile_size(uint32_t size)
|
||||
+{
|
||||
+ switch(size) {
|
||||
+ case 8:
|
||||
+ return 0;
|
||||
+ case 16:
|
||||
+ return 1;
|
||||
+ case 32:
|
||||
+ return 2;
|
||||
+ case 64:
|
||||
+ return 3;
|
||||
+ default:
|
||||
+ unreachable("Unsupported tile width/height");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+uint32_t
|
||||
+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
|
||||
+ uint32_t bpp);
|
||||
#endif
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,53 +0,0 @@
|
||||
From a5211a4d71acc53183d2a90eb1694d8cce6eb44f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 5 Aug 2021 01:03:11 +0200
|
||||
Subject: [PATCH 006/142] broadcom/qpu: add comments on waddr not used on V3D
|
||||
7.x
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.h | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 2e133472698..45a0cad9760 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -88,11 +88,11 @@ enum v3d_qpu_uf {
|
||||
};
|
||||
|
||||
enum v3d_qpu_waddr {
|
||||
- V3D_QPU_WADDR_R0 = 0,
|
||||
- V3D_QPU_WADDR_R1 = 1,
|
||||
- V3D_QPU_WADDR_R2 = 2,
|
||||
- V3D_QPU_WADDR_R3 = 3,
|
||||
- V3D_QPU_WADDR_R4 = 4,
|
||||
+ V3D_QPU_WADDR_R0 = 0, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R1 = 1, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_R5 = 5,
|
||||
V3D_QPU_WADDR_NOP = 6,
|
||||
V3D_QPU_WADDR_TLB = 7,
|
||||
@@ -108,12 +108,12 @@ enum v3d_qpu_waddr {
|
||||
V3D_QPU_WADDR_SYNC = 16,
|
||||
V3D_QPU_WADDR_SYNCU = 17,
|
||||
V3D_QPU_WADDR_SYNCB = 18,
|
||||
- V3D_QPU_WADDR_RECIP = 19,
|
||||
- V3D_QPU_WADDR_RSQRT = 20,
|
||||
- V3D_QPU_WADDR_EXP = 21,
|
||||
- V3D_QPU_WADDR_LOG = 22,
|
||||
- V3D_QPU_WADDR_SIN = 23,
|
||||
- V3D_QPU_WADDR_RSQRT2 = 24,
|
||||
+ V3D_QPU_WADDR_RECIP = 19, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_RSQRT = 20, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_EXP = 21, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_LOG = 22, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_SIN = 23, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_RSQRT2 = 24, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_TMUC = 32,
|
||||
V3D_QPU_WADDR_TMUS = 33,
|
||||
V3D_QPU_WADDR_TMUT = 34,
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 0ccf3043e4a584e5592bb7fad737d5d98ed23db0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 5 Aug 2021 01:00:47 +0200
|
||||
Subject: [PATCH 007/142] broadcom/qpu: set V3D 7.x names for some waddr
|
||||
aliasing
|
||||
|
||||
V3D 7.x got rid of the accumulator, but still uses the values for
|
||||
WADDR_R5 and WADDR_R5REP, so let's return a proper name and add some
|
||||
aliases.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 8 ++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 6 ++++--
|
||||
2 files changed, 12 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 60dabf74e8e..7759fb0efdf 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -35,6 +35,14 @@ v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
|
||||
if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
|
||||
return "tmu";
|
||||
|
||||
+ /* V3D 7.x QUAD and REP alias R5 and R5REP in the table below
|
||||
+ */
|
||||
+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
|
||||
+ return "quad";
|
||||
+
|
||||
+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
|
||||
+ return "rep";
|
||||
+
|
||||
static const char *waddr_magic[] = {
|
||||
[V3D_QPU_WADDR_R0] = "r0",
|
||||
[V3D_QPU_WADDR_R1] = "r1",
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 45a0cad9760..19bf721dbe1 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -93,7 +93,8 @@ enum v3d_qpu_waddr {
|
||||
V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */
|
||||
- V3D_QPU_WADDR_R5 = 5,
|
||||
+ V3D_QPU_WADDR_R5 = 5, /* V3D 4.x */
|
||||
+ V3D_QPU_WADDR_QUAD = 5, /* V3D 7.x */
|
||||
V3D_QPU_WADDR_NOP = 6,
|
||||
V3D_QPU_WADDR_TLB = 7,
|
||||
V3D_QPU_WADDR_TLBU = 8,
|
||||
@@ -129,7 +130,8 @@ enum v3d_qpu_waddr {
|
||||
V3D_QPU_WADDR_TMUHSCM = 44,
|
||||
V3D_QPU_WADDR_TMUHSF = 45,
|
||||
V3D_QPU_WADDR_TMUHSLOD = 46,
|
||||
- V3D_QPU_WADDR_R5REP = 55,
|
||||
+ V3D_QPU_WADDR_R5REP = 55, /* V3D 4.x */
|
||||
+ V3D_QPU_WADDR_REP = 55, /* V3D 7.x */
|
||||
};
|
||||
|
||||
struct v3d_qpu_flags {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,241 +0,0 @@
|
||||
From 18de3cc85cf8bbe294e044f7a12abe14e554de0a Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Sun, 19 Sep 2021 03:20:18 +0200
|
||||
Subject: [PATCH 008/142] broadcom/compiler: rename small_imm to small_imm_b
|
||||
|
||||
Current small_imm is associated with the "B" read address.
|
||||
|
||||
We do this change in advance for v71 support, where we will have 4
|
||||
different small_imm (a/b/c/d), so we start with a renaming.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 22 +++++++++----------
|
||||
.../compiler/vir_opt_small_immediates.c | 4 ++--
|
||||
src/broadcom/compiler/vir_to_qpu.c | 2 +-
|
||||
src/broadcom/qpu/qpu_disasm.c | 2 +-
|
||||
src/broadcom/qpu/qpu_instr.h | 2 +-
|
||||
src/broadcom/qpu/qpu_pack.c | 22 +++++++++----------
|
||||
6 files changed, 27 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 3b32b48f86f..a10fa03ed10 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -160,7 +160,7 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
|
||||
break;
|
||||
case V3D_QPU_MUX_B:
|
||||
- if (!n->inst->qpu.sig.small_imm) {
|
||||
+ if (!n->inst->qpu.sig.small_imm_b) {
|
||||
add_read_dep(state,
|
||||
state->last_rf[n->inst->qpu.raddr_b], n);
|
||||
}
|
||||
@@ -615,7 +615,7 @@ qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
|
||||
return true;
|
||||
|
||||
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
|
||||
- !inst->sig.small_imm && (inst->raddr_b == waddr))
|
||||
+ !inst->sig.small_imm_b && (inst->raddr_b == waddr))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -790,11 +790,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
|
||||
uint64_t raddrs_used = 0;
|
||||
if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A))
|
||||
raddrs_used |= (1ll << a->raddr_a);
|
||||
- if (!a->sig.small_imm && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
|
||||
+ if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
|
||||
raddrs_used |= (1ll << a->raddr_b);
|
||||
if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A))
|
||||
raddrs_used |= (1ll << b->raddr_a);
|
||||
- if (!b->sig.small_imm && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
|
||||
+ if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
|
||||
raddrs_used |= (1ll << b->raddr_b);
|
||||
|
||||
return raddrs_used;
|
||||
@@ -816,16 +816,16 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
if (naddrs > 2)
|
||||
return false;
|
||||
|
||||
- if ((add_instr->sig.small_imm || mul_instr->sig.small_imm)) {
|
||||
+ if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) {
|
||||
if (naddrs > 1)
|
||||
return false;
|
||||
|
||||
- if (add_instr->sig.small_imm && mul_instr->sig.small_imm)
|
||||
+ if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b)
|
||||
if (add_instr->raddr_b != mul_instr->raddr_b)
|
||||
return false;
|
||||
|
||||
- result->sig.small_imm = true;
|
||||
- result->raddr_b = add_instr->sig.small_imm ?
|
||||
+ result->sig.small_imm_b = true;
|
||||
+ result->raddr_b = add_instr->sig.small_imm_b ?
|
||||
add_instr->raddr_b : mul_instr->raddr_b;
|
||||
}
|
||||
|
||||
@@ -836,7 +836,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
raddrs_used &= ~(1ll << raddr_a);
|
||||
result->raddr_a = raddr_a;
|
||||
|
||||
- if (!result->sig.small_imm) {
|
||||
+ if (!result->sig.small_imm_b) {
|
||||
if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
|
||||
raddr_a == add_instr->raddr_b) {
|
||||
if (add_instr->alu.add.a == V3D_QPU_MUX_B)
|
||||
@@ -1025,7 +1025,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
merge.sig.ldtmu |= b->sig.ldtmu;
|
||||
merge.sig.ldvary |= b->sig.ldvary;
|
||||
merge.sig.ldvpm |= b->sig.ldvpm;
|
||||
- merge.sig.small_imm |= b->sig.small_imm;
|
||||
+ merge.sig.small_imm_b |= b->sig.small_imm_b;
|
||||
merge.sig.ldtlb |= b->sig.ldtlb;
|
||||
merge.sig.ldtlbu |= b->sig.ldtlbu;
|
||||
merge.sig.ucb |= b->sig.ucb;
|
||||
@@ -1614,7 +1614,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
return false;
|
||||
|
||||
if (inst->raddr_b < 3 &&
|
||||
- !inst->sig.small_imm &&
|
||||
+ !inst->sig.small_imm_b &&
|
||||
v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
|
||||
return false;
|
||||
}
|
||||
diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
index 47d7722968d..df0d6c36c9b 100644
|
||||
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
@@ -80,7 +80,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
*/
|
||||
struct v3d_qpu_sig new_sig = inst->qpu.sig;
|
||||
uint32_t sig_packed;
|
||||
- new_sig.small_imm = true;
|
||||
+ new_sig.small_imm_b = true;
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed))
|
||||
continue;
|
||||
|
||||
@@ -89,7 +89,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
vir_dump_inst(c, inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
- inst->qpu.sig.small_imm = true;
|
||||
+ inst->qpu.sig.small_imm_b = true;
|
||||
inst->qpu.raddr_b = packed;
|
||||
|
||||
inst->src[i].file = QFILE_SMALL_IMM;
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index 45e6bfa1470..15c2e3674c2 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -94,7 +94,7 @@ static void
|
||||
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
{
|
||||
if (src.smimm) {
|
||||
- assert(instr->sig.small_imm);
|
||||
+ assert(instr->sig.small_imm_b);
|
||||
*mux = V3D_QPU_MUX_B;
|
||||
return;
|
||||
}
|
||||
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
|
||||
index 28fb2357b97..6aca3c28e78 100644
|
||||
--- a/src/broadcom/qpu/qpu_disasm.c
|
||||
+++ b/src/broadcom/qpu/qpu_disasm.c
|
||||
@@ -62,7 +62,7 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
if (mux == V3D_QPU_MUX_A) {
|
||||
append(disasm, "rf%d", instr->raddr_a);
|
||||
} else if (mux == V3D_QPU_MUX_B) {
|
||||
- if (instr->sig.small_imm) {
|
||||
+ if (instr->sig.small_imm_b) {
|
||||
uint32_t val;
|
||||
ASSERTED bool ok =
|
||||
v3d_qpu_small_imm_unpack(disasm->devinfo,
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 19bf721dbe1..9cd831863b4 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -50,7 +50,7 @@ struct v3d_qpu_sig {
|
||||
bool ldvpm:1;
|
||||
bool ldtlb:1;
|
||||
bool ldtlbu:1;
|
||||
- bool small_imm:1;
|
||||
+ bool small_imm_b:1;
|
||||
bool ucb:1;
|
||||
bool rotate:1;
|
||||
bool wrtmuc:1;
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index a875683c6f8..beac591d3c1 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -112,7 +112,7 @@
|
||||
#define LDTMU .ldtmu = true
|
||||
#define LDVARY .ldvary = true
|
||||
#define LDVPM .ldvpm = true
|
||||
-#define SMIMM .small_imm = true
|
||||
+#define SMIMM_B .small_imm_b = true
|
||||
#define LDTLB .ldtlb = true
|
||||
#define LDTLBU .ldtlbu = true
|
||||
#define UCB .ucb = true
|
||||
@@ -135,8 +135,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
[12] = { LDVARY, LDTMU, },
|
||||
[13] = { THRSW, LDVARY, LDTMU, },
|
||||
- [14] = { SMIMM, LDVARY, },
|
||||
- [15] = { SMIMM, },
|
||||
+ [14] = { SMIMM_B, LDVARY, },
|
||||
+ [15] = { SMIMM_B, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
/* 18-21 reserved */
|
||||
@@ -148,8 +148,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
[27] = { THRSW, LDVPM, LDUNIF },
|
||||
[28] = { LDVPM, LDTMU, },
|
||||
[29] = { THRSW, LDVPM, LDTMU, },
|
||||
- [30] = { SMIMM, LDVPM, },
|
||||
- [31] = { SMIMM, },
|
||||
+ [30] = { SMIMM_B, LDVPM, },
|
||||
+ [31] = { SMIMM_B, },
|
||||
};
|
||||
|
||||
static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
@@ -167,8 +167,8 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
[10] = { LDVARY, LDUNIF },
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
/* 12-13 reserved */
|
||||
- [14] = { SMIMM, LDVARY, },
|
||||
- [15] = { SMIMM, },
|
||||
+ [14] = { SMIMM_B, LDVARY, },
|
||||
+ [15] = { SMIMM_B, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
[18] = { WRTMUC },
|
||||
@@ -178,7 +178,7 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
[22] = { UCB, },
|
||||
[23] = { ROT, },
|
||||
/* 24-30 reserved */
|
||||
- [31] = { SMIMM, LDTMU, },
|
||||
+ [31] = { SMIMM_B, LDTMU, },
|
||||
};
|
||||
|
||||
static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
@@ -197,8 +197,8 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
[12] = { LDUNIFRF },
|
||||
[13] = { THRSW, LDUNIFRF },
|
||||
- [14] = { SMIMM, LDVARY, },
|
||||
- [15] = { SMIMM, },
|
||||
+ [14] = { SMIMM_B, LDVARY },
|
||||
+ [15] = { SMIMM_B, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
[18] = { WRTMUC },
|
||||
@@ -210,7 +210,7 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
[24] = { LDUNIFA},
|
||||
[25] = { LDUNIFARF },
|
||||
/* 26-30 reserved */
|
||||
- [31] = { SMIMM, LDTMU, },
|
||||
+ [31] = { SMIMM_B, LDTMU, },
|
||||
};
|
||||
|
||||
bool
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 0e87405fe73694c173b7ce14c3d60611f241922c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 5 Aug 2021 00:50:12 +0200
|
||||
Subject: [PATCH 009/142] broadcom/compiler: add small_imm a/c/d on v3d_qpu_sig
|
||||
|
||||
small_imm_a, small_imm_c and small_imm_d added on top of the already
|
||||
existing small_imm_b, as V3D 7.1 defines 4 small immediates, tied to
|
||||
the 4 raddr. Note that this is only the definition, plus an inst
|
||||
validation rule to check that they are not used before v71. Any real use is
|
||||
still pending.
|
||||
---
|
||||
src/broadcom/compiler/qpu_validate.c | 5 +++++
|
||||
src/broadcom/qpu/qpu_instr.h | 5 ++++-
|
||||
2 files changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index 2cc7a0eb0ae..12788692432 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -115,6 +115,11 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
return;
|
||||
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d)
|
||||
+ fail_instr(state, "small imm a/c/d added after V3D 7.1");
|
||||
+ }
|
||||
+
|
||||
/* LDVARY writes r5 two instructions later and LDUNIF writes
|
||||
* r5 one instruction later, which is illegal to have
|
||||
* together.
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 9cd831863b4..13b3f37d43f 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -50,10 +50,13 @@ struct v3d_qpu_sig {
|
||||
bool ldvpm:1;
|
||||
bool ldtlb:1;
|
||||
bool ldtlbu:1;
|
||||
- bool small_imm_b:1;
|
||||
bool ucb:1;
|
||||
bool rotate:1;
|
||||
bool wrtmuc:1;
|
||||
+ bool small_imm_a:1; /* raddr_a (add a), since V3D 7.x */
|
||||
+ bool small_imm_b:1; /* raddr_b (add b) */
|
||||
+ bool small_imm_c:1; /* raddr_c (mul a), since V3D 7.x */
|
||||
+ bool small_imm_d:1; /* raddr_d (mul b), since V3D 7.x */
|
||||
};
|
||||
|
||||
enum v3d_qpu_cond {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,106 +0,0 @@
|
||||
From eca19c911d9af3b0ab3b563ea65dc455e3d27987 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 4 Aug 2021 01:11:16 +0200
|
||||
Subject: [PATCH 010/142] broadcom/qpu: add v71 signal map
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Compared with v41, the differences are:
|
||||
* 14, 15, 30 and 31 are now about immediate a, b, c, d respectively
|
||||
* 23 is now reserved. On v42 this was for rotate signals, that are
|
||||
gone on v71.
|
||||
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 47 ++++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 44 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index beac591d3c1..2820d9d4c56 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -112,12 +112,15 @@
|
||||
#define LDTMU .ldtmu = true
|
||||
#define LDVARY .ldvary = true
|
||||
#define LDVPM .ldvpm = true
|
||||
-#define SMIMM_B .small_imm_b = true
|
||||
#define LDTLB .ldtlb = true
|
||||
#define LDTLBU .ldtlbu = true
|
||||
#define UCB .ucb = true
|
||||
#define ROT .rotate = true
|
||||
#define WRTMUC .wrtmuc = true
|
||||
+#define SMIMM_A .small_imm_a = true
|
||||
+#define SMIMM_B .small_imm_b = true
|
||||
+#define SMIMM_C .small_imm_c = true
|
||||
+#define SMIMM_D .small_imm_d = true
|
||||
|
||||
static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
/* MISC R3 R4 R5 */
|
||||
@@ -213,6 +216,40 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
[31] = { SMIMM_B, LDTMU, },
|
||||
};
|
||||
|
||||
+
|
||||
+static const struct v3d_qpu_sig v71_sig_map[] = {
|
||||
+ /* MISC phys RF0 */
|
||||
+ [0] = { },
|
||||
+ [1] = { THRSW, },
|
||||
+ [2] = { LDUNIF },
|
||||
+ [3] = { THRSW, LDUNIF },
|
||||
+ [4] = { LDTMU, },
|
||||
+ [5] = { THRSW, LDTMU, },
|
||||
+ [6] = { LDTMU, LDUNIF },
|
||||
+ [7] = { THRSW, LDTMU, LDUNIF },
|
||||
+ [8] = { LDVARY, },
|
||||
+ [9] = { THRSW, LDVARY, },
|
||||
+ [10] = { LDVARY, LDUNIF },
|
||||
+ [11] = { THRSW, LDVARY, LDUNIF },
|
||||
+ [12] = { LDUNIFRF },
|
||||
+ [13] = { THRSW, LDUNIFRF },
|
||||
+ [14] = { SMIMM_A, },
|
||||
+ [15] = { SMIMM_B, },
|
||||
+ [16] = { LDTLB, },
|
||||
+ [17] = { LDTLBU, },
|
||||
+ [18] = { WRTMUC },
|
||||
+ [19] = { THRSW, WRTMUC },
|
||||
+ [20] = { LDVARY, WRTMUC },
|
||||
+ [21] = { THRSW, LDVARY, WRTMUC },
|
||||
+ [22] = { UCB, },
|
||||
+ /* 23 reserved */
|
||||
+ [24] = { LDUNIFA},
|
||||
+ [25] = { LDUNIFARF },
|
||||
+ /* 26-29 reserved */
|
||||
+ [30] = { SMIMM_C, },
|
||||
+ [31] = { SMIMM_D, },
|
||||
+};
|
||||
+
|
||||
bool
|
||||
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
|
||||
uint32_t packed_sig,
|
||||
@@ -221,7 +258,9 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
|
||||
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
|
||||
return false;
|
||||
|
||||
- if (devinfo->ver >= 41)
|
||||
+ if (devinfo->ver >= 71)
|
||||
+ *sig = v71_sig_map[packed_sig];
|
||||
+ else if (devinfo->ver >= 41)
|
||||
*sig = v41_sig_map[packed_sig];
|
||||
else if (devinfo->ver == 40)
|
||||
*sig = v40_sig_map[packed_sig];
|
||||
@@ -240,7 +279,9 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
|
||||
{
|
||||
static const struct v3d_qpu_sig *map;
|
||||
|
||||
- if (devinfo->ver >= 41)
|
||||
+ if (devinfo->ver >= 71)
|
||||
+ map = v71_sig_map;
|
||||
+ else if (devinfo->ver >= 41)
|
||||
map = v41_sig_map;
|
||||
else if (devinfo->ver == 40)
|
||||
map = v40_sig_map;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,778 +0,0 @@
|
||||
From d10e67a396d713ec81fb133f3516e09fe1e067b6 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 6 Aug 2021 01:22:31 +0200
|
||||
Subject: [PATCH 011/142] broadcom/qpu: define v3d_qpu_input, use on
|
||||
v3d_qpu_alu_instr
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
At this point it just tidies up the alu_instr structure a little.
|
||||
|
||||
But it also serves to prepare the structure for new changes, as 7.x uses
|
||||
raddr instead of mux, and it is just easier to add the raddr to the
|
||||
new input structure.
|
||||
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 65 +++++++--------
|
||||
src/broadcom/compiler/vir.c | 16 ++--
|
||||
src/broadcom/compiler/vir_dump.c | 8 +-
|
||||
.../compiler/vir_opt_copy_propagate.c | 12 +--
|
||||
.../compiler/vir_opt_redundant_flags.c | 8 +-
|
||||
src/broadcom/compiler/vir_to_qpu.c | 30 +++----
|
||||
src/broadcom/qpu/qpu_disasm.c | 16 ++--
|
||||
src/broadcom/qpu/qpu_instr.c | 8 +-
|
||||
src/broadcom/qpu/qpu_instr.h | 13 +--
|
||||
src/broadcom/qpu/qpu_pack.c | 82 +++++++++----------
|
||||
src/broadcom/qpu/tests/qpu_disasm.c | 8 +-
|
||||
11 files changed, 134 insertions(+), 132 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index a10fa03ed10..455fa3867be 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -306,14 +306,14 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
||||
/* XXX: LOAD_IMM */
|
||||
|
||||
if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0)
|
||||
- process_mux_deps(state, n, inst->alu.add.a);
|
||||
+ process_mux_deps(state, n, inst->alu.add.a.mux);
|
||||
if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1)
|
||||
- process_mux_deps(state, n, inst->alu.add.b);
|
||||
+ process_mux_deps(state, n, inst->alu.add.b.mux);
|
||||
|
||||
if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0)
|
||||
- process_mux_deps(state, n, inst->alu.mul.a);
|
||||
+ process_mux_deps(state, n, inst->alu.mul.a.mux);
|
||||
if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1)
|
||||
- process_mux_deps(state, n, inst->alu.mul.b);
|
||||
+ process_mux_deps(state, n, inst->alu.mul.b.mux);
|
||||
|
||||
switch (inst->alu.add.op) {
|
||||
case V3D_QPU_A_VPMSETUP:
|
||||
@@ -537,22 +537,22 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
|
||||
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP) {
|
||||
if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.add.a)) {
|
||||
+ mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) {
|
||||
return true;
|
||||
}
|
||||
if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.add.b)) {
|
||||
+ mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->alu.mul.op != V3D_QPU_M_NOP) {
|
||||
if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a)) {
|
||||
+ mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) {
|
||||
return true;
|
||||
}
|
||||
if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b)) {
|
||||
+ mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -839,20 +839,20 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
if (!result->sig.small_imm_b) {
|
||||
if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
|
||||
raddr_a == add_instr->raddr_b) {
|
||||
- if (add_instr->alu.add.a == V3D_QPU_MUX_B)
|
||||
- result->alu.add.a = V3D_QPU_MUX_A;
|
||||
- if (add_instr->alu.add.b == V3D_QPU_MUX_B &&
|
||||
+ if (add_instr->alu.add.a.mux == V3D_QPU_MUX_B)
|
||||
+ result->alu.add.a.mux = V3D_QPU_MUX_A;
|
||||
+ if (add_instr->alu.add.b.mux == V3D_QPU_MUX_B &&
|
||||
v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
|
||||
- result->alu.add.b = V3D_QPU_MUX_A;
|
||||
+ result->alu.add.b.mux = V3D_QPU_MUX_A;
|
||||
}
|
||||
}
|
||||
if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_B) &&
|
||||
raddr_a == mul_instr->raddr_b) {
|
||||
- if (mul_instr->alu.mul.a == V3D_QPU_MUX_B)
|
||||
- result->alu.mul.a = V3D_QPU_MUX_A;
|
||||
- if (mul_instr->alu.mul.b == V3D_QPU_MUX_B &&
|
||||
+ if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_B)
|
||||
+ result->alu.mul.a.mux = V3D_QPU_MUX_A;
|
||||
+ if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_B &&
|
||||
v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
|
||||
- result->alu.mul.b = V3D_QPU_MUX_A;
|
||||
+ result->alu.mul.b.mux = V3D_QPU_MUX_A;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -863,20 +863,20 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
result->raddr_b = raddr_b;
|
||||
if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_A) &&
|
||||
raddr_b == add_instr->raddr_a) {
|
||||
- if (add_instr->alu.add.a == V3D_QPU_MUX_A)
|
||||
- result->alu.add.a = V3D_QPU_MUX_B;
|
||||
- if (add_instr->alu.add.b == V3D_QPU_MUX_A &&
|
||||
+ if (add_instr->alu.add.a.mux == V3D_QPU_MUX_A)
|
||||
+ result->alu.add.a.mux = V3D_QPU_MUX_B;
|
||||
+ if (add_instr->alu.add.b.mux == V3D_QPU_MUX_A &&
|
||||
v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
|
||||
- result->alu.add.b = V3D_QPU_MUX_B;
|
||||
+ result->alu.add.b.mux = V3D_QPU_MUX_B;
|
||||
}
|
||||
}
|
||||
if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_A) &&
|
||||
raddr_b == mul_instr->raddr_a) {
|
||||
- if (mul_instr->alu.mul.a == V3D_QPU_MUX_A)
|
||||
- result->alu.mul.a = V3D_QPU_MUX_B;
|
||||
- if (mul_instr->alu.mul.b == V3D_QPU_MUX_A &&
|
||||
+ if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_A)
|
||||
+ result->alu.mul.a.mux = V3D_QPU_MUX_B;
|
||||
+ if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_A &&
|
||||
v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
|
||||
- result->alu.mul.b = V3D_QPU_MUX_B;
|
||||
+ result->alu.mul.b.mux = V3D_QPU_MUX_B;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -927,11 +927,12 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
|
||||
inst->flags.auf = V3D_QPU_UF_NONE;
|
||||
|
||||
inst->alu.mul.output_pack = inst->alu.add.output_pack;
|
||||
- inst->alu.mul.a_unpack = inst->alu.add.a_unpack;
|
||||
- inst->alu.mul.b_unpack = inst->alu.add.b_unpack;
|
||||
+
|
||||
+ inst->alu.mul.a.unpack = inst->alu.add.a.unpack;
|
||||
+ inst->alu.mul.b.unpack = inst->alu.add.b.unpack;
|
||||
inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
- inst->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
|
||||
- inst->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -2064,12 +2065,12 @@ alu_reads_register(struct v3d_qpu_instr *inst,
|
||||
|
||||
if (add) {
|
||||
num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
|
||||
- mux_a = inst->alu.add.a;
|
||||
- mux_b = inst->alu.add.b;
|
||||
+ mux_a = inst->alu.add.a.mux;
|
||||
+ mux_b = inst->alu.add.b.mux;
|
||||
} else {
|
||||
num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
- mux_a = inst->alu.mul.a;
|
||||
- mux_b = inst->alu.mul.b;
|
||||
+ mux_a = inst->alu.mul.a.mux;
|
||||
+ mux_b = inst->alu.mul.b.mux;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_src; i++) {
|
||||
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
|
||||
index 660b11b0577..007cb0a941b 100644
|
||||
--- a/src/broadcom/compiler/vir.c
|
||||
+++ b/src/broadcom/compiler/vir.c
|
||||
@@ -113,10 +113,10 @@ vir_is_raw_mov(struct qinst *inst)
|
||||
return false;
|
||||
}
|
||||
|
||||
- if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
|
||||
- inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
|
||||
- inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
|
||||
- inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
|
||||
+ if (inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -209,15 +209,15 @@ vir_set_unpack(struct qinst *inst, int src,
|
||||
|
||||
if (vir_is_add(inst)) {
|
||||
if (src == 0)
|
||||
- inst->qpu.alu.add.a_unpack = unpack;
|
||||
+ inst->qpu.alu.add.a.unpack = unpack;
|
||||
else
|
||||
- inst->qpu.alu.add.b_unpack = unpack;
|
||||
+ inst->qpu.alu.add.b.unpack = unpack;
|
||||
} else {
|
||||
assert(vir_is_mul(inst));
|
||||
if (src == 0)
|
||||
- inst->qpu.alu.mul.a_unpack = unpack;
|
||||
+ inst->qpu.alu.mul.a.unpack = unpack;
|
||||
else
|
||||
- inst->qpu.alu.mul.b_unpack = unpack;
|
||||
+ inst->qpu.alu.mul.b.unpack = unpack;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c
|
||||
index 5c47bbdc1b0..ab5d4043039 100644
|
||||
--- a/src/broadcom/compiler/vir_dump.c
|
||||
+++ b/src/broadcom/compiler/vir_dump.c
|
||||
@@ -270,8 +270,8 @@ vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
|
||||
vir_print_reg(c, inst, inst->dst);
|
||||
fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.add.output_pack));
|
||||
|
||||
- unpack[0] = instr->alu.add.a_unpack;
|
||||
- unpack[1] = instr->alu.add.b_unpack;
|
||||
+ unpack[0] = instr->alu.add.a.unpack;
|
||||
+ unpack[1] = instr->alu.add.b.unpack;
|
||||
} else {
|
||||
fprintf(stderr, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
|
||||
fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.mc));
|
||||
@@ -282,8 +282,8 @@ vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
|
||||
vir_print_reg(c, inst, inst->dst);
|
||||
fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.mul.output_pack));
|
||||
|
||||
- unpack[0] = instr->alu.mul.a_unpack;
|
||||
- unpack[1] = instr->alu.mul.b_unpack;
|
||||
+ unpack[0] = instr->alu.mul.a.unpack;
|
||||
+ unpack[1] = instr->alu.mul.b.unpack;
|
||||
}
|
||||
|
||||
for (int i = 0; i < nsrc; i++) {
|
||||
diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c
|
||||
index da121c2a5bd..c4aa7255a17 100644
|
||||
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
|
||||
+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
|
||||
@@ -104,14 +104,14 @@ vir_has_unpack(struct qinst *inst, int chan)
|
||||
|
||||
if (vir_is_add(inst)) {
|
||||
if (chan == 0)
|
||||
- return inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE;
|
||||
+ return inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE;
|
||||
else
|
||||
- return inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE;
|
||||
+ return inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE;
|
||||
} else {
|
||||
if (chan == 0)
|
||||
- return inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE;
|
||||
+ return inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE;
|
||||
else
|
||||
- return inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE;
|
||||
+ return inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,7 +161,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
|
||||
continue;
|
||||
|
||||
/* these ops can't represent abs. */
|
||||
- if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) {
|
||||
+ if (mov->qpu.alu.mul.a.unpack == V3D_QPU_UNPACK_ABS) {
|
||||
switch (inst->qpu.alu.add.op) {
|
||||
case V3D_QPU_A_VFPACK:
|
||||
case V3D_QPU_A_FROUND:
|
||||
@@ -189,7 +189,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
|
||||
|
||||
inst->src[i] = mov->src[0];
|
||||
if (vir_has_unpack(mov, 0)) {
|
||||
- enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a_unpack;
|
||||
+ enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a.unpack;
|
||||
|
||||
vir_set_unpack(inst, i, unpack);
|
||||
}
|
||||
diff --git a/src/broadcom/compiler/vir_opt_redundant_flags.c b/src/broadcom/compiler/vir_opt_redundant_flags.c
|
||||
index c7896d57f2b..6b61ed6a39a 100644
|
||||
--- a/src/broadcom/compiler/vir_opt_redundant_flags.c
|
||||
+++ b/src/broadcom/compiler/vir_opt_redundant_flags.c
|
||||
@@ -81,11 +81,11 @@ vir_instr_flags_op_equal(struct qinst *a, struct qinst *b)
|
||||
a->qpu.flags.mpf != b->qpu.flags.mpf ||
|
||||
a->qpu.alu.add.op != b->qpu.alu.add.op ||
|
||||
a->qpu.alu.mul.op != b->qpu.alu.mul.op ||
|
||||
- a->qpu.alu.add.a_unpack != b->qpu.alu.add.a_unpack ||
|
||||
- a->qpu.alu.add.b_unpack != b->qpu.alu.add.b_unpack ||
|
||||
+ a->qpu.alu.add.a.unpack != b->qpu.alu.add.a.unpack ||
|
||||
+ a->qpu.alu.add.b.unpack != b->qpu.alu.add.b.unpack ||
|
||||
a->qpu.alu.add.output_pack != b->qpu.alu.add.output_pack ||
|
||||
- a->qpu.alu.mul.a_unpack != b->qpu.alu.mul.a_unpack ||
|
||||
- a->qpu.alu.mul.b_unpack != b->qpu.alu.mul.b_unpack ||
|
||||
+ a->qpu.alu.mul.a.unpack != b->qpu.alu.mul.a.unpack ||
|
||||
+ a->qpu.alu.mul.b.unpack != b->qpu.alu.mul.b.unpack ||
|
||||
a->qpu.alu.mul.output_pack != b->qpu.alu.mul.output_pack) {
|
||||
return false;
|
||||
}
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index 15c2e3674c2..c8b6e0a91a0 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -106,20 +106,20 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
return;
|
||||
}
|
||||
|
||||
- if (instr->alu.add.a != V3D_QPU_MUX_A &&
|
||||
- instr->alu.add.b != V3D_QPU_MUX_A &&
|
||||
- instr->alu.mul.a != V3D_QPU_MUX_A &&
|
||||
- instr->alu.mul.b != V3D_QPU_MUX_A) {
|
||||
+ if (instr->alu.add.a.mux != V3D_QPU_MUX_A &&
|
||||
+ instr->alu.add.b.mux != V3D_QPU_MUX_A &&
|
||||
+ instr->alu.mul.a.mux != V3D_QPU_MUX_A &&
|
||||
+ instr->alu.mul.b.mux != V3D_QPU_MUX_A) {
|
||||
instr->raddr_a = src.index;
|
||||
*mux = V3D_QPU_MUX_A;
|
||||
} else {
|
||||
if (instr->raddr_a == src.index) {
|
||||
*mux = V3D_QPU_MUX_A;
|
||||
} else {
|
||||
- assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
|
||||
- instr->alu.add.b == V3D_QPU_MUX_B &&
|
||||
- instr->alu.mul.a == V3D_QPU_MUX_B &&
|
||||
- instr->alu.mul.b == V3D_QPU_MUX_B) ||
|
||||
+ assert(!(instr->alu.add.a.mux == V3D_QPU_MUX_B &&
|
||||
+ instr->alu.add.b.mux == V3D_QPU_MUX_B &&
|
||||
+ instr->alu.mul.a.mux == V3D_QPU_MUX_B &&
|
||||
+ instr->alu.mul.b.mux == V3D_QPU_MUX_B) ||
|
||||
src.index == instr->raddr_b);
|
||||
|
||||
instr->raddr_b = src.index;
|
||||
@@ -147,14 +147,14 @@ is_no_op_mov(struct qinst *qinst)
|
||||
if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
|
||||
return false;
|
||||
|
||||
- if (qinst->qpu.alu.mul.a !=
|
||||
+ if (qinst->qpu.alu.mul.a.mux !=
|
||||
V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
int raddr;
|
||||
|
||||
- switch (qinst->qpu.alu.mul.a) {
|
||||
+ switch (qinst->qpu.alu.mul.a.mux) {
|
||||
case V3D_QPU_MUX_A:
|
||||
raddr = qinst->qpu.raddr_a;
|
||||
break;
|
||||
@@ -171,7 +171,7 @@ is_no_op_mov(struct qinst *qinst)
|
||||
/* No packing or flags updates, or we need to execute the
|
||||
* instruction.
|
||||
*/
|
||||
- if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ if (qinst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
|
||||
qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
|
||||
qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
|
||||
@@ -302,11 +302,11 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
|
||||
if (nsrc >= 1) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.add.a, src[0]);
|
||||
+ &qinst->qpu.alu.add.a.mux, src[0]);
|
||||
}
|
||||
if (nsrc >= 2) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.add.b, src[1]);
|
||||
+ &qinst->qpu.alu.add.b.mux, src[1]);
|
||||
}
|
||||
|
||||
qinst->qpu.alu.add.waddr = dst.index;
|
||||
@@ -314,11 +314,11 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
} else {
|
||||
if (nsrc >= 1) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.mul.a, src[0]);
|
||||
+ &qinst->qpu.alu.mul.a.mux, src[0]);
|
||||
}
|
||||
if (nsrc >= 2) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.mul.b, src[1]);
|
||||
+ &qinst->qpu.alu.mul.b.mux, src[1]);
|
||||
}
|
||||
|
||||
qinst->qpu.alu.mul.waddr = dst.index;
|
||||
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
|
||||
index 6aca3c28e78..588a665f770 100644
|
||||
--- a/src/broadcom/qpu/qpu_disasm.c
|
||||
+++ b/src/broadcom/qpu/qpu_disasm.c
|
||||
@@ -121,16 +121,16 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
|
||||
if (num_src >= 1) {
|
||||
if (has_dst)
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a.mux);
|
||||
append(disasm, "%s",
|
||||
- v3d_qpu_unpack_name(instr->alu.add.a_unpack));
|
||||
+ v3d_qpu_unpack_name(instr->alu.add.a.unpack));
|
||||
}
|
||||
|
||||
if (num_src >= 2) {
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b.mux);
|
||||
append(disasm, "%s",
|
||||
- v3d_qpu_unpack_name(instr->alu.add.b_unpack));
|
||||
+ v3d_qpu_unpack_name(instr->alu.add.b.unpack));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,16 +164,16 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
|
||||
if (num_src >= 1) {
|
||||
if (has_dst)
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a.mux);
|
||||
append(disasm, "%s",
|
||||
- v3d_qpu_unpack_name(instr->alu.mul.a_unpack));
|
||||
+ v3d_qpu_unpack_name(instr->alu.mul.a.unpack));
|
||||
}
|
||||
|
||||
if (num_src >= 2) {
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b.mux);
|
||||
append(disasm, "%s",
|
||||
- v3d_qpu_unpack_name(instr->alu.mul.b_unpack));
|
||||
+ v3d_qpu_unpack_name(instr->alu.mul.b.unpack));
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 7759fb0efdf..7ece8b5e570 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -926,10 +926,10 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
|
||||
int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
|
||||
int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
|
||||
- return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
|
||||
- (add_nsrc > 1 && inst->alu.add.b == mux) ||
|
||||
- (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
|
||||
- (mul_nsrc > 1 && inst->alu.mul.b == mux));
|
||||
+ return ((add_nsrc > 0 && inst->alu.add.a.mux == mux) ||
|
||||
+ (add_nsrc > 1 && inst->alu.add.b.mux == mux) ||
|
||||
+ (mul_nsrc > 0 && inst->alu.mul.a.mux == mux) ||
|
||||
+ (mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
|
||||
}
|
||||
|
||||
bool
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 13b3f37d43f..53a51bfb3e1 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -294,25 +294,26 @@ enum v3d_qpu_mux {
|
||||
V3D_QPU_MUX_B,
|
||||
};
|
||||
|
||||
+struct v3d_qpu_input {
|
||||
+ enum v3d_qpu_mux mux;
|
||||
+ enum v3d_qpu_input_unpack unpack;
|
||||
+};
|
||||
+
|
||||
struct v3d_qpu_alu_instr {
|
||||
struct {
|
||||
enum v3d_qpu_add_op op;
|
||||
- enum v3d_qpu_mux a, b;
|
||||
+ struct v3d_qpu_input a, b;
|
||||
uint8_t waddr;
|
||||
bool magic_write;
|
||||
enum v3d_qpu_output_pack output_pack;
|
||||
- enum v3d_qpu_input_unpack a_unpack;
|
||||
- enum v3d_qpu_input_unpack b_unpack;
|
||||
} add;
|
||||
|
||||
struct {
|
||||
enum v3d_qpu_mul_op op;
|
||||
- enum v3d_qpu_mux a, b;
|
||||
+ struct v3d_qpu_input a, b;
|
||||
uint8_t waddr;
|
||||
bool magic_write;
|
||||
enum v3d_qpu_output_pack output_pack;
|
||||
- enum v3d_qpu_input_unpack a_unpack;
|
||||
- enum v3d_qpu_input_unpack b_unpack;
|
||||
} mul;
|
||||
};
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 2820d9d4c56..6e975793fc0 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -853,12 +853,12 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
|
||||
- &instr->alu.add.a_unpack)) {
|
||||
+ &instr->alu.add.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
|
||||
- &instr->alu.add.b_unpack)) {
|
||||
+ &instr->alu.add.b.unpack)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@@ -872,7 +872,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
instr->alu.add.output_pack = mux_b & 0x3;
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
|
||||
- &instr->alu.add.a_unpack)) {
|
||||
+ &instr->alu.add.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@@ -884,7 +884,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
|
||||
- &instr->alu.add.a_unpack)) {
|
||||
+ &instr->alu.add.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@@ -892,23 +892,23 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
case V3D_QPU_A_VFMIN:
|
||||
case V3D_QPU_A_VFMAX:
|
||||
if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
|
||||
- &instr->alu.add.a_unpack)) {
|
||||
+ &instr->alu.add.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
- instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
break;
|
||||
|
||||
default:
|
||||
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
- instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
|
||||
- instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
break;
|
||||
}
|
||||
|
||||
- instr->alu.add.a = mux_a;
|
||||
- instr->alu.add.b = mux_b;
|
||||
+ instr->alu.add.a.mux = mux_a;
|
||||
+ instr->alu.add.b.mux = mux_b;
|
||||
instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
|
||||
|
||||
instr->alu.add.magic_write = false;
|
||||
@@ -956,12 +956,12 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
|
||||
- &instr->alu.mul.a_unpack)) {
|
||||
+ &instr->alu.mul.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
|
||||
- &instr->alu.mul.b_unpack)) {
|
||||
+ &instr->alu.mul.b.unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -972,7 +972,7 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
((mux_b >> 2) & 1));
|
||||
|
||||
if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
|
||||
- &instr->alu.mul.a_unpack)) {
|
||||
+ &instr->alu.mul.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -982,23 +982,23 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
|
||||
instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
|
||||
|
||||
if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
|
||||
- &instr->alu.mul.a_unpack)) {
|
||||
+ &instr->alu.mul.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
- instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
|
||||
- instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
|
||||
- instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
break;
|
||||
}
|
||||
|
||||
- instr->alu.mul.a = mux_a;
|
||||
- instr->alu.mul.b = mux_b;
|
||||
+ instr->alu.mul.a.mux = mux_a;
|
||||
+ instr->alu.mul.b.mux = mux_b;
|
||||
instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
|
||||
instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
|
||||
|
||||
@@ -1030,8 +1030,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
|
||||
{
|
||||
uint32_t waddr = instr->alu.add.waddr;
|
||||
- uint32_t mux_a = instr->alu.add.a;
|
||||
- uint32_t mux_b = instr->alu.add.b;
|
||||
+ uint32_t mux_a = instr->alu.add.a.mux;
|
||||
+ uint32_t mux_b = instr->alu.add.b.mux;
|
||||
int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
|
||||
const struct opcode_desc *desc =
|
||||
lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
|
||||
@@ -1102,12 +1102,12 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
}
|
||||
opcode |= output_pack << 4;
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
|
||||
&a_unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
|
||||
&b_unpack)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1141,17 +1141,17 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
uint32_t a_unpack;
|
||||
uint32_t b_unpack;
|
||||
|
||||
- if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
|
||||
- instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
|
||||
+ if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
|
||||
+ instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
|
||||
&a_unpack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
|
||||
&b_unpack)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1176,7 +1176,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
}
|
||||
mux_b |= packed;
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1194,7 +1194,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
return false;
|
||||
|
||||
uint32_t packed;
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1207,11 +1207,11 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
case V3D_QPU_A_VFMIN:
|
||||
case V3D_QPU_A_VFMAX:
|
||||
if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
|
||||
- instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
|
||||
+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
- if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
|
||||
+ if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1221,8 +1221,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
default:
|
||||
if (instr->alu.add.op != V3D_QPU_A_NOP &&
|
||||
(instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
|
||||
- instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
|
||||
- instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
|
||||
+ instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@@ -1242,8 +1242,8 @@ static bool
|
||||
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
|
||||
{
|
||||
- uint32_t mux_a = instr->alu.mul.a;
|
||||
- uint32_t mux_b = instr->alu.mul.b;
|
||||
+ uint32_t mux_a = instr->alu.mul.a.mux;
|
||||
+ uint32_t mux_b = instr->alu.mul.b.mux;
|
||||
int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
|
||||
|
||||
const struct opcode_desc *desc =
|
||||
@@ -1277,13 +1277,13 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
*/
|
||||
opcode += packed << 4;
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
opcode |= packed << 2;
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1301,7 +1301,7 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
opcode |= (packed >> 1) & 1;
|
||||
mux_b = (packed & 1) << 2;
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1315,16 +1315,16 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
|
||||
return false;
|
||||
|
||||
- if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
|
||||
+ if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
- if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
|
||||
+ if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
|
||||
opcode = 8;
|
||||
else
|
||||
opcode |= (packed + 4) & 7;
|
||||
|
||||
- if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
|
||||
+ if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
|
||||
return false;
|
||||
|
||||
break;
|
||||
diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c
|
||||
index 2f8e19c73fe..be7b78d5ef0 100644
|
||||
--- a/src/broadcom/qpu/tests/qpu_disasm.c
|
||||
+++ b/src/broadcom/qpu/tests/qpu_disasm.c
|
||||
@@ -160,10 +160,10 @@ main(int argc, char **argv)
|
||||
/* Swap the operands to be sure that we test
|
||||
* how the QPUs distinguish between these ops.
|
||||
*/
|
||||
- swap_mux(&instr.alu.add.a,
|
||||
- &instr.alu.add.b);
|
||||
- swap_pack(&instr.alu.add.a_unpack,
|
||||
- &instr.alu.add.b_unpack);
|
||||
+ swap_mux(&instr.alu.add.a.mux,
|
||||
+ &instr.alu.add.b.mux);
|
||||
+ swap_pack(&instr.alu.add.a.unpack,
|
||||
+ &instr.alu.add.b.unpack);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,45 +0,0 @@
|
||||
From 52ea09792ff8a438ccdecac47b8415657be90098 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 6 Aug 2021 01:33:32 +0200
|
||||
Subject: [PATCH 012/142] broadcom/qpu: add raddr on v3d_qpu_input
|
||||
|
||||
On V3D 7.x mux are not used, and raddr_a/b/c/d are used instead
|
||||
|
||||
This is not perfect, as for v71, the raddr_a/b defined at qpu_instr
|
||||
became superfluous. But the alternative would be to define two
|
||||
different structs, or even having them defined based on version
|
||||
ifdefs, so this is a reasonable compromise.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.h | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 53a51bfb3e1..9e56e2d6a99 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -295,7 +295,10 @@ enum v3d_qpu_mux {
|
||||
};
|
||||
|
||||
struct v3d_qpu_input {
|
||||
- enum v3d_qpu_mux mux;
|
||||
+ union {
|
||||
+ enum v3d_qpu_mux mux; /* V3D 4.x */
|
||||
+ uint8_t raddr; /* V3D 7.x */
|
||||
+ };
|
||||
enum v3d_qpu_input_unpack unpack;
|
||||
};
|
||||
|
||||
@@ -385,8 +388,8 @@ struct v3d_qpu_instr {
|
||||
struct v3d_qpu_sig sig;
|
||||
uint8_t sig_addr;
|
||||
bool sig_magic; /* If the signal writes to a magic address */
|
||||
- uint8_t raddr_a;
|
||||
- uint8_t raddr_b;
|
||||
+ uint8_t raddr_a; /* V3D 4.x */
|
||||
+ uint8_t raddr_b; /* V3D 4.x*/
|
||||
struct v3d_qpu_flags flags;
|
||||
|
||||
union {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,37 +0,0 @@
|
||||
From 3e5ad0881c2789619cdf65f40a44d5481e28e800 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 12 Aug 2021 02:24:02 +0200
|
||||
Subject: [PATCH 013/142] broadcom/qpu: defining shift/mask for raddr_c/d
|
||||
|
||||
On V3D 7.x it replaces mul_a/b and add_a/b
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 6e975793fc0..4f106909729 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -84,6 +84,9 @@
|
||||
#define V3D_QPU_MUL_A_SHIFT 18
|
||||
#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)
|
||||
|
||||
+#define V3D_QPU_RADDR_C_SHIFT 18
|
||||
+#define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18)
|
||||
+
|
||||
#define V3D_QPU_ADD_B_SHIFT 15
|
||||
#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)
|
||||
|
||||
@@ -98,6 +101,9 @@
|
||||
#define V3D_QPU_BRANCH_BDI_SHIFT 12
|
||||
#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
|
||||
|
||||
+#define V3D_QPU_RADDR_D_SHIFT 12
|
||||
+#define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12)
|
||||
+
|
||||
#define V3D_QPU_RADDR_A_SHIFT 6
|
||||
#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,46 +0,0 @@
|
||||
From 81febf14fe05ad26e992275b911e8bc1e1416ebc Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 17 Sep 2021 01:04:31 +0200
|
||||
Subject: [PATCH 014/142] broadcom/commmon: add has_accumulators field on
|
||||
v3d_device_info
|
||||
|
||||
Even if we can just check for the version on the code, checking for
|
||||
this field makes several places more readable. So for example, on the
|
||||
register allocate code we don't assign an accumulator because we
|
||||
don't have accumulators on that hw, instead of because hw version is a
|
||||
given one.
|
||||
---
|
||||
src/broadcom/common/v3d_device_info.c | 2 ++
|
||||
src/broadcom/common/v3d_device_info.h | 3 +++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
|
||||
index 7512fe3a06b..7bc2b662cfc 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.c
|
||||
+++ b/src/broadcom/common/v3d_device_info.c
|
||||
@@ -65,6 +65,8 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
int qups = (ident1.value >> 8) & 0xf;
|
||||
devinfo->qpu_count = nslc * qups;
|
||||
|
||||
+ devinfo->has_accumulators = devinfo->ver < 71;
|
||||
+
|
||||
switch (devinfo->ver) {
|
||||
case 33:
|
||||
case 41:
|
||||
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
|
||||
index 32cb65cf81f..8dfc7858727 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.h
|
||||
+++ b/src/broadcom/common/v3d_device_info.h
|
||||
@@ -42,6 +42,9 @@ struct v3d_device_info {
|
||||
|
||||
/* NSLC * QUPS from the core's IDENT registers. */
|
||||
int qpu_count;
|
||||
+
|
||||
+ /* If the hw has accumulator registers */
|
||||
+ bool has_accumulators;
|
||||
};
|
||||
|
||||
typedef int (*v3d_ioctl_fun)(int fd, unsigned long request, void *arg);
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,52 +0,0 @@
|
||||
From 7d42eca87b6e144697810405308d99d200dca62a Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 15 Sep 2021 10:56:43 +0200
|
||||
Subject: [PATCH 015/142] broadcom/qpu: add qpu_writes_rf0_implicitly helper
|
||||
|
||||
On v71 rf0 replaces r5 as the register that gets updated implicitly
|
||||
with uniform loads, and gets the C coefficient with ldvary. This
|
||||
helper returns whether rf0 gets implicitly updated.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 2 ++
|
||||
2 files changed, 14 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 7ece8b5e570..8de99c611d5 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -920,6 +920,18 @@ v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
|
||||
return false;
|
||||
}
|
||||
|
||||
+bool
|
||||
+v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *inst)
|
||||
+{
|
||||
+ if (devinfo->ver >= 71 &&
|
||||
+ (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
bool
|
||||
v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
|
||||
{
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 9e56e2d6a99..a25be8e0ee6 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -473,6 +473,8 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
+bool v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
--
|
||||
2.39.2
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,261 +0,0 @@
|
||||
From ebba9019461083687f6afd23ff0d4646c1a667cb Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Sun, 29 Jan 2023 00:27:11 +0100
|
||||
Subject: [PATCH 017/142] broadcom/compiler: update node/temp translation for
|
||||
v71
|
||||
|
||||
As the offset applied needs to take into account if we have
|
||||
accumulators or not.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 68 +++++++++----------
|
||||
1 file changed, 34 insertions(+), 34 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index b22f915d1df..aa9473d124b 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -39,30 +39,31 @@
|
||||
CLASS_BITS_R5)
|
||||
|
||||
static inline uint32_t
|
||||
-temp_to_node(uint32_t temp)
|
||||
+temp_to_node(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
- return temp + ACC_COUNT;
|
||||
+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
-node_to_temp(uint32_t node)
|
||||
+node_to_temp(struct v3d_compile *c, uint32_t node)
|
||||
{
|
||||
- assert(node >= ACC_COUNT);
|
||||
- return node - ACC_COUNT;
|
||||
+ assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
|
||||
+ (!c->devinfo->has_accumulators && node >= 0));
|
||||
+ return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
-get_temp_class_bits(struct v3d_ra_node_info *nodes,
|
||||
+get_temp_class_bits(struct v3d_compile *c,
|
||||
uint32_t temp)
|
||||
{
|
||||
- return nodes->info[temp_to_node(temp)].class_bits;
|
||||
+ return c->nodes.info[temp_to_node(c, temp)].class_bits;
|
||||
}
|
||||
|
||||
static inline void
|
||||
-set_temp_class_bits(struct v3d_ra_node_info *nodes,
|
||||
+set_temp_class_bits(struct v3d_compile *c,
|
||||
uint32_t temp, uint8_t class_bits)
|
||||
{
|
||||
- nodes->info[temp_to_node(temp)].class_bits = class_bits;
|
||||
+ c->nodes.info[temp_to_node(c, temp)].class_bits = class_bits;
|
||||
}
|
||||
|
||||
static struct ra_class *
|
||||
@@ -84,7 +85,7 @@ static inline struct ra_class *
|
||||
choose_reg_class_for_temp(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
assert(temp < c->num_temps && temp < c->nodes.alloc_count);
|
||||
- return choose_reg_class(c, get_temp_class_bits(&c->nodes, temp));
|
||||
+ return choose_reg_class(c, get_temp_class_bits(c, temp));
|
||||
}
|
||||
|
||||
static inline bool
|
||||
@@ -313,7 +314,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
|
||||
|
||||
for (unsigned i = 0; i < c->num_temps; i++) {
|
||||
if (BITSET_TEST(c->spillable, i)) {
|
||||
- ra_set_node_spill_cost(c->g, temp_to_node(i),
|
||||
+ ra_set_node_spill_cost(c->g, temp_to_node(c, i),
|
||||
spill_costs[i]);
|
||||
}
|
||||
}
|
||||
@@ -482,7 +483,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
|
||||
c->temp_start[i] < ip && c->temp_end[i] >= ip :
|
||||
c->temp_start[i] <= ip && c->temp_end[i] > ip;
|
||||
if (thrsw_cross) {
|
||||
- ra_set_node_class(c->g, temp_to_node(i),
|
||||
+ ra_set_node_class(c->g, temp_to_node(c, i),
|
||||
choose_reg_class(c, CLASS_BITS_PHYS));
|
||||
}
|
||||
}
|
||||
@@ -509,8 +510,7 @@ v3d_emit_tmu_spill(struct v3d_compile *c,
|
||||
* same register class bits as the original.
|
||||
*/
|
||||
if (inst == position) {
|
||||
- uint8_t class_bits = get_temp_class_bits(&c->nodes,
|
||||
- inst->dst.index);
|
||||
+ uint8_t class_bits = get_temp_class_bits(c, inst->dst.index);
|
||||
inst->dst = vir_get_temp(c);
|
||||
add_node(c, inst->dst.index, class_bits);
|
||||
} else {
|
||||
@@ -574,7 +574,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
reconstruct_op = orig_def->qpu.alu.add.op;
|
||||
}
|
||||
|
||||
- uint32_t spill_node = temp_to_node(spill_temp);
|
||||
+ uint32_t spill_node = temp_to_node(c, spill_temp);
|
||||
|
||||
/* We must disable the ldunif optimization if we are spilling uniforms */
|
||||
bool had_disable_ldunif_opt = c->disable_ldunif_opt;
|
||||
@@ -739,12 +739,12 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
* update node priorities based one new liveness data.
|
||||
*/
|
||||
uint32_t sb_temp =c->spill_base.index;
|
||||
- uint32_t sb_node = temp_to_node(sb_temp);
|
||||
+ uint32_t sb_node = temp_to_node(c, sb_temp);
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_end[i] == -1)
|
||||
continue;
|
||||
|
||||
- uint32_t node_i = temp_to_node(i);
|
||||
+ uint32_t node_i = temp_to_node(c, i);
|
||||
c->nodes.info[node_i].priority =
|
||||
c->temp_end[i] - c->temp_start[i];
|
||||
|
||||
@@ -752,7 +752,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
j < c->num_temps; j++) {
|
||||
if (interferes(c->temp_start[i], c->temp_end[i],
|
||||
c->temp_start[j], c->temp_end[j])) {
|
||||
- uint32_t node_j = temp_to_node(j);
|
||||
+ uint32_t node_j = temp_to_node(c, j);
|
||||
ra_add_node_interference(c->g, node_i, node_j);
|
||||
}
|
||||
}
|
||||
@@ -958,7 +958,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
- temp_to_node(i),
|
||||
+ temp_to_node(c, i),
|
||||
acc_nodes[3]);
|
||||
}
|
||||
}
|
||||
@@ -968,7 +968,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
- temp_to_node(i),
|
||||
+ temp_to_node(c, i),
|
||||
acc_nodes[4]);
|
||||
}
|
||||
}
|
||||
@@ -987,7 +987,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* decides whether the LDVPM is in or out)
|
||||
*/
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_PHYS);
|
||||
break;
|
||||
}
|
||||
@@ -1002,7 +1002,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* phys regfile.
|
||||
*/
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_PHYS);
|
||||
break;
|
||||
}
|
||||
@@ -1024,7 +1024,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
*/
|
||||
assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
- uint32_t node = temp_to_node(inst->dst.index);
|
||||
+ uint32_t node = temp_to_node(c, inst->dst.index);
|
||||
ra_set_node_reg(c->g, node,
|
||||
PHYS_INDEX + inst->src[0].index);
|
||||
break;
|
||||
@@ -1043,9 +1043,9 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
*/
|
||||
if (!inst->qpu.sig.ldunif) {
|
||||
uint8_t class_bits =
|
||||
- get_temp_class_bits(&c->nodes, inst->dst.index) &
|
||||
+ get_temp_class_bits(c, inst->dst.index) &
|
||||
~CLASS_BITS_R5;
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
class_bits);
|
||||
|
||||
} else {
|
||||
@@ -1054,7 +1054,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* loads interfere with each other.
|
||||
*/
|
||||
if (c->devinfo->ver < 40) {
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_R5);
|
||||
}
|
||||
}
|
||||
@@ -1064,7 +1064,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
if (inst->qpu.sig.thrsw) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
- set_temp_class_bits(&c->nodes, i,
|
||||
+ set_temp_class_bits(c, i,
|
||||
CLASS_BITS_PHYS);
|
||||
}
|
||||
}
|
||||
@@ -1125,7 +1125,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->nodes.info[i].priority = 0;
|
||||
c->nodes.info[i].class_bits = 0;
|
||||
} else {
|
||||
- uint32_t t = node_to_temp(i);
|
||||
+ uint32_t t = node_to_temp(c, i);
|
||||
c->nodes.info[i].priority =
|
||||
c->temp_end[t] - c->temp_start[t];
|
||||
c->nodes.info[i].class_bits = CLASS_BITS_ANY;
|
||||
@@ -1143,7 +1143,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
- ra_set_node_class(c->g, temp_to_node(i),
|
||||
+ ra_set_node_class(c->g, temp_to_node(c, i),
|
||||
choose_reg_class_for_temp(c, i));
|
||||
}
|
||||
|
||||
@@ -1153,8 +1153,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
if (interferes(c->temp_start[i], c->temp_end[i],
|
||||
c->temp_start[j], c->temp_end[j])) {
|
||||
ra_add_node_interference(c->g,
|
||||
- temp_to_node(i),
|
||||
- temp_to_node(j));
|
||||
+ temp_to_node(c, i),
|
||||
+ temp_to_node(c, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1171,7 +1171,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
if (c->spill_size <
|
||||
V3D_CHANNELS * sizeof(uint32_t) * force_register_spills) {
|
||||
int node = v3d_choose_spill_node(c);
|
||||
- uint32_t temp = node_to_temp(node);
|
||||
+ uint32_t temp = node_to_temp(c, node);
|
||||
if (node != -1) {
|
||||
v3d_spill_reg(c, acc_nodes, temp);
|
||||
continue;
|
||||
@@ -1186,7 +1186,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
if (node == -1)
|
||||
goto spill_fail;
|
||||
|
||||
- uint32_t temp = node_to_temp(node);
|
||||
+ uint32_t temp = node_to_temp(c, node);
|
||||
enum temp_spill_type spill_type =
|
||||
get_spill_type_for_temp(c, temp);
|
||||
if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
|
||||
@@ -1201,7 +1201,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
/* Allocation was successful, build the 'temp -> reg' map */
|
||||
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
- int ra_reg = ra_get_node_reg(c->g, temp_to_node(i));
|
||||
+ int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
|
||||
if (ra_reg < PHYS_INDEX) {
|
||||
temp_registers[i].magic = true;
|
||||
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,144 +0,0 @@
|
||||
From 9b2dfe0286212aba3687a06023cc5b4ce9944ee0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Mon, 23 Aug 2021 02:18:43 +0200
|
||||
Subject: [PATCH 018/142] broadcom/compiler: phys index depends on hw version
|
||||
|
||||
For 7.1 there are no accumulators, so we replace the macro with a
|
||||
function call.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++-----
|
||||
1 file changed, 29 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index aa9473d124b..a358b616e13 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -28,9 +28,19 @@
|
||||
|
||||
#define ACC_INDEX 0
|
||||
#define ACC_COUNT 6
|
||||
-#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
|
||||
-#define PHYS_COUNT 64
|
||||
|
||||
+#define PHYS_COUNT 64
|
||||
+
|
||||
+static uint8_t
|
||||
+get_phys_index(const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->has_accumulators)
|
||||
+ return ACC_INDEX + ACC_COUNT;
|
||||
+ else
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* ACC as accumulator */
|
||||
#define CLASS_BITS_PHYS (1 << 0)
|
||||
#define CLASS_BITS_ACC (1 << 1)
|
||||
#define CLASS_BITS_R5 (1 << 4)
|
||||
@@ -771,9 +781,11 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
}
|
||||
|
||||
struct v3d_ra_select_callback_data {
|
||||
+ uint32_t phys_index;
|
||||
uint32_t next_acc;
|
||||
uint32_t next_phys;
|
||||
struct v3d_ra_node_info *nodes;
|
||||
+ const struct v3d_device_info *devinfo;
|
||||
};
|
||||
|
||||
/* Choosing accumulators improves chances of merging QPU instructions
|
||||
@@ -794,7 +806,7 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
static const int available_rf_threshold = 5;
|
||||
int available_rf = 0 ;
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
- if (BITSET_TEST(regs, PHYS_INDEX + i))
|
||||
+ if (BITSET_TEST(regs, v3d_ra->phys_index + i))
|
||||
available_rf++;
|
||||
if (available_rf >= available_rf_threshold)
|
||||
break;
|
||||
@@ -854,7 +866,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
{
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
|
||||
- int phys = PHYS_INDEX + phys_off;
|
||||
+ int phys = v3d_ra->phys_index + phys_off;
|
||||
|
||||
if (BITSET_TEST(regs, phys)) {
|
||||
v3d_ra->next_phys = phys_off + 1;
|
||||
@@ -896,8 +908,9 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
* register file can be divided up for fragment shader threading.
|
||||
*/
|
||||
int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
|
||||
+ uint8_t phys_index = get_phys_index(compiler->devinfo);
|
||||
|
||||
- compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
|
||||
+ compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
|
||||
false);
|
||||
if (!compiler->regs)
|
||||
return false;
|
||||
@@ -912,8 +925,8 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
compiler->reg_class_phys[threads] =
|
||||
ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
|
||||
- for (int i = PHYS_INDEX;
|
||||
- i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
|
||||
+ for (int i = phys_index;
|
||||
+ i < phys_index + (PHYS_COUNT >> threads); i++) {
|
||||
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_phys[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
@@ -1026,7 +1039,8 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
uint32_t node = temp_to_node(c, inst->dst.index);
|
||||
ra_set_node_reg(c->g, node,
|
||||
- PHYS_INDEX + inst->src[0].index);
|
||||
+ get_phys_index(c->devinfo) +
|
||||
+ inst->src[0].index);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1086,13 +1100,17 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->num_temps + ACC_COUNT),
|
||||
};
|
||||
|
||||
+ uint32_t phys_index = get_phys_index(c->devinfo);
|
||||
+
|
||||
struct v3d_ra_select_callback_data callback_data = {
|
||||
+ .phys_index = phys_index,
|
||||
.next_acc = 0,
|
||||
/* Start at RF3, to try to keep the TLB writes from using
|
||||
* RF0-2.
|
||||
*/
|
||||
.next_phys = 3,
|
||||
.nodes = &c->nodes,
|
||||
+ .devinfo = c->devinfo,
|
||||
};
|
||||
|
||||
vir_calculate_live_intervals(c);
|
||||
@@ -1139,6 +1157,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
inst->ip = ip++;
|
||||
update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
|
||||
+
|
||||
}
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
@@ -1202,13 +1221,13 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
|
||||
- if (ra_reg < PHYS_INDEX) {
|
||||
+ if (ra_reg < phys_index) {
|
||||
temp_registers[i].magic = true;
|
||||
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
|
||||
ra_reg - ACC_INDEX);
|
||||
} else {
|
||||
temp_registers[i].magic = false;
|
||||
- temp_registers[i].index = ra_reg - PHYS_INDEX;
|
||||
+ temp_registers[i].index = ra_reg - phys_index;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,40 +0,0 @@
|
||||
From da0a3deadf86a46c8323267d3f6a49e442835608 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 17 Sep 2021 01:07:06 +0200
|
||||
Subject: [PATCH 019/142] broadcom/compiler: don't favor/select accum registers
|
||||
for hw not supporting it
|
||||
|
||||
Note that what we do is to just return false on the favor/select accum
|
||||
methods. We could just avoid calling them, but as the select is called
|
||||
more than once, it is just easier this way.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index a358b616e13..1f495180784 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -797,6 +797,9 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
BITSET_WORD *regs,
|
||||
int priority)
|
||||
{
|
||||
+ if (!v3d_ra->devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
/* Favor accumulators if we have less that this number of physical
|
||||
* registers. Accumulators have more restrictions (like being
|
||||
* invalidated through thrsw), so running out of physical registers
|
||||
@@ -832,6 +835,9 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
BITSET_WORD *regs,
|
||||
unsigned int *out)
|
||||
{
|
||||
+ if (!v3d_ra->devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
/* Choose r5 for our ldunifs if possible (nobody else can load to that
|
||||
* reg, and it keeps the QPU cond field free from being occupied by
|
||||
* ldunifrf).
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,105 +0,0 @@
|
||||
From 6c04d7c917da6b38f8b2b4306ab03ed2ab7e6ce0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 9 Sep 2021 00:28:53 +0200
|
||||
Subject: [PATCH 020/142] broadcom/vir: implement is_no_op_mov for v71
|
||||
|
||||
Did some refactoring/splitting.
|
||||
---
|
||||
src/broadcom/compiler/vir_to_qpu.c | 66 ++++++++++++++++++++++++------
|
||||
1 file changed, 53 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index c8b6e0a91a0..08970d52954 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -129,19 +129,8 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
}
|
||||
|
||||
static bool
|
||||
-is_no_op_mov(struct qinst *qinst)
|
||||
+v3d33_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
{
|
||||
- static const struct v3d_qpu_sig no_sig = {0};
|
||||
-
|
||||
- /* Make sure it's just a lone MOV. */
|
||||
- if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
|
||||
- qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
|
||||
- qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
|
||||
- memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
- /* Check if it's a MOV from a register to itself. */
|
||||
enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
|
||||
if (qinst->qpu.alu.mul.magic_write) {
|
||||
if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
|
||||
@@ -168,6 +157,57 @@ is_no_op_mov(struct qinst *qinst)
|
||||
return false;
|
||||
}
|
||||
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+v3d71_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
+{
|
||||
+ if (qinst->qpu.alu.mul.magic_write)
|
||||
+ return false;
|
||||
+
|
||||
+ enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
|
||||
+ int raddr;
|
||||
+
|
||||
+ raddr = qinst->qpu.alu.mul.a.raddr;
|
||||
+ if (raddr != waddr)
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+mov_src_and_dst_equal(struct qinst *qinst,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->ver < 71)
|
||||
+ return v3d33_mov_src_and_dst_equal(qinst);
|
||||
+ else
|
||||
+ return v3d71_mov_src_and_dst_equal(qinst);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+static bool
|
||||
+is_no_op_mov(struct qinst *qinst,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ static const struct v3d_qpu_sig no_sig = {0};
|
||||
+
|
||||
+ /* Make sure it's just a lone MOV. We only check for M_MOV. Although
|
||||
+ * for V3D 7.x there is also A_MOV, we don't need to check for it as
|
||||
+ * we always emit using M_MOV. We could use A_MOV later on the
|
||||
+ * schedule to improve performance
|
||||
+ */
|
||||
+ if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
|
||||
+ qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
|
||||
+ qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
|
||||
+ memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (!mov_src_and_dst_equal(qinst, devinfo))
|
||||
+ return false;
|
||||
+
|
||||
/* No packing or flags updates, or we need to execute the
|
||||
* instruction.
|
||||
*/
|
||||
@@ -324,7 +364,7 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
qinst->qpu.alu.mul.waddr = dst.index;
|
||||
qinst->qpu.alu.mul.magic_write = dst.magic;
|
||||
|
||||
- if (is_no_op_mov(qinst)) {
|
||||
+ if (is_no_op_mov(qinst, c->devinfo)) {
|
||||
vir_remove_instruction(c, qinst);
|
||||
continue;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,104 +0,0 @@
|
||||
From 7b5be2d9b178a45c34c22db2744639a6a8a216d1 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 9 Sep 2021 01:18:54 +0200
|
||||
Subject: [PATCH 021/142] broadcom/compiler: update vir_to_qpu::set_src for v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/vir_to_qpu.c | 47 ++++++++++++++++++++++++++----
|
||||
1 file changed, 42 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index 08970d52954..afc4941fdb1 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -86,12 +86,22 @@ new_qpu_nop_before(struct qinst *inst)
|
||||
return q;
|
||||
}
|
||||
|
||||
+static void
|
||||
+v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
|
||||
+{
|
||||
+ if (src.smimm)
|
||||
+ unreachable("v3d71_set_src: pending handling small immediates");
|
||||
+
|
||||
+ assert(!src.magic);
|
||||
+ *raddr = src.index;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* Allocates the src register (accumulator or register file) into the RADDR
|
||||
* fields of the instruction.
|
||||
*/
|
||||
static void
|
||||
-set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
+v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
{
|
||||
if (src.smimm) {
|
||||
assert(instr->sig.small_imm_b);
|
||||
@@ -128,6 +138,24 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
}
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * The main purpose of the following wrapper is to make calling set_src
|
||||
+ * cleaner. This is the reason it receives both mux and raddr pointers. Those
|
||||
+ * will be filled or not based on the device version.
|
||||
+ */
|
||||
+static void
|
||||
+set_src(struct v3d_qpu_instr *instr,
|
||||
+ enum v3d_qpu_mux *mux,
|
||||
+ uint8_t *raddr,
|
||||
+ struct qpu_reg src,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->ver < 71)
|
||||
+ return v3d33_set_src(instr, mux, src);
|
||||
+ else
|
||||
+ return v3d71_set_src(instr, raddr, src);
|
||||
+}
|
||||
+
|
||||
static bool
|
||||
v3d33_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
{
|
||||
@@ -340,13 +368,18 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
qinst->qpu.sig_magic = dst.magic;
|
||||
} else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
|
||||
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
|
||||
+
|
||||
if (nsrc >= 1) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.add.a.mux, src[0]);
|
||||
+ &qinst->qpu.alu.add.a.mux,
|
||||
+ &qinst->qpu.alu.add.a.raddr,
|
||||
+ src[0], c->devinfo);
|
||||
}
|
||||
if (nsrc >= 2) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.add.b.mux, src[1]);
|
||||
+ &qinst->qpu.alu.add.b.mux,
|
||||
+ &qinst->qpu.alu.add.b.raddr,
|
||||
+ src[1], c->devinfo);
|
||||
}
|
||||
|
||||
qinst->qpu.alu.add.waddr = dst.index;
|
||||
@@ -354,11 +387,15 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
} else {
|
||||
if (nsrc >= 1) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.mul.a.mux, src[0]);
|
||||
+ &qinst->qpu.alu.mul.a.mux,
|
||||
+ &qinst->qpu.alu.mul.a.raddr,
|
||||
+ src[0], c->devinfo);
|
||||
}
|
||||
if (nsrc >= 2) {
|
||||
set_src(&qinst->qpu,
|
||||
- &qinst->qpu.alu.mul.b.mux, src[1]);
|
||||
+ &qinst->qpu.alu.mul.b.mux,
|
||||
+ &qinst->qpu.alu.mul.b.raddr,
|
||||
+ src[1], c->devinfo);
|
||||
}
|
||||
|
||||
qinst->qpu.alu.mul.waddr = dst.index;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,92 +0,0 @@
|
||||
From fe89703008f2a3d6bfe6e260791f712013be5e48 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 9 Sep 2021 23:59:28 +0200
|
||||
Subject: [PATCH 022/142] broadcom/qpu_schedule: add process_raddr_deps
|
||||
|
||||
On v71 we don't have muxes, but more raddr. Adding an equivalent add
|
||||
deps function.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 52 +++++++++++++++++++++++-----
|
||||
1 file changed, 44 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 455fa3867be..89254643c90 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -155,6 +155,7 @@ static void
|
||||
process_mux_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
enum v3d_qpu_mux mux)
|
||||
{
|
||||
+ assert(state->devinfo->ver < 71);
|
||||
switch (mux) {
|
||||
case V3D_QPU_MUX_A:
|
||||
add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
|
||||
@@ -171,6 +172,17 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
}
|
||||
}
|
||||
|
||||
+
|
||||
+static void
|
||||
+process_raddr_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
+ uint8_t raddr, bool is_small_imm)
|
||||
+{
|
||||
+ assert(state->devinfo->ver >= 71);
|
||||
+
|
||||
+ if (!is_small_imm)
|
||||
+ add_read_dep(state, state->last_rf[raddr], n);
|
||||
+}
|
||||
+
|
||||
static bool
|
||||
tmu_write_is_sequence_terminator(uint32_t waddr)
|
||||
{
|
||||
@@ -305,15 +317,39 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
||||
|
||||
/* XXX: LOAD_IMM */
|
||||
|
||||
- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0)
|
||||
- process_mux_deps(state, n, inst->alu.add.a.mux);
|
||||
- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1)
|
||||
- process_mux_deps(state, n, inst->alu.add.b.mux);
|
||||
+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ process_mux_deps(state, n, inst->alu.add.a.mux);
|
||||
+ } else {
|
||||
+ process_raddr_deps(state, n, inst->alu.add.a.raddr,
|
||||
+ inst->sig.small_imm_a);
|
||||
+ }
|
||||
+ }
|
||||
+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ process_mux_deps(state, n, inst->alu.add.b.mux);
|
||||
+ } else {
|
||||
+ process_raddr_deps(state, n, inst->alu.add.b.raddr,
|
||||
+ inst->sig.small_imm_b);
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0)
|
||||
- process_mux_deps(state, n, inst->alu.mul.a.mux);
|
||||
- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1)
|
||||
- process_mux_deps(state, n, inst->alu.mul.b.mux);
|
||||
+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ process_mux_deps(state, n, inst->alu.mul.a.mux);
|
||||
+ } else {
|
||||
+ process_raddr_deps(state, n, inst->alu.mul.a.raddr,
|
||||
+ inst->sig.small_imm_c);
|
||||
+ }
|
||||
+ }
|
||||
+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ process_mux_deps(state, n, inst->alu.mul.b.mux);
|
||||
+ } else {
|
||||
+ process_raddr_deps(state, n, inst->alu.mul.b.raddr,
|
||||
+ inst->sig.small_imm_d);
|
||||
+ }
|
||||
+ }
|
||||
|
||||
switch (inst->alu.add.op) {
|
||||
case V3D_QPU_A_VPMSETUP:
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,128 +0,0 @@
|
||||
From 20ce426df1ab2546332141f4bc4531ada754cdea Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 10 Sep 2021 01:20:44 +0200
|
||||
Subject: [PATCH 023/142] broadcom/qpu: update disasm_raddr for v71
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_disasm.c | 72 ++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 66 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
|
||||
index 588a665f770..b613de781dc 100644
|
||||
--- a/src/broadcom/qpu/qpu_disasm.c
|
||||
+++ b/src/broadcom/qpu/qpu_disasm.c
|
||||
@@ -56,8 +56,9 @@ pad_to(struct disasm_state *disasm, int n)
|
||||
|
||||
|
||||
static void
|
||||
-v3d_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
- const struct v3d_qpu_instr *instr, uint8_t mux)
|
||||
+v3d33_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
+ const struct v3d_qpu_instr *instr,
|
||||
+ enum v3d_qpu_mux mux)
|
||||
{
|
||||
if (mux == V3D_QPU_MUX_A) {
|
||||
append(disasm, "rf%d", instr->raddr_a);
|
||||
@@ -82,6 +83,65 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
}
|
||||
}
|
||||
|
||||
+enum v3d_qpu_input_class {
|
||||
+ V3D_QPU_ADD_A,
|
||||
+ V3D_QPU_ADD_B,
|
||||
+ V3D_QPU_MUL_A,
|
||||
+ V3D_QPU_MUL_B
|
||||
+};
|
||||
+
|
||||
+static void
|
||||
+v3d71_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
+ const struct v3d_qpu_instr *instr,
|
||||
+ uint8_t raddr,
|
||||
+ enum v3d_qpu_input_class input_class)
|
||||
+{
|
||||
+ bool is_small_imm = false;
|
||||
+ switch(input_class) {
|
||||
+ case V3D_QPU_ADD_A:
|
||||
+ is_small_imm = instr->sig.small_imm_a;
|
||||
+ break;
|
||||
+ case V3D_QPU_ADD_B:
|
||||
+ is_small_imm = instr->sig.small_imm_b;
|
||||
+ break;
|
||||
+ case V3D_QPU_MUL_A:
|
||||
+ is_small_imm = instr->sig.small_imm_c;
|
||||
+ break;
|
||||
+ case V3D_QPU_MUL_B:
|
||||
+ is_small_imm = instr->sig.small_imm_d;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (is_small_imm) {
|
||||
+ unreachable("Pending handling small immediates");
|
||||
+ uint32_t val;
|
||||
+ ASSERTED bool ok =
|
||||
+ v3d_qpu_small_imm_unpack(disasm->devinfo,
|
||||
+ raddr,
|
||||
+ &val);
|
||||
+
|
||||
+ if ((int)val >= -16 && (int)val <= 15)
|
||||
+ append(disasm, "%d", val);
|
||||
+ else
|
||||
+ append(disasm, "0x%08x", val);
|
||||
+ assert(ok);
|
||||
+ } else {
|
||||
+ append(disasm, "rf%d", raddr);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+v3d_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
+ const struct v3d_qpu_instr *instr,
|
||||
+ const struct v3d_qpu_input *input,
|
||||
+ enum v3d_qpu_input_class input_class)
|
||||
+{
|
||||
+ if (disasm->devinfo->ver < 71)
|
||||
+ v3d33_qpu_disasm_raddr(disasm, instr, input->mux);
|
||||
+ else
|
||||
+ v3d71_qpu_disasm_raddr(disasm, instr, input->raddr, input_class);
|
||||
+}
|
||||
+
|
||||
static void
|
||||
v3d_qpu_disasm_waddr(struct disasm_state *disasm, uint32_t waddr, bool magic)
|
||||
{
|
||||
@@ -121,14 +181,14 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
|
||||
if (num_src >= 1) {
|
||||
if (has_dst)
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a.mux);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.add.a, V3D_QPU_ADD_A);
|
||||
append(disasm, "%s",
|
||||
v3d_qpu_unpack_name(instr->alu.add.a.unpack));
|
||||
}
|
||||
|
||||
if (num_src >= 2) {
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b.mux);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.add.b, V3D_QPU_ADD_B);
|
||||
append(disasm, "%s",
|
||||
v3d_qpu_unpack_name(instr->alu.add.b.unpack));
|
||||
}
|
||||
@@ -164,14 +224,14 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
|
||||
if (num_src >= 1) {
|
||||
if (has_dst)
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a.mux);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.mul.a, V3D_QPU_MUL_A);
|
||||
append(disasm, "%s",
|
||||
v3d_qpu_unpack_name(instr->alu.mul.a.unpack));
|
||||
}
|
||||
|
||||
if (num_src >= 2) {
|
||||
append(disasm, ", ");
|
||||
- v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b.mux);
|
||||
+ v3d_qpu_disasm_raddr(disasm, instr, &instr->alu.mul.b, V3D_QPU_MUL_B);
|
||||
append(disasm, "%s",
|
||||
v3d_qpu_unpack_name(instr->alu.mul.b.unpack));
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,59 +0,0 @@
|
||||
From 7263fa24a3c57b1dcd4d870670cda86ae89aa28c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 15 Sep 2021 10:55:49 +0200
|
||||
Subject: [PATCH 024/142] broadcom/qpu: return false on
|
||||
qpu_writes_accumulatorXX helpers for v71
|
||||
|
||||
As v71 doesn't have accumulators (devinfo->has_accumulators set to
|
||||
false), those methods would always return false.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 8de99c611d5..7ec3c867260 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -854,6 +854,9 @@ bool
|
||||
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
+ if(!devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
|
||||
return true;
|
||||
|
||||
@@ -864,6 +867,9 @@ bool
|
||||
v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
+ if (!devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP &&
|
||||
inst->alu.add.magic_write &&
|
||||
@@ -894,6 +900,9 @@ bool
|
||||
v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
+ if (!devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
|
||||
return true;
|
||||
|
||||
@@ -904,6 +913,9 @@ bool
|
||||
v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
+ if (!devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
if (v3d_qpu_writes_r5(devinfo, inst))
|
||||
return true;
|
||||
if (v3d_qpu_writes_r4(devinfo, inst))
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,116 +0,0 @@
|
||||
From 6a9611c5a22218388bba419174d3343e0cdf773b Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 14 Sep 2021 10:42:55 +0200
|
||||
Subject: [PATCH 025/142] broadcom/compiler: add support for varyings on nir to
|
||||
vir generation for v71
|
||||
|
||||
Needs update as v71 doesn't have accumulators anymore, and ldvary uses
|
||||
now rf0 to return the value.
|
||||
---
|
||||
src/broadcom/compiler/nir_to_vir.c | 34 +++++++++++++++++-------------
|
||||
1 file changed, 19 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
|
||||
index ca072971f01..79a22c3bd08 100644
|
||||
--- a/src/broadcom/compiler/nir_to_vir.c
|
||||
+++ b/src/broadcom/compiler/nir_to_vir.c
|
||||
@@ -1005,32 +1005,36 @@ emit_fragcoord_input(struct v3d_compile *c, int attr)
|
||||
|
||||
static struct qreg
|
||||
emit_smooth_varying(struct v3d_compile *c,
|
||||
- struct qreg vary, struct qreg w, struct qreg r5)
|
||||
+ struct qreg vary, struct qreg w, struct qreg c_reg)
|
||||
{
|
||||
- return vir_FADD(c, vir_FMUL(c, vary, w), r5);
|
||||
+ return vir_FADD(c, vir_FMUL(c, vary, w), c_reg);
|
||||
}
|
||||
|
||||
static struct qreg
|
||||
emit_noperspective_varying(struct v3d_compile *c,
|
||||
- struct qreg vary, struct qreg r5)
|
||||
+ struct qreg vary, struct qreg c_reg)
|
||||
{
|
||||
- return vir_FADD(c, vir_MOV(c, vary), r5);
|
||||
+ return vir_FADD(c, vir_MOV(c, vary), c_reg);
|
||||
}
|
||||
|
||||
static struct qreg
|
||||
emit_flat_varying(struct v3d_compile *c,
|
||||
- struct qreg vary, struct qreg r5)
|
||||
+ struct qreg vary, struct qreg c_reg)
|
||||
{
|
||||
vir_MOV_dest(c, c->undef, vary);
|
||||
- return vir_MOV(c, r5);
|
||||
+ return vir_MOV(c, c_reg);
|
||||
}
|
||||
|
||||
static struct qreg
|
||||
emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
|
||||
int8_t input_idx, uint8_t swizzle, int array_index)
|
||||
{
|
||||
- struct qreg r3 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
|
||||
- struct qreg r5 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5);
|
||||
+ struct qreg c_reg; /* C coefficient */
|
||||
+
|
||||
+ if (c->devinfo->has_accumulators)
|
||||
+ c_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5);
|
||||
+ else
|
||||
+ c_reg = vir_reg(QFILE_REG, 0);
|
||||
|
||||
struct qinst *ldvary = NULL;
|
||||
struct qreg vary;
|
||||
@@ -1041,7 +1045,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
|
||||
vary = vir_emit_def(c, ldvary);
|
||||
} else {
|
||||
vir_NOP(c)->qpu.sig.ldvary = true;
|
||||
- vary = r3;
|
||||
+ vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
|
||||
}
|
||||
|
||||
/* Store the input value before interpolation so we can implement
|
||||
@@ -1050,7 +1054,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
|
||||
if (input_idx >= 0) {
|
||||
assert(var);
|
||||
c->interp[input_idx].vp = vary;
|
||||
- c->interp[input_idx].C = vir_MOV(c, r5);
|
||||
+ c->interp[input_idx].C = vir_MOV(c, c_reg);
|
||||
c->interp[input_idx].mode = var->data.interpolation;
|
||||
}
|
||||
|
||||
@@ -1060,7 +1064,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
|
||||
*/
|
||||
if (!var) {
|
||||
assert(input_idx < 0);
|
||||
- return emit_smooth_varying(c, vary, c->payload_w, r5);
|
||||
+ return emit_smooth_varying(c, vary, c->payload_w, c_reg);
|
||||
}
|
||||
|
||||
int i = c->num_inputs++;
|
||||
@@ -1075,20 +1079,20 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
|
||||
if (var->data.centroid) {
|
||||
BITSET_SET(c->centroid_flags, i);
|
||||
result = emit_smooth_varying(c, vary,
|
||||
- c->payload_w_centroid, r5);
|
||||
+ c->payload_w_centroid, c_reg);
|
||||
} else {
|
||||
- result = emit_smooth_varying(c, vary, c->payload_w, r5);
|
||||
+ result = emit_smooth_varying(c, vary, c->payload_w, c_reg);
|
||||
}
|
||||
break;
|
||||
|
||||
case INTERP_MODE_NOPERSPECTIVE:
|
||||
BITSET_SET(c->noperspective_flags, i);
|
||||
- result = emit_noperspective_varying(c, vary, r5);
|
||||
+ result = emit_noperspective_varying(c, vary, c_reg);
|
||||
break;
|
||||
|
||||
case INTERP_MODE_FLAT:
|
||||
BITSET_SET(c->flat_shade_flags, i);
|
||||
- result = emit_flat_varying(c, vary, r5);
|
||||
+ result = emit_flat_varying(c, vary, c_reg);
|
||||
break;
|
||||
|
||||
default:
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 06af15a60f7a9c135893e5f8934b8030c1da95f9 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 15 Sep 2021 01:14:15 +0200
|
||||
Subject: [PATCH 026/142] broadcom/compiler: payload_w is loaded on rf3 for v71
|
||||
|
||||
And in general rf0 is now used for other needs.
|
||||
---
|
||||
src/broadcom/compiler/nir_to_vir.c | 6 +++++-
|
||||
src/broadcom/compiler/vir_register_allocate.c | 6 +++++-
|
||||
2 files changed, 10 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
|
||||
index 79a22c3bd08..1a05b279a2d 100644
|
||||
--- a/src/broadcom/compiler/nir_to_vir.c
|
||||
+++ b/src/broadcom/compiler/nir_to_vir.c
|
||||
@@ -4325,7 +4325,11 @@ nir_to_vir(struct v3d_compile *c)
|
||||
{
|
||||
switch (c->s->info.stage) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
- c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
+ if (c->devinfo->ver < 71)
|
||||
+ c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
+ else
|
||||
+ c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 3));
|
||||
+
|
||||
c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
|
||||
c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 1f495180784..eca9a6751a6 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -1034,6 +1034,11 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
if (inst->src[0].file == QFILE_REG) {
|
||||
switch (inst->src[0].index) {
|
||||
case 0:
|
||||
+ /* V3D 7.x doesn't use rf0 for thread payload */
|
||||
+ if (c->devinfo->ver >= 71)
|
||||
+ break;
|
||||
+ else
|
||||
+ FALLTHROUGH;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3: {
|
||||
@@ -1163,7 +1168,6 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
inst->ip = ip++;
|
||||
update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
|
||||
-
|
||||
}
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,30 +0,0 @@
|
||||
From d38d8056903b9a4f96ab56261ac3b3c3be0af4fb Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 15 Sep 2021 11:12:59 +0200
|
||||
Subject: [PATCH 027/142] broadcom/qpu_schedule: update write deps for v71
|
||||
|
||||
We just need to add a write dep if rf0 is written implicitly.
|
||||
|
||||
Note that we don't need to check if we have accumulators when checking
|
||||
for r3/r4/r5, as v3d_qpu_writes_rX would return false for hw versions
|
||||
that don't have accumulators.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 89254643c90..2fa9031d7b6 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -422,6 +422,8 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
||||
add_write_dep(state, &state->last_r[4], n);
|
||||
if (v3d_qpu_writes_r5(devinfo, inst))
|
||||
add_write_dep(state, &state->last_r[5], n);
|
||||
+ if (v3d_qpu_writes_rf0_implicitly(devinfo, inst))
|
||||
+ add_write_dep(state, &state->last_rf[0], n);
|
||||
|
||||
/* If we add any more dependencies here we should consider whether we
|
||||
* also need to update qpu_inst_after_thrsw_valid_in_delay_slot.
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,140 +0,0 @@
|
||||
From 7e2a2be830b1672ab846389a46b5d09bad0f7a98 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 16 Sep 2021 00:49:25 +0200
|
||||
Subject: [PATCH 028/142] broadcom/compiler: update register classes to not
|
||||
include accumulators on v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 56 ++++++++++++-------
|
||||
1 file changed, 36 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index eca9a6751a6..7b3f6c41934 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -44,10 +44,15 @@ get_phys_index(const struct v3d_device_info *devinfo)
|
||||
#define CLASS_BITS_PHYS (1 << 0)
|
||||
#define CLASS_BITS_ACC (1 << 1)
|
||||
#define CLASS_BITS_R5 (1 << 4)
|
||||
-#define CLASS_BITS_ANY (CLASS_BITS_PHYS | \
|
||||
- CLASS_BITS_ACC | \
|
||||
- CLASS_BITS_R5)
|
||||
|
||||
+static uint8_t
|
||||
+get_class_bit_any(const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->has_accumulators)
|
||||
+ return (CLASS_BITS_PHYS | CLASS_BITS_ACC | CLASS_BITS_R5);
|
||||
+ else
|
||||
+ return CLASS_BITS_PHYS;
|
||||
+}
|
||||
static inline uint32_t
|
||||
temp_to_node(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
@@ -82,11 +87,13 @@ choose_reg_class(struct v3d_compile *c, uint8_t class_bits)
|
||||
if (class_bits == CLASS_BITS_PHYS) {
|
||||
return c->compiler->reg_class_phys[c->thread_index];
|
||||
} else if (class_bits == (CLASS_BITS_R5)) {
|
||||
+ assert(c->devinfo->has_accumulators);
|
||||
return c->compiler->reg_class_r5[c->thread_index];
|
||||
} else if (class_bits == (CLASS_BITS_PHYS | CLASS_BITS_ACC)) {
|
||||
+ assert(c->devinfo->has_accumulators);
|
||||
return c->compiler->reg_class_phys_or_acc[c->thread_index];
|
||||
} else {
|
||||
- assert(class_bits == CLASS_BITS_ANY);
|
||||
+ assert(class_bits == get_class_bit_any(c->devinfo));
|
||||
return c->compiler->reg_class_any[c->thread_index];
|
||||
}
|
||||
}
|
||||
@@ -447,7 +454,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
|
||||
*/
|
||||
assert(c->disable_ldunif_opt);
|
||||
struct qreg offset = vir_uniform_ui(c, spill_offset);
|
||||
- add_node(c, offset.index, CLASS_BITS_ANY);
|
||||
+ add_node(c, offset.index, get_class_bit_any(c->devinfo));
|
||||
|
||||
/* We always enable per-quad on spills/fills to ensure we spill
|
||||
* any channels involved with helper invocations.
|
||||
@@ -645,7 +652,8 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
* instruction immediately after, so
|
||||
* we can use any register class for it.
|
||||
*/
|
||||
- add_node(c, unif.index, CLASS_BITS_ANY);
|
||||
+ add_node(c, unif.index,
|
||||
+ get_class_bit_any(c->devinfo));
|
||||
} else if (spill_type == SPILL_TYPE_RECONSTRUCT) {
|
||||
struct qreg temp =
|
||||
reconstruct_temp(c, reconstruct_op);
|
||||
@@ -924,31 +932,38 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
for (int threads = 0; threads < max_thread_index; threads++) {
|
||||
compiler->reg_class_any[threads] =
|
||||
ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
- compiler->reg_class_r5[threads] =
|
||||
- ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
- compiler->reg_class_phys_or_acc[threads] =
|
||||
- ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
+ if (compiler->devinfo->has_accumulators) {
|
||||
+ compiler->reg_class_r5[threads] =
|
||||
+ ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
+ compiler->reg_class_phys_or_acc[threads] =
|
||||
+ ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
+ }
|
||||
compiler->reg_class_phys[threads] =
|
||||
ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
|
||||
for (int i = phys_index;
|
||||
i < phys_index + (PHYS_COUNT >> threads); i++) {
|
||||
- ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
+ if (compiler->devinfo->has_accumulators)
|
||||
+ ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_phys[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
}
|
||||
|
||||
- for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
|
||||
- ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
- ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
+ if (compiler->devinfo->has_accumulators) {
|
||||
+ for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
|
||||
+ ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
+ ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
+ }
|
||||
}
|
||||
/* r5 can only store a single 32-bit value, so not much can
|
||||
* use it.
|
||||
*/
|
||||
- ra_class_add_reg(compiler->reg_class_r5[threads],
|
||||
- ACC_INDEX + 5);
|
||||
- ra_class_add_reg(compiler->reg_class_any[threads],
|
||||
- ACC_INDEX + 5);
|
||||
+ if (compiler->devinfo->has_accumulators) {
|
||||
+ ra_class_add_reg(compiler->reg_class_r5[threads],
|
||||
+ ACC_INDEX + 5);
|
||||
+ ra_class_add_reg(compiler->reg_class_any[threads],
|
||||
+ ACC_INDEX + 5);
|
||||
+ }
|
||||
}
|
||||
|
||||
ra_set_finalize(compiler->regs, NULL);
|
||||
@@ -1086,7 +1101,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
}
|
||||
|
||||
/* All accumulators are invalidated across a thread switch. */
|
||||
- if (inst->qpu.sig.thrsw) {
|
||||
+ if (inst->qpu.sig.thrsw && c->devinfo->has_accumulators) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
set_temp_class_bits(c, i,
|
||||
@@ -1157,7 +1172,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
uint32_t t = node_to_temp(c, i);
|
||||
c->nodes.info[i].priority =
|
||||
c->temp_end[t] - c->temp_start[t];
|
||||
- c->nodes.info[i].class_bits = CLASS_BITS_ANY;
|
||||
+ c->nodes.info[i].class_bits =
|
||||
+ get_class_bit_any(c->devinfo);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,109 +0,0 @@
|
||||
From 0157228c729b8812dc4900fa24db63b7d27aa342 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 23 Sep 2021 11:19:58 +0200
|
||||
Subject: [PATCH 029/142] broadcom/compiler: implement "reads/writes too soon"
|
||||
checks for v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 65 ++++++++++++++++++++++------
|
||||
1 file changed, 51 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 2fa9031d7b6..4db0c2e72da 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -562,7 +562,24 @@ mux_reads_too_soon(struct choose_scoreboard *scoreboard,
|
||||
}
|
||||
|
||||
static bool
|
||||
-reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
|
||||
+reads_too_soon(struct choose_scoreboard *scoreboard,
|
||||
+ const struct v3d_qpu_instr *inst, uint8_t raddr)
|
||||
+{
|
||||
+ switch (raddr) {
|
||||
+ case 0: /* ldvary delayed write of C coefficient to rf0 */
|
||||
+ if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1)
|
||||
+ return true;
|
||||
+ break;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+reads_too_soon_after_write(const struct v3d_device_info *devinfo,
|
||||
+ struct choose_scoreboard *scoreboard,
|
||||
struct qinst *qinst)
|
||||
{
|
||||
const struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||
@@ -574,24 +591,44 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
|
||||
assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
|
||||
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP) {
|
||||
- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux)) {
|
||||
- return true;
|
||||
+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux))
|
||||
+ return true;
|
||||
+ } else {
|
||||
+ if (reads_too_soon(scoreboard, inst, inst->alu.add.a.raddr))
|
||||
+ return true;
|
||||
+ }
|
||||
}
|
||||
- if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux)) {
|
||||
- return true;
|
||||
+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux))
|
||||
+ return true;
|
||||
+ } else {
|
||||
+ if (reads_too_soon(scoreboard, inst, inst->alu.add.b.raddr))
|
||||
+ return true;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->alu.mul.op != V3D_QPU_M_NOP) {
|
||||
- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux)) {
|
||||
- return true;
|
||||
+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux))
|
||||
+ return true;
|
||||
+ } else {
|
||||
+ if (reads_too_soon(scoreboard, inst, inst->alu.mul.a.raddr))
|
||||
+ return true;
|
||||
+ }
|
||||
}
|
||||
- if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 &&
|
||||
- mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux)) {
|
||||
- return true;
|
||||
+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux))
|
||||
+ return true;
|
||||
+ } else {
|
||||
+ if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr))
|
||||
+ return true;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1147,7 +1184,7 @@ retry:
|
||||
* regfile A or B that was written to by the previous
|
||||
* instruction."
|
||||
*/
|
||||
- if (reads_too_soon_after_write(scoreboard, n->inst))
|
||||
+ if (reads_too_soon_after_write(c->devinfo, scoreboard, n->inst))
|
||||
continue;
|
||||
|
||||
if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,118 +0,0 @@
|
||||
From 3fb3333bdf9699157cf0a2bd46ba4c25058bc5c1 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 23 Sep 2021 11:44:59 +0200
|
||||
Subject: [PATCH 030/142] broadcom/compiler: implement read stall check for v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 32 +++++++++++++++++-----------
|
||||
src/broadcom/qpu/qpu_instr.c | 12 +++++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 2 ++
|
||||
3 files changed, 34 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 4db0c2e72da..b78abe003e9 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -679,29 +679,37 @@ pixel_scoreboard_too_soon(struct v3d_compile *c,
|
||||
}
|
||||
|
||||
static bool
|
||||
-qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
|
||||
+qpu_instruction_uses_rf(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *inst,
|
||||
uint32_t waddr) {
|
||||
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
return false;
|
||||
|
||||
- if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
|
||||
- inst->raddr_a == waddr)
|
||||
- return true;
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
|
||||
+ inst->raddr_a == waddr)
|
||||
+ return true;
|
||||
|
||||
- if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
|
||||
- !inst->sig.small_imm_b && (inst->raddr_b == waddr))
|
||||
- return true;
|
||||
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
|
||||
+ !inst->sig.small_imm_b && (inst->raddr_b == waddr))
|
||||
+ return true;
|
||||
+ } else {
|
||||
+ /* FIXME: skip if small immediate */
|
||||
+ if (v3d71_qpu_reads_raddr(inst, waddr))
|
||||
+ return true;
|
||||
+ }
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
-mux_read_stalls(struct choose_scoreboard *scoreboard,
|
||||
- const struct v3d_qpu_instr *inst)
|
||||
+read_stalls(const struct v3d_device_info *devinfo,
|
||||
+ struct choose_scoreboard *scoreboard,
|
||||
+ const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
|
||||
- qpu_instruction_uses_rf(inst,
|
||||
+ qpu_instruction_uses_rf(devinfo, inst,
|
||||
scoreboard->last_stallable_sfu_reg);
|
||||
}
|
||||
|
||||
@@ -1319,7 +1327,7 @@ retry:
|
||||
|
||||
int prio = get_instruction_priority(c->devinfo, inst);
|
||||
|
||||
- if (mux_read_stalls(scoreboard, inst)) {
|
||||
+ if (read_stalls(c->devinfo, scoreboard, inst)) {
|
||||
/* Don't merge an instruction that stalls */
|
||||
if (prev_inst)
|
||||
continue;
|
||||
@@ -2389,7 +2397,7 @@ schedule_instructions(struct v3d_compile *c,
|
||||
}
|
||||
}
|
||||
}
|
||||
- if (mux_read_stalls(scoreboard, inst))
|
||||
+ if (read_stalls(c->devinfo, scoreboard, inst))
|
||||
c->qpu_inst_stalled_count++;
|
||||
}
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 7ec3c867260..e8bbb2141b0 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -956,6 +956,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
|
||||
(mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
|
||||
}
|
||||
|
||||
+bool
|
||||
+v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
|
||||
+{
|
||||
+ int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
|
||||
+ int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
+
|
||||
+ return (add_nsrc > 0 && inst->alu.add.a.raddr == raddr) ||
|
||||
+ (add_nsrc > 1 && inst->alu.add.b.raddr == raddr) ||
|
||||
+ (mul_nsrc > 0 && inst->alu.mul.a.raddr == raddr) ||
|
||||
+ (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr);
|
||||
+}
|
||||
+
|
||||
bool
|
||||
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_sig *sig)
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index a25be8e0ee6..9f7582ab06d 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -494,4 +494,6 @@ bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
|
||||
bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
+
|
||||
+bool v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr);
|
||||
#endif
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,65 +0,0 @@
|
||||
From cbe0a7a06a5fb9b3f28acba8c9cac362a6bc5324 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 6 Oct 2021 13:58:00 +0200
|
||||
Subject: [PATCH 031/142] broadcom/compiler: add a
|
||||
v3d71_qpu_writes_waddr_explicitly helper
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 28 ++++++++++++++++++++++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 3 +++
|
||||
2 files changed, 31 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index e8bbb2141b0..feb6b343c1c 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -968,6 +968,34 @@ v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
|
||||
(mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr);
|
||||
}
|
||||
|
||||
+bool
|
||||
+v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *inst,
|
||||
+ uint8_t waddr)
|
||||
+{
|
||||
+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
+ return false;
|
||||
+
|
||||
+ if (v3d_qpu_add_op_has_dst(inst->alu.add.op) &&
|
||||
+ !inst->alu.add.magic_write &&
|
||||
+ inst->alu.add.waddr == waddr) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (v3d_qpu_mul_op_has_dst(inst->alu.mul.op) &&
|
||||
+ !inst->alu.mul.magic_write &&
|
||||
+ inst->alu.mul.waddr == waddr) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
+ !inst->sig_magic && inst->sig_addr == waddr) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
bool
|
||||
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_sig *sig)
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 9f7582ab06d..50a69ce8c3a 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -496,4 +496,7 @@ bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
|
||||
bool v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr);
|
||||
+bool v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *inst,
|
||||
+ uint8_t waddr);
|
||||
#endif
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,67 +0,0 @@
|
||||
From 92e91a9b22ae61dc9f39880e8fdaa7714789efdb Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 27 Sep 2021 11:49:24 +0200
|
||||
Subject: [PATCH 032/142] broadcom/compiler: prevent rf2-3 usage in thread end
|
||||
delay slots for v71
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 37 +++++++++++++++++++++-------
|
||||
1 file changed, 28 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index b78abe003e9..839c0c62315 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1691,16 +1691,35 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
|
||||
return false;
|
||||
|
||||
- /* RF0-2 might be overwritten during the delay slots by
|
||||
- * fragment shader setup.
|
||||
- */
|
||||
- if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A))
|
||||
- return false;
|
||||
+ if (c->devinfo->ver <= 42) {
|
||||
+ /* RF0-2 might be overwritten during the delay slots by
|
||||
+ * fragment shader setup.
|
||||
+ */
|
||||
+ if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A))
|
||||
+ return false;
|
||||
|
||||
- if (inst->raddr_b < 3 &&
|
||||
- !inst->sig.small_imm_b &&
|
||||
- v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
|
||||
- return false;
|
||||
+ if (inst->raddr_b < 3 &&
|
||||
+ !inst->sig.small_imm_b &&
|
||||
+ v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (c->devinfo->ver >= 71) {
|
||||
+ /* RF2-3 might be overwritten during the delay slots by
|
||||
+ * fragment shader setup.
|
||||
+ *
|
||||
+ * FIXME: handle small immediate cases
|
||||
+ */
|
||||
+ if (v3d71_qpu_reads_raddr(inst, 2) ||
|
||||
+ v3d71_qpu_reads_raddr(inst, 3)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 2) ||
|
||||
+ v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 3)) {
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,78 +0,0 @@
|
||||
From 68a1545eb973e41608534ff05a9e84a86c046453 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 27 Sep 2021 13:26:04 +0200
|
||||
Subject: [PATCH 033/142] broadcom/qpu: add new ADD opcodes for FMOV/MOV in v71
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 5 +++++
|
||||
src/broadcom/qpu/qpu_instr.h | 4 ++++
|
||||
src/broadcom/qpu/qpu_pack.c | 15 +++++++++++++++
|
||||
3 files changed, 24 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index feb6b343c1c..195a0dcd232 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -177,6 +177,8 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
|
||||
[V3D_QPU_A_ITOF] = "itof",
|
||||
[V3D_QPU_A_CLZ] = "clz",
|
||||
[V3D_QPU_A_UTOF] = "utof",
|
||||
+ [V3D_QPU_A_MOV] = "mov",
|
||||
+ [V3D_QPU_A_FMOV] = "fmov",
|
||||
};
|
||||
|
||||
if (op >= ARRAY_SIZE(op_names))
|
||||
@@ -458,6 +460,9 @@ static const uint8_t add_op_args[] = {
|
||||
[V3D_QPU_A_ITOF] = D | A,
|
||||
[V3D_QPU_A_CLZ] = D | A,
|
||||
[V3D_QPU_A_UTOF] = D | A,
|
||||
+
|
||||
+ [V3D_QPU_A_MOV] = D | A,
|
||||
+ [V3D_QPU_A_FMOV] = D | A,
|
||||
};
|
||||
|
||||
static const uint8_t mul_op_args[] = {
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 50a69ce8c3a..c86a4119c54 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -227,6 +227,10 @@ enum v3d_qpu_add_op {
|
||||
V3D_QPU_A_ITOF,
|
||||
V3D_QPU_A_CLZ,
|
||||
V3D_QPU_A_UTOF,
|
||||
+
|
||||
+ /* V3D 7.x */
|
||||
+ V3D_QPU_A_FMOV,
|
||||
+ V3D_QPU_A_MOV,
|
||||
};
|
||||
|
||||
enum v3d_qpu_mul_op {
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 4045275cb9a..0e504e65fbf 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -776,6 +776,21 @@ static const struct opcode_desc add_ops_v71[] = {
|
||||
|
||||
{ 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
|
||||
{ 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
|
||||
+
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },
|
||||
+
|
||||
+ { 249, 249, .raddr_mask = OP_MASK(3), V3D_QPU_A_MOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_MASK(7), V3D_QPU_A_MOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
|
||||
+ { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
|
||||
+
|
||||
};
|
||||
|
||||
static const struct opcode_desc mul_ops_v71[] = {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,46 +0,0 @@
|
||||
From 8dbbb7e22b694fdc62376d112b3dc6105d556c63 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 4 Oct 2021 13:07:35 +0200
|
||||
Subject: [PATCH 034/142] broadcom/qpu: fix packing/unpacking of fmov variants
|
||||
for v71
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 9 ++++-----
|
||||
1 file changed, 4 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 0e504e65fbf..0eb820b3f10 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -1405,9 +1405,9 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
|
||||
break;
|
||||
|
||||
case V3D_QPU_M_FMOV:
|
||||
- instr->alu.mul.output_pack = (raddr_d >> 2) & 1;
|
||||
+ instr->alu.mul.output_pack = raddr_d & 0x3;
|
||||
|
||||
- if (!v3d_qpu_float32_unpack_unpack(raddr_d & 0x3,
|
||||
+ if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7,
|
||||
&instr->alu.mul.a.unpack)) {
|
||||
return false;
|
||||
}
|
||||
@@ -2046,14 +2046,13 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
- opcode |= (packed >> 1) & 1;
|
||||
- raddr_d = (packed & 1) << 2;
|
||||
+ raddr_d |= packed;
|
||||
|
||||
if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
|
||||
&packed)) {
|
||||
return false;
|
||||
}
|
||||
- raddr_d |= packed;
|
||||
+ raddr_d |= packed << 2;
|
||||
break;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,107 +0,0 @@
|
||||
From 63d0059ebef288afb0e2e746dadda8c2238bdfcb Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 01:17:08 +0200
|
||||
Subject: [PATCH 035/142] broadcom/qpu: implement switch rules for fmin/fmax
|
||||
fadd/faddnf for v71
|
||||
|
||||
They use the same opcodes, and switch between one and the other based
|
||||
on raddr.
|
||||
|
||||
Note that the rule also takes into account whether small_imm_a/b are used. That
|
||||
is still not in place, so that part is hardcoded. It will be updated later
|
||||
when small immediates support for v71 gets implemented.
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 48 +++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 48 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 0eb820b3f10..7a262f18ac3 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -651,7 +651,9 @@ static const struct opcode_desc mul_ops_v33[] = {
|
||||
* opcodes that changed on v71
|
||||
*/
|
||||
static const struct opcode_desc add_ops_v71[] = {
|
||||
+ /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */
|
||||
{ 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
|
||||
+ { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
|
||||
{ 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
|
||||
{ 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
|
||||
{ 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
|
||||
@@ -666,6 +668,10 @@ static const struct opcode_desc add_ops_v71[] = {
|
||||
{ 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
|
||||
{ 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
|
||||
{ 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
|
||||
+ /* FMIN is instead FMAX depending on the raddr_a/b order. */
|
||||
+ { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
|
||||
+ { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
|
||||
+ { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },
|
||||
|
||||
{ 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
|
||||
{ 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
|
||||
@@ -1162,6 +1168,22 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
|
||||
|
||||
instr->alu.add.op = desc->op;
|
||||
|
||||
+ /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
|
||||
+ * operands.
|
||||
+ */
|
||||
+ /* FIXME: for now hardcoded values, until we got the small_imm support
|
||||
+ * in place
|
||||
+ */
|
||||
+ uint32_t small_imm_a = 0;
|
||||
+ uint32_t small_imm_b = 0;
|
||||
+ if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
|
||||
+ small_imm_b *256 + (op & 3) * 64 + raddr_b) {
|
||||
+ if (instr->alu.add.op == V3D_QPU_A_FMIN)
|
||||
+ instr->alu.add.op = V3D_QPU_A_FMAX;
|
||||
+ if (instr->alu.add.op == V3D_QPU_A_FADD)
|
||||
+ instr->alu.add.op = V3D_QPU_A_FADDNF;
|
||||
+ }
|
||||
+
|
||||
/* Some QPU ops require a bit more than just basic opcode and mux a/b
|
||||
* comparisons to distinguish them.
|
||||
*/
|
||||
@@ -1754,6 +1776,11 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
uint32_t output_pack;
|
||||
uint32_t a_unpack;
|
||||
uint32_t b_unpack;
|
||||
+ /* FIXME: for now hardcoded values, until we got the small_imm
|
||||
+ * support in place
|
||||
+ */
|
||||
+ uint32_t small_imm_a = 0;
|
||||
+ uint32_t small_imm_b = 0;
|
||||
|
||||
if (instr->alu.add.op != V3D_QPU_A_FCMP) {
|
||||
if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
|
||||
@@ -1773,6 +1800,27 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
return false;
|
||||
}
|
||||
|
||||
+ /* These operations with commutative operands are
|
||||
+ * distinguished by which order their operands come in.
|
||||
+ */
|
||||
+ bool ordering =
|
||||
+ small_imm_a * 256 + a_unpack * 64 + raddr_a >
|
||||
+ small_imm_b * 256 + b_unpack * 64 + raddr_b;
|
||||
+ if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
|
||||
+ instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
|
||||
+ ((instr->alu.add.op == V3D_QPU_A_FMAX ||
|
||||
+ instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
|
||||
+ uint32_t temp;
|
||||
+
|
||||
+ temp = a_unpack;
|
||||
+ a_unpack = b_unpack;
|
||||
+ b_unpack = temp;
|
||||
+
|
||||
+ temp = raddr_a;
|
||||
+ raddr_a = raddr_b;
|
||||
+ raddr_b = temp;
|
||||
+ }
|
||||
+
|
||||
opcode |= a_unpack << 2;
|
||||
opcode |= b_unpack << 0;
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,37 +0,0 @@
|
||||
From c9f6faa3ddc91024b3d9dc67ce2221187daac128 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 29 Sep 2021 11:54:18 +0200
|
||||
Subject: [PATCH 036/142] broadcom/compiler: make vir_write_rX return false on
|
||||
platforms without accums
|
||||
|
||||
---
|
||||
src/broadcom/compiler/vir.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
|
||||
index 007cb0a941b..d75cd777b6d 100644
|
||||
--- a/src/broadcom/compiler/vir.c
|
||||
+++ b/src/broadcom/compiler/vir.c
|
||||
@@ -158,6 +158,9 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
bool
|
||||
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
{
|
||||
+ if (!devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
switch (inst->src[i].file) {
|
||||
case QFILE_VPM:
|
||||
@@ -180,6 +183,9 @@ vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
bool
|
||||
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
{
|
||||
+ if (!devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
switch (inst->dst.file) {
|
||||
case QFILE_MAGIC:
|
||||
switch (inst->dst.index) {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,77 +0,0 @@
|
||||
From 3d16229743e26b58735ed049ee982073f6034342 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 29 Sep 2021 12:03:50 +0200
|
||||
Subject: [PATCH 037/142] broadcom/compiler: rename vir_writes_rX to
|
||||
vir_writes_rX_implicitly
|
||||
|
||||
Since that represents more accurately what they check.
|
||||
---
|
||||
src/broadcom/compiler/v3d_compiler.h | 4 ++--
|
||||
src/broadcom/compiler/vir.c | 6 ++++--
|
||||
src/broadcom/compiler/vir_register_allocate.c | 4 ++--
|
||||
3 files changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
|
||||
index eb4e692464b..7e8f3bfc1a7 100644
|
||||
--- a/src/broadcom/compiler/v3d_compiler.h
|
||||
+++ b/src/broadcom/compiler/v3d_compiler.h
|
||||
@@ -1149,8 +1149,8 @@ bool vir_is_raw_mov(struct qinst *inst);
|
||||
bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
bool vir_is_add(struct qinst *inst);
|
||||
bool vir_is_mul(struct qinst *inst);
|
||||
-bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
-bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
+bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
+bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
|
||||
uint8_t vir_channels_written(struct qinst *inst);
|
||||
struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i);
|
||||
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
|
||||
index d75cd777b6d..aea113f050e 100644
|
||||
--- a/src/broadcom/compiler/vir.c
|
||||
+++ b/src/broadcom/compiler/vir.c
|
||||
@@ -156,7 +156,8 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
}
|
||||
|
||||
bool
|
||||
-vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
+vir_writes_r3_implicitly(const struct v3d_device_info *devinfo,
|
||||
+ struct qinst *inst)
|
||||
{
|
||||
if (!devinfo->has_accumulators)
|
||||
return false;
|
||||
@@ -181,7 +182,8 @@ vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
}
|
||||
|
||||
bool
|
||||
-vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
+vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
|
||||
+ struct qinst *inst)
|
||||
{
|
||||
if (!devinfo->has_accumulators)
|
||||
return false;
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 7b3f6c41934..f2df35cd458 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -988,7 +988,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* result to a temp), nothing else can be stored in r3/r4 across
|
||||
* it.
|
||||
*/
|
||||
- if (vir_writes_r3(c->devinfo, inst)) {
|
||||
+ if (vir_writes_r3_implicitly(c->devinfo, inst)) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
@@ -998,7 +998,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
}
|
||||
}
|
||||
|
||||
- if (vir_writes_r4(c->devinfo, inst)) {
|
||||
+ if (vir_writes_r4_implicitly(c->devinfo, inst)) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,170 +0,0 @@
|
||||
From 83fae160491737e8568b8fb5eaa5be4d2c8bf3c8 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 29 Sep 2021 12:10:31 +0200
|
||||
Subject: [PATCH 038/142] broadcom/compiler: only handle accumulator classes if
|
||||
present
|
||||
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 77 ++++++++++++-------
|
||||
1 file changed, 49 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index f2df35cd458..e78ccb7c6aa 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -53,6 +53,17 @@ get_class_bit_any(const struct v3d_device_info *devinfo)
|
||||
else
|
||||
return CLASS_BITS_PHYS;
|
||||
}
|
||||
+
|
||||
+static uint8_t
|
||||
+filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
|
||||
+{
|
||||
+ if (!devinfo->has_accumulators) {
|
||||
+ assert(class_bits & CLASS_BITS_PHYS);
|
||||
+ class_bits = CLASS_BITS_PHYS;
|
||||
+ }
|
||||
+ return class_bits;
|
||||
+}
|
||||
+
|
||||
static inline uint32_t
|
||||
temp_to_node(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
@@ -413,8 +424,10 @@ v3d_setup_spill_base(struct v3d_compile *c)
|
||||
*/
|
||||
if (c->spilling) {
|
||||
int temp_class = CLASS_BITS_PHYS;
|
||||
- if (i != c->spill_base.index)
|
||||
+ if (c->devinfo->has_accumulators &&
|
||||
+ i != c->spill_base.index) {
|
||||
temp_class |= CLASS_BITS_ACC;
|
||||
+ }
|
||||
add_node(c, i, temp_class);
|
||||
}
|
||||
}
|
||||
@@ -473,14 +486,16 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
|
||||
* temp will be used immediately so just like the uniform above we
|
||||
* can allow accumulators.
|
||||
*/
|
||||
+ int temp_class =
|
||||
+ filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC);
|
||||
if (!fill_dst) {
|
||||
struct qreg dst = vir_TMUWT(c);
|
||||
assert(dst.file == QFILE_TEMP);
|
||||
- add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
|
||||
+ add_node(c, dst.index, temp_class);
|
||||
} else {
|
||||
*fill_dst = vir_LDTMU(c);
|
||||
assert(fill_dst->file == QFILE_TEMP);
|
||||
- add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
|
||||
+ add_node(c, fill_dst->index, temp_class);
|
||||
}
|
||||
|
||||
/* Temps across the thread switch we injected can't be assigned to
|
||||
@@ -662,8 +677,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
* instruction immediately after so we
|
||||
* can use ACC.
|
||||
*/
|
||||
- add_node(c, temp.index, CLASS_BITS_PHYS |
|
||||
- CLASS_BITS_ACC);
|
||||
+ int temp_class =
|
||||
+ filter_class_bits(c->devinfo, CLASS_BITS_PHYS |
|
||||
+ CLASS_BITS_ACC);
|
||||
+ add_node(c, temp.index, temp_class);
|
||||
} else {
|
||||
/* If we have a postponed spill, we
|
||||
* don't need a fill as the temp would
|
||||
@@ -941,6 +958,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
compiler->reg_class_phys[threads] =
|
||||
ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
|
||||
+ /* Init physical regs */
|
||||
for (int i = phys_index;
|
||||
i < phys_index + (PHYS_COUNT >> threads); i++) {
|
||||
if (compiler->devinfo->has_accumulators)
|
||||
@@ -949,16 +967,15 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
}
|
||||
|
||||
+ /* Init accumulator regs */
|
||||
if (compiler->devinfo->has_accumulators) {
|
||||
for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
|
||||
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
}
|
||||
- }
|
||||
- /* r5 can only store a single 32-bit value, so not much can
|
||||
- * use it.
|
||||
- */
|
||||
- if (compiler->devinfo->has_accumulators) {
|
||||
+ /* r5 can only store a single 32-bit value, so not much can
|
||||
+ * use it.
|
||||
+ */
|
||||
ra_class_add_reg(compiler->reg_class_r5[threads],
|
||||
ACC_INDEX + 5);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads],
|
||||
@@ -1081,21 +1098,23 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* because ldunif has usually a shorter lifespan, allowing for
|
||||
* more accumulator reuse and QPU merges.
|
||||
*/
|
||||
- if (!inst->qpu.sig.ldunif) {
|
||||
- uint8_t class_bits =
|
||||
- get_temp_class_bits(c, inst->dst.index) &
|
||||
- ~CLASS_BITS_R5;
|
||||
- set_temp_class_bits(c, inst->dst.index,
|
||||
- class_bits);
|
||||
-
|
||||
- } else {
|
||||
- /* Until V3D 4.x, we could only load a uniform
|
||||
- * to r5, so we'll need to spill if uniform
|
||||
- * loads interfere with each other.
|
||||
- */
|
||||
- if (c->devinfo->ver < 40) {
|
||||
+ if (c->devinfo->has_accumulators) {
|
||||
+ if (!inst->qpu.sig.ldunif) {
|
||||
+ uint8_t class_bits =
|
||||
+ get_temp_class_bits(c, inst->dst.index) &
|
||||
+ ~CLASS_BITS_R5;
|
||||
set_temp_class_bits(c, inst->dst.index,
|
||||
- CLASS_BITS_R5);
|
||||
+ class_bits);
|
||||
+
|
||||
+ } else {
|
||||
+ /* Until V3D 4.x, we could only load a uniform
|
||||
+ * to r5, so we'll need to spill if uniform
|
||||
+ * loads interfere with each other.
|
||||
+ */
|
||||
+ if (c->devinfo->ver < 40) {
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
+ CLASS_BITS_R5);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1152,8 +1171,10 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->thread_index--;
|
||||
}
|
||||
|
||||
- c->g = ra_alloc_interference_graph(c->compiler->regs,
|
||||
- c->num_temps + ARRAY_SIZE(acc_nodes));
|
||||
+ unsigned num_ra_nodes = c->num_temps;
|
||||
+ if (c->devinfo->has_accumulators)
|
||||
+ num_ra_nodes += ARRAY_SIZE(acc_nodes);
|
||||
+ c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
|
||||
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
|
||||
|
||||
/* Make some fixed nodes for the accumulators, which we will need to
|
||||
@@ -1162,8 +1183,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
* live in, but the classes take up a lot of memory to set up, so we
|
||||
* don't want to make too many.
|
||||
*/
|
||||
- for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) {
|
||||
- if (i < ACC_COUNT) {
|
||||
+ for (uint32_t i = 0; i < num_ra_nodes; i++) {
|
||||
+ if (c->devinfo->has_accumulators && i < ACC_COUNT) {
|
||||
acc_nodes[i] = i;
|
||||
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
|
||||
c->nodes.info[i].priority = 0;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,187 +0,0 @@
|
||||
From fd77cc3204e7c69927f97ce2a1d55d2a47d77a27 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 29 Sep 2021 12:14:04 +0200
|
||||
Subject: [PATCH 039/142] broadcom/compiler: don't assign rf0 to temps across
|
||||
implicit rf0 writes
|
||||
|
||||
In platforms that don't have accumulators and have implicit writes to
|
||||
the register file we need to be careful and avoid assigning a physical
|
||||
register to a temp that lives across an implicit write to that same
|
||||
physical register.
|
||||
|
||||
For now, we have the case of implicit writes to rf0 from various
|
||||
signals, but it should be easy to extend this to include additional
|
||||
registers if needed.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 69 +++++++++++++++----
|
||||
1 file changed, 57 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index e78ccb7c6aa..e0adc1de7a4 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -29,6 +29,9 @@
|
||||
#define ACC_INDEX 0
|
||||
#define ACC_COUNT 6
|
||||
|
||||
+/* RA nodes used to track RF registers with implicit writes */
|
||||
+#define IMPLICIT_RF_COUNT 1
|
||||
+
|
||||
#define PHYS_COUNT 64
|
||||
|
||||
static uint8_t
|
||||
@@ -67,15 +70,17 @@ filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
|
||||
static inline uint32_t
|
||||
temp_to_node(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
- return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
|
||||
+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT :
|
||||
+ IMPLICIT_RF_COUNT);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
node_to_temp(struct v3d_compile *c, uint32_t node)
|
||||
{
|
||||
assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
|
||||
- (!c->devinfo->has_accumulators && node >= 0));
|
||||
- return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
|
||||
+ (!c->devinfo->has_accumulators && node >= IMPLICIT_RF_COUNT));
|
||||
+ return node - (c->devinfo->has_accumulators ? ACC_COUNT :
|
||||
+ IMPLICIT_RF_COUNT);
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
@@ -360,7 +365,8 @@ ensure_nodes(struct v3d_compile *c)
|
||||
c->nodes.info = reralloc_array_size(c,
|
||||
c->nodes.info,
|
||||
sizeof(c->nodes.info[0]),
|
||||
- c->nodes.alloc_count + ACC_COUNT);
|
||||
+ c->nodes.alloc_count +
|
||||
+ MAX2(ACC_COUNT, IMPLICIT_RF_COUNT));
|
||||
}
|
||||
|
||||
/* Creates the interference node for a new temp. We use this to keep the node
|
||||
@@ -372,7 +378,8 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
|
||||
ensure_nodes(c);
|
||||
|
||||
int node = ra_add_node(c->g, choose_reg_class(c, class_bits));
|
||||
- assert(node == temp + ACC_COUNT);
|
||||
+ assert(c->devinfo->has_accumulators ? node == temp + ACC_COUNT :
|
||||
+ node == temp + IMPLICIT_RF_COUNT);
|
||||
|
||||
/* We fill the node priority after we are done inserting spills */
|
||||
c->nodes.info[node].class_bits = class_bits;
|
||||
@@ -995,7 +1002,9 @@ tmu_spilling_allowed(struct v3d_compile *c)
|
||||
}
|
||||
|
||||
static void
|
||||
-update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
+update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
||||
+ int *acc_nodes,
|
||||
+ int *implicit_rf_nodes,
|
||||
struct qinst *inst)
|
||||
{
|
||||
int32_t ip = inst->ip;
|
||||
@@ -1025,6 +1034,19 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
}
|
||||
}
|
||||
|
||||
+ /* If any instruction writes to a physical register implicitly
|
||||
+ * nothing else can write the same register across it.
|
||||
+ */
|
||||
+ if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
|
||||
+ for (int i = 0; i < c->num_temps; i++) {
|
||||
+ if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
+ ra_add_node_interference(c->g,
|
||||
+ temp_to_node(c, i),
|
||||
+ implicit_rf_nodes[0]);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
switch (inst->qpu.alu.add.op) {
|
||||
case V3D_QPU_A_LDVPMV_IN:
|
||||
@@ -1116,6 +1138,16 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
CLASS_BITS_R5);
|
||||
}
|
||||
}
|
||||
+ } else {
|
||||
+ /* If the instruction has an implicit write
|
||||
+ * we can't allocate its dest to the same
|
||||
+ * register.
|
||||
+ */
|
||||
+ if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
|
||||
+ ra_add_node_interference(c->g,
|
||||
+ temp_to_node(c, inst->dst.index),
|
||||
+ implicit_rf_nodes[0]);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1139,10 +1171,18 @@ struct qpu_reg *
|
||||
v3d_register_allocate(struct v3d_compile *c)
|
||||
{
|
||||
int acc_nodes[ACC_COUNT];
|
||||
+ int implicit_rf_nodes[IMPLICIT_RF_COUNT];
|
||||
+
|
||||
+ unsigned num_ra_nodes = c->num_temps;
|
||||
+ if (c->devinfo->has_accumulators)
|
||||
+ num_ra_nodes += ARRAY_SIZE(acc_nodes);
|
||||
+ else
|
||||
+ num_ra_nodes += ARRAY_SIZE(implicit_rf_nodes);
|
||||
+
|
||||
c->nodes = (struct v3d_ra_node_info) {
|
||||
.alloc_count = c->num_temps,
|
||||
.info = ralloc_array_size(c, sizeof(c->nodes.info[0]),
|
||||
- c->num_temps + ACC_COUNT),
|
||||
+ num_ra_nodes),
|
||||
};
|
||||
|
||||
uint32_t phys_index = get_phys_index(c->devinfo);
|
||||
@@ -1171,9 +1211,6 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->thread_index--;
|
||||
}
|
||||
|
||||
- unsigned num_ra_nodes = c->num_temps;
|
||||
- if (c->devinfo->has_accumulators)
|
||||
- num_ra_nodes += ARRAY_SIZE(acc_nodes);
|
||||
c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
|
||||
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
|
||||
|
||||
@@ -1181,7 +1218,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
* interfere with when ops have implied r3/r4 writes or for the thread
|
||||
* switches. We could represent these as classes for the nodes to
|
||||
* live in, but the classes take up a lot of memory to set up, so we
|
||||
- * don't want to make too many.
|
||||
+ * don't want to make too many. We use the same mechanism on platforms
|
||||
+ * without accumulators that can have implicit writes to phys regs.
|
||||
*/
|
||||
for (uint32_t i = 0; i < num_ra_nodes; i++) {
|
||||
if (c->devinfo->has_accumulators && i < ACC_COUNT) {
|
||||
@@ -1189,6 +1227,12 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
|
||||
c->nodes.info[i].priority = 0;
|
||||
c->nodes.info[i].class_bits = 0;
|
||||
+ } else if (!c->devinfo->has_accumulators &&
|
||||
+ i < ARRAY_SIZE(implicit_rf_nodes)) {
|
||||
+ implicit_rf_nodes[i] = i;
|
||||
+ ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i);
|
||||
+ c->nodes.info[i].priority = 0;
|
||||
+ c->nodes.info[i].class_bits = 0;
|
||||
} else {
|
||||
uint32_t t = node_to_temp(c, i);
|
||||
c->nodes.info[i].priority =
|
||||
@@ -1204,7 +1248,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
int ip = 0;
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
inst->ip = ip++;
|
||||
- update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
|
||||
+ update_graph_and_reg_classes_for_inst(c, acc_nodes,
|
||||
+ implicit_rf_nodes, inst);
|
||||
}
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,33 +0,0 @@
|
||||
From 9a08ae9f354a6da6d9d71b87800aca8b3df49e29 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 13:37:28 +0200
|
||||
Subject: [PATCH 040/142] broadcom/compiler: CS payload registers have changed
|
||||
in v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/nir_to_vir.c | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
|
||||
index 1a05b279a2d..220ff6bcd49 100644
|
||||
--- a/src/broadcom/compiler/nir_to_vir.c
|
||||
+++ b/src/broadcom/compiler/nir_to_vir.c
|
||||
@@ -4362,8 +4362,13 @@ nir_to_vir(struct v3d_compile *c)
|
||||
V3D_QPU_WADDR_SYNC));
|
||||
}
|
||||
|
||||
- c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
- c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
+ if (c->devinfo->ver <= 42) {
|
||||
+ c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
+ c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
+ } else if (c->devinfo->ver >= 71) {
|
||||
+ c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 3));
|
||||
+ c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
+ }
|
||||
|
||||
/* Set up the division between gl_LocalInvocationIndex and
|
||||
* wg_in_mem in the payload reg.
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,46 +0,0 @@
|
||||
From 5477884196cb54a71f54fa6cad42c6d3326bde88 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Fri, 22 Oct 2021 13:39:48 +0200
|
||||
Subject: [PATCH 041/142] broadcom/compiler: don't schedule rf0 writes right
|
||||
after ldvary
|
||||
|
||||
ldvary writes rf0 implicitly on the next cycle so they would clash.
|
||||
This case is not handled correctly by our normal dependency tracking,
|
||||
which doesn't know anything about delayed writes from instructions
|
||||
and thinks the rf0 write happens on the same cycle ldvary is emitted.
|
||||
|
||||
Fixes (v71):
|
||||
dEQP-VK.glsl.conversions.matrix_to_matrix.mat2x3_to_mat4x2_fragment
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 15 +++++++++++++++
|
||||
1 file changed, 15 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 839c0c62315..870823fd2b1 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -652,6 +652,21 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
|
||||
v3d_qpu_writes_r4(devinfo, inst))
|
||||
return true;
|
||||
|
||||
+ if (devinfo->ver <= 42)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Don't schedule anything that writes rf0 right after ldvary, since
|
||||
+ * that would clash with the ldvary's delayed rf0 write (the exception
|
||||
+ * is another ldvary, since its implicit rf0 write would also have
|
||||
+ * one cycle of delay and would not clash).
|
||||
+ */
|
||||
+ if (scoreboard->last_ldvary_tick + 1 == scoreboard->tick &&
|
||||
+ (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
|
||||
+ (v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
|
||||
+ !inst->sig.ldvary))) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
return false;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 31623712c2f741d393767641f32d56c35150eda5 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 30 Sep 2021 13:22:48 +0200
|
||||
Subject: [PATCH 042/142] broadcom/compiler: allow instruction merges in v71
|
||||
|
||||
In v3d 4.x there were restrictions based on the number of raddrs used
|
||||
by the combined instructions, but we don't have these restrictions in
|
||||
v3d 7.x.
|
||||
|
||||
It should be noted that while there are no restrictions on the number
|
||||
of raddrs addressed, a QPU instruction can only address a single small
|
||||
immediate, so we should be careful about that when we add support for
|
||||
small immediates.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 21 +++++++++++++++++----
|
||||
1 file changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 870823fd2b1..ff544fb3c1c 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -906,8 +906,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
|
||||
static bool
|
||||
qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
const struct v3d_qpu_instr *add_instr,
|
||||
- const struct v3d_qpu_instr *mul_instr)
|
||||
+ const struct v3d_qpu_instr *mul_instr,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
{
|
||||
+ assert(devinfo->ver <= 42);
|
||||
+
|
||||
uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
|
||||
int naddrs = util_bitcount64(raddrs_used);
|
||||
|
||||
@@ -1111,9 +1114,19 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
add_instr = a;
|
||||
}
|
||||
|
||||
- if (add_instr && mul_instr &&
|
||||
- !qpu_merge_raddrs(&merge, add_instr, mul_instr)) {
|
||||
- return false;
|
||||
+ /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
|
||||
+ * they have restrictions on the number of raddrs that can be addressed
|
||||
+ * in a single instruction.
|
||||
+ *
|
||||
+ * FIXME: for V3D 7.x we can't merge instructions if they address more
|
||||
+ * than one small immediate. For now, we don't support small immediates,
|
||||
+ * so it is not a problem.
|
||||
+ */
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ if (add_instr && mul_instr &&
|
||||
+ !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
|
||||
merge.sig.thrsw |= b->sig.thrsw;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,172 +0,0 @@
|
||||
From 959a0128654c94d84fda53ffc108971d3b3a817a Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 6 Oct 2021 09:27:43 +0200
|
||||
Subject: [PATCH 043/142] broadcom/qpu: add MOV integer packing/unpacking
|
||||
variants
|
||||
|
||||
These are new in v71 and cover MOV on both the ADD and the MUL alus.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.h | 9 ++++
|
||||
src/broadcom/qpu/qpu_pack.c | 98 ++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 107 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index c86a4119c54..4b34d17bd4c 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -285,6 +285,15 @@ enum v3d_qpu_input_unpack {
|
||||
|
||||
/** Swap high and low 16 bits */
|
||||
V3D_QPU_UNPACK_SWAP_16,
|
||||
+
|
||||
+ /** Convert low 16 bits from 16-bit integer to unsigned 32-bit int */
|
||||
+ V3D_QPU_UNPACK_UL,
|
||||
+ /** Convert high 16 bits from 16-bit integer to unsigned 32-bit int */
|
||||
+ V3D_QPU_UNPACK_UH,
|
||||
+ /** Convert low 16 bits from 16-bit integer to signed 32-bit int */
|
||||
+ V3D_QPU_UNPACK_IL,
|
||||
+ /** Convert high 16 bits from 16-bit integer to signed 32-bit int */
|
||||
+ V3D_QPU_UNPACK_IH,
|
||||
};
|
||||
|
||||
enum v3d_qpu_mux {
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 7a262f18ac3..4d677894755 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -922,6 +922,56 @@ v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
|
||||
}
|
||||
}
|
||||
|
||||
+static bool
|
||||
+v3d_qpu_int32_unpack_unpack(uint32_t packed,
|
||||
+ enum v3d_qpu_input_unpack *unpacked)
|
||||
+{
|
||||
+ switch (packed) {
|
||||
+ case 0:
|
||||
+ *unpacked = V3D_QPU_UNPACK_NONE;
|
||||
+ return true;
|
||||
+ case 1:
|
||||
+ *unpacked = V3D_QPU_UNPACK_UL;
|
||||
+ return true;
|
||||
+ case 2:
|
||||
+ *unpacked = V3D_QPU_UNPACK_UH;
|
||||
+ return true;
|
||||
+ case 3:
|
||||
+ *unpacked = V3D_QPU_UNPACK_IL;
|
||||
+ return true;
|
||||
+ case 4:
|
||||
+ *unpacked = V3D_QPU_UNPACK_IH;
|
||||
+ return true;
|
||||
+ default:
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
|
||||
+ uint32_t *packed)
|
||||
+{
|
||||
+ switch (unpacked) {
|
||||
+ case V3D_QPU_UNPACK_NONE:
|
||||
+ *packed = 0;
|
||||
+ return true;
|
||||
+ case V3D_QPU_UNPACK_UL:
|
||||
+ *packed = 1;
|
||||
+ return true;
|
||||
+ case V3D_QPU_UNPACK_UH:
|
||||
+ *packed = 2;
|
||||
+ return true;
|
||||
+ case V3D_QPU_UNPACK_IL:
|
||||
+ *packed = 3;
|
||||
+ return true;
|
||||
+ case V3D_QPU_UNPACK_IH:
|
||||
+ *packed = 4;
|
||||
+ return true;
|
||||
+ default:
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static bool
|
||||
v3d_qpu_float16_unpack_unpack(uint32_t packed,
|
||||
enum v3d_qpu_input_unpack *unpacked)
|
||||
@@ -1273,6 +1323,15 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
|
||||
instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
break;
|
||||
|
||||
+ case V3D_QPU_A_MOV:
|
||||
+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
+
|
||||
+ if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
|
||||
+ &instr->alu.add.a.unpack)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
@@ -1449,6 +1508,15 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
|
||||
|
||||
break;
|
||||
|
||||
+ case V3D_QPU_M_MOV:
|
||||
+ instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
|
||||
+
|
||||
+ if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
|
||||
+ &instr->alu.mul.a.unpack)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
|
||||
instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
@@ -1909,6 +1977,21 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
opcode |= packed;
|
||||
break;
|
||||
|
||||
+ case V3D_QPU_A_MOV: {
|
||||
+ uint32_t packed;
|
||||
+
|
||||
+ if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
|
||||
+ return false;
|
||||
+
|
||||
+ if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
|
||||
+ &packed)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ raddr_b |= packed << 2;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
default:
|
||||
if (instr->alu.add.op != V3D_QPU_A_NOP &&
|
||||
(instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
|
||||
@@ -2126,6 +2209,21 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
break;
|
||||
}
|
||||
|
||||
+ case V3D_QPU_M_MOV: {
|
||||
+ uint32_t packed;
|
||||
+
|
||||
+ if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
|
||||
+ return false;
|
||||
+
|
||||
+ if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
|
||||
+ &packed)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ raddr_d |= packed << 2;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
default:
|
||||
break;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,47 +0,0 @@
|
||||
From 2e86dd0c357d7b432ce6794ae22fbfae89ad186b Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 6 Oct 2021 12:01:10 +0200
|
||||
Subject: [PATCH 044/142] broadcom/qpu: fail packing on unhandled mul
|
||||
pack/unpack
|
||||
|
||||
We are doing this for the ADD alu already and it may be helpful to
|
||||
identify cases where we have QPU code with pack/unpack modifiers on
|
||||
MUL opcodes that we then are not packing into the actual QPU
|
||||
instructions.
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 4d677894755..180d7ab08a3 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -2106,6 +2106,12 @@ v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
}
|
||||
|
||||
default:
|
||||
+ if (instr->alu.mul.op != V3D_QPU_M_NOP &&
|
||||
+ (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
|
||||
+ instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
|
||||
+ return false;
|
||||
+ }
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -2225,6 +2231,12 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
|
||||
}
|
||||
|
||||
default:
|
||||
+ if (instr->alu.mul.op != V3D_QPU_M_NOP &&
|
||||
+ (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
|
||||
+ instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
|
||||
+ instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
|
||||
+ return false;
|
||||
+ }
|
||||
break;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,30 +0,0 @@
|
||||
From ed6bfa29d43b5a89ff070961454f1e82e23b4f45 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Fri, 8 Oct 2021 15:10:24 +0200
|
||||
Subject: [PATCH 045/142] broadcom/compiler: generalize check for shaders using
|
||||
pixel center W
|
||||
|
||||
V3D 4.x has pixel center W in rf0 and V3D 7.x has it in rf3. We already
|
||||
account for this when we setup the c->payload_w, so use that.
|
||||
---
|
||||
src/broadcom/compiler/nir_to_vir.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
|
||||
index 220ff6bcd49..90fe1d1e7f0 100644
|
||||
--- a/src/broadcom/compiler/nir_to_vir.c
|
||||
+++ b/src/broadcom/compiler/nir_to_vir.c
|
||||
@@ -4547,8 +4547,8 @@ vir_check_payload_w(struct v3d_compile *c)
|
||||
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
- if (inst->src[i].file == QFILE_REG &&
|
||||
- inst->src[i].index == 0) {
|
||||
+ if (inst->src[i].file == c->payload_w.file &&
|
||||
+ inst->src[i].index == c->payload_w.index) {
|
||||
c->uses_center_w = true;
|
||||
return;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,34 +0,0 @@
|
||||
From e1a0fa2c2010ef29b8cec798cd0fc99cf44f3a2d Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 14 Oct 2021 14:16:40 +0200
|
||||
Subject: [PATCH 046/142] broadcom/compiler: v71 isn't affected by
|
||||
double-rounding of viewport X,Y coords
|
||||
|
||||
---
|
||||
src/broadcom/compiler/v3d_nir_lower_io.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
|
||||
index 3ef0e398228..4cdba3748a1 100644
|
||||
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
|
||||
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
|
||||
@@ -600,9 +600,13 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
|
||||
* The correct fix for this as recommended by Broadcom
|
||||
* is to convert to .8 fixed-point with ffloor().
|
||||
*/
|
||||
- pos = nir_f2i32(b, nir_ffloor(b, pos));
|
||||
- v3d_nir_store_output(b, state->vp_vpm_offset + i,
|
||||
- offset_reg, pos);
|
||||
+ if (c->devinfo->ver <= 42)
|
||||
+ pos = nir_f2i32(b, nir_ffloor(b, pos));
|
||||
+ else
|
||||
+ pos = nir_f2i32(b, nir_fround_even(b, pos));
|
||||
+
|
||||
+ v3d_nir_store_output(b, state->vp_vpm_offset + i,
|
||||
+ offset_reg, pos);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,31 +0,0 @@
|
||||
From 697e6cf01b781b244404872f331a778b6d4e67da Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 19 Oct 2021 11:16:43 +0200
|
||||
Subject: [PATCH 047/142] broadcom/compiler: update one TMUWT restriction for
|
||||
v71
|
||||
|
||||
TMUWT not allowed in the final instruction restriction doesn't apply
|
||||
for v71.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index ff544fb3c1c..25f79aa6f46 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1700,8 +1700,10 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
/* GFXH-1625: TMUWT not allowed in the final instruction. */
|
||||
- if (slot == 2 && inst->alu.add.op == V3D_QPU_A_TMUWT)
|
||||
+ if (c->devinfo->ver <= 42 && slot == 2 &&
|
||||
+ inst->alu.add.op == V3D_QPU_A_TMUWT) {
|
||||
return false;
|
||||
+ }
|
||||
|
||||
/* No writing physical registers at the end. */
|
||||
bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,37 +0,0 @@
|
||||
From 26fea727a9f34b75a3fe3f6a806accaddcc317f6 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 19 Oct 2021 11:51:32 +0200
|
||||
Subject: [PATCH 048/142] broadcom/compiler: update ldunif/ldvary comment for
|
||||
v71
|
||||
|
||||
For v42 and below ldunif/ldvary both write to r5, but with a different
|
||||
delay, so we need to take that into account when scheduling both.
|
||||
|
||||
For v71 the register used is rf0, but the behaviour is the same. So
|
||||
the scheduling code can be the same, but the comment needs update.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 7 ++++---
|
||||
1 file changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 25f79aa6f46..e8197661f89 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1234,10 +1234,11 @@ retry:
|
||||
if (pixel_scoreboard_too_soon(c, scoreboard, inst))
|
||||
continue;
|
||||
|
||||
- /* ldunif and ldvary both write r5, but ldunif does so a tick
|
||||
- * sooner. If the ldvary's r5 wasn't used, then ldunif might
|
||||
+ /* ldunif and ldvary both write the same register (r5 for v42
|
||||
+ * and below, rf0 for v71), but ldunif does so a tick sooner.
|
||||
+ * If the ldvary's register wasn't used, then ldunif might
|
||||
* otherwise get scheduled so ldunif and ldvary try to update
|
||||
- * r5 in the same tick.
|
||||
+ * the register in the same tick.
|
||||
*/
|
||||
if ((inst->sig.ldunif || inst->sig.ldunifa) &&
|
||||
scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,52 +0,0 @@
|
||||
From 70456e27b039174f767010f96d9b649e5e42d84f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 19 Oct 2021 23:52:30 +0200
|
||||
Subject: [PATCH 049/142] broadcom/compiler: update payload registers handling
|
||||
when computing live intervals
|
||||
|
||||
As for v71 the payload registers are not the same. Specifically now
|
||||
rf3 is used as payload register, so this is needed to avoid rf3 being
|
||||
selected as an instruction dst by the register allocator, overwriting
|
||||
the payload value that could be still used.
|
||||
---
|
||||
src/broadcom/compiler/vir_live_variables.c | 21 +++++++++++++--------
|
||||
1 file changed, 13 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_live_variables.c b/src/broadcom/compiler/vir_live_variables.c
|
||||
index 575b0481dc8..87a7e2b5b81 100644
|
||||
--- a/src/broadcom/compiler/vir_live_variables.c
|
||||
+++ b/src/broadcom/compiler/vir_live_variables.c
|
||||
@@ -179,17 +179,22 @@ vir_setup_def_use(struct v3d_compile *c)
|
||||
flags_inst = NULL;
|
||||
}
|
||||
|
||||
- /* Payload registers: r0/1/2 contain W, centroid W,
|
||||
- * and Z at program start. Register allocation will
|
||||
- * force their nodes to R0/1/2.
|
||||
+ /* Payload registers: for fragment shaders, W,
|
||||
+ * centroid W, and Z will be initialized at r0/1/2
|
||||
+ * until v42, or r1/r2/r3 from v71.
|
||||
+ *
|
||||
+ * For compute shaders, payload would be r0/r2 until
|
||||
+ * v42, r3/r2 from v71
|
||||
+ *
|
||||
+ * Register allocation will force their nodes to those
|
||||
+ * registers.
|
||||
*/
|
||||
if (inst->src[0].file == QFILE_REG) {
|
||||
- switch (inst->src[0].index) {
|
||||
- case 0:
|
||||
- case 1:
|
||||
- case 2:
|
||||
+ uint32_t min_payload_r = c->devinfo->ver >= 71 ? 1 : 0;
|
||||
+ uint32_t max_payload_r = c->devinfo->ver >= 71 ? 3 : 2;
|
||||
+ if (inst->src[0].index >= min_payload_r ||
|
||||
+ inst->src[0].index <= max_payload_r) {
|
||||
c->temp_start[inst->dst.index] = 0;
|
||||
- break;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,235 +0,0 @@
|
||||
From f9a76b3a1e316e5ed6387819b87eaaf60f989a2b Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 26 Oct 2021 11:43:02 +0200
|
||||
Subject: [PATCH 050/142] broadcom/compiler: update peripheral access
|
||||
restrictions for v71
|
||||
|
||||
In V3D 4.x only a couple of simultaneous accesses were allowed, but
|
||||
V3D 7.x is a bit more flexible, so rather than trying to check for all
|
||||
the allowed combinations it is easier to check if we hit one of the
|
||||
disallowed ones.
|
||||
|
||||
Shader-db (pi5):
|
||||
|
||||
total instructions in shared programs: 11338883 -> 11307386 (-0.28%)
|
||||
instructions in affected programs: 2727201 -> 2695704 (-1.15%)
|
||||
helped: 12555
|
||||
HURT: 289
|
||||
Instructions are helped.
|
||||
|
||||
total max-temps in shared programs: 2230199 -> 2229260 (-0.04%)
|
||||
max-temps in affected programs: 20508 -> 19569 (-4.58%)
|
||||
helped: 608
|
||||
HURT: 4
|
||||
Max-temps are helped.
|
||||
|
||||
total sfu-stalls in shared programs: 15236 -> 15293 (0.37%)
|
||||
sfu-stalls in affected programs: 148 -> 205 (38.51%)
|
||||
helped: 38
|
||||
HURT: 64
|
||||
Inconclusive result (%-change mean confidence interval includes 0).
|
||||
|
||||
total inst-and-stalls in shared programs: 11354119 -> 11322679 (-0.28%)
|
||||
inst-and-stalls in affected programs: 2732262 -> 2700822 (-1.15%)
|
||||
helped: 12550
|
||||
HURT: 304
|
||||
Inst-and-stalls are helped.
|
||||
|
||||
total nops in shared programs: 273711 -> 274095 (0.14%)
|
||||
nops in affected programs: 9626 -> 10010 (3.99%)
|
||||
helped: 186
|
||||
HURT: 397
|
||||
Nops are HURT.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 88 +++++++++++++++++++++-------
|
||||
src/broadcom/compiler/qpu_validate.c | 2 +-
|
||||
src/broadcom/qpu/qpu_instr.c | 16 +++--
|
||||
src/broadcom/qpu/qpu_instr.h | 2 +
|
||||
4 files changed, 82 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index e8197661f89..adb501e85ce 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -790,7 +790,8 @@ enum {
|
||||
V3D_PERIPHERAL_TMU_WAIT = (1 << 6),
|
||||
V3D_PERIPHERAL_TMU_WRTMUC_SIG = (1 << 7),
|
||||
V3D_PERIPHERAL_TSY = (1 << 8),
|
||||
- V3D_PERIPHERAL_TLB = (1 << 9),
|
||||
+ V3D_PERIPHERAL_TLB_READ = (1 << 9),
|
||||
+ V3D_PERIPHERAL_TLB_WRITE = (1 << 10),
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
@@ -815,8 +816,10 @@ qpu_peripherals(const struct v3d_device_info *devinfo,
|
||||
if (v3d_qpu_uses_sfu(inst))
|
||||
result |= V3D_PERIPHERAL_SFU;
|
||||
|
||||
- if (v3d_qpu_uses_tlb(inst))
|
||||
- result |= V3D_PERIPHERAL_TLB;
|
||||
+ if (v3d_qpu_reads_tlb(inst))
|
||||
+ result |= V3D_PERIPHERAL_TLB_READ;
|
||||
+ if (v3d_qpu_writes_tlb(inst))
|
||||
+ result |= V3D_PERIPHERAL_TLB_WRITE;
|
||||
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP &&
|
||||
@@ -847,32 +850,75 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
|
||||
if (devinfo->ver < 41)
|
||||
return false;
|
||||
|
||||
- /* V3D 4.1+ allow WRTMUC signal with TMU register write (other than
|
||||
- * tmuc).
|
||||
+ /* V3D 4.x can't do more than one peripheral access except in a
|
||||
+ * few cases:
|
||||
*/
|
||||
- if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
||||
- b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
||||
- return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ /* WRTMUC signal with TMU register write (other than tmuc). */
|
||||
+ if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
||||
+ b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
||||
+ return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
|
||||
+ }
|
||||
+ if (b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
||||
+ a_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
||||
+ return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
|
||||
+ }
|
||||
+
|
||||
+ /* TMU read with VPM read/write. */
|
||||
+ if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
||||
+ (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
||||
+ b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
||||
+ (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
||||
+ a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
}
|
||||
|
||||
- if (a_peripherals == V3D_PERIPHERAL_TMU_WRITE &&
|
||||
- b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG) {
|
||||
- return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
|
||||
+ /* V3D 7.x can't have more than one of these restricted peripherals */
|
||||
+ const uint32_t restricted = V3D_PERIPHERAL_TMU_WRITE |
|
||||
+ V3D_PERIPHERAL_TMU_WRTMUC_SIG |
|
||||
+ V3D_PERIPHERAL_TSY |
|
||||
+ V3D_PERIPHERAL_TLB_READ |
|
||||
+ V3D_PERIPHERAL_SFU |
|
||||
+ V3D_PERIPHERAL_VPM_READ |
|
||||
+ V3D_PERIPHERAL_VPM_WRITE;
|
||||
+
|
||||
+ const uint32_t a_restricted = a_peripherals & restricted;
|
||||
+ const uint32_t b_restricted = b_peripherals & restricted;
|
||||
+ if (a_restricted && b_restricted) {
|
||||
+ /* WRTMUC signal with TMU register write (other than tmuc) is
|
||||
+ * allowed though.
|
||||
+ */
|
||||
+ if (!((a_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
||||
+ b_restricted == V3D_PERIPHERAL_TMU_WRITE &&
|
||||
+ v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
|
||||
+ (b_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
||||
+ a_restricted == V3D_PERIPHERAL_TMU_WRITE &&
|
||||
+ v3d_qpu_writes_tmu_not_tmuc(devinfo, a)))) {
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
|
||||
- /* V3D 4.1+ allows TMU read with VPM read/write. */
|
||||
- if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
||||
- (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
||||
- b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
||||
- return true;
|
||||
+ /* Only one TMU read per instruction */
|
||||
+ if ((a_peripherals & V3D_PERIPHERAL_TMU_READ) &&
|
||||
+ (b_peripherals & V3D_PERIPHERAL_TMU_READ)) {
|
||||
+ return false;
|
||||
}
|
||||
- if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
||||
- (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
||||
- a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
||||
- return true;
|
||||
+
|
||||
+ /* Only one TLB access per instruction */
|
||||
+ if ((a_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
|
||||
+ V3D_PERIPHERAL_TLB_READ)) &&
|
||||
+ (b_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
|
||||
+ V3D_PERIPHERAL_TLB_READ))) {
|
||||
+ return false;
|
||||
}
|
||||
|
||||
- return false;
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/* Compute a bitmask of which rf registers are used between
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index 12788692432..fde6695d59b 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -227,7 +227,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
vpm_writes +
|
||||
tlb_writes +
|
||||
tsy_writes +
|
||||
- inst->sig.ldtmu +
|
||||
+ (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) +
|
||||
inst->sig.ldtlb +
|
||||
inst->sig.ldvpm +
|
||||
inst->sig.ldtlbu > 1) {
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 195a0dcd232..f54ce7210fb 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -649,12 +649,14 @@ v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
|
||||
}
|
||||
|
||||
bool
|
||||
-v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
||||
+v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
- if (inst->sig.ldtlb ||
|
||||
- inst->sig.ldtlbu)
|
||||
- return true;
|
||||
+ return inst->sig.ldtlb || inst->sig.ldtlbu;
|
||||
+}
|
||||
|
||||
+bool
|
||||
+v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst)
|
||||
+{
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP &&
|
||||
inst->alu.add.magic_write &&
|
||||
@@ -672,6 +674,12 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
||||
return false;
|
||||
}
|
||||
|
||||
+bool
|
||||
+v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
||||
+{
|
||||
+ return v3d_qpu_writes_tlb(inst) || v3d_qpu_reads_tlb(inst);
|
||||
+}
|
||||
+
|
||||
bool
|
||||
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 4b34d17bd4c..dece45c5c54 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -472,6 +472,8 @@ bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
+bool v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
+bool v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,61 +0,0 @@
|
||||
From 3520cceb87fb2f9765ba7dbe2771fbd0cadca78d Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 26 Oct 2021 08:37:54 +0200
|
||||
Subject: [PATCH 051/142] broadcom/qpu: add packing for fmov on ADD alu
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 31 +++++++++++++++++++++++++++++++
|
||||
1 file changed, 31 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 180d7ab08a3..ed5a8bc667d 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -1332,6 +1332,20 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
|
||||
}
|
||||
break;
|
||||
|
||||
+ case V3D_QPU_A_FMOV:
|
||||
+ instr->alu.add.output_pack = raddr_b & 0x3;
|
||||
+
|
||||
+ /* Mul alu FMOV has one additional variant */
|
||||
+ int32_t unpack = (raddr_b >> 2) & 0x7;
|
||||
+ if (unpack == 7)
|
||||
+ return false;
|
||||
+
|
||||
+ if (!v3d_qpu_float32_unpack_unpack(unpack,
|
||||
+ &instr->alu.add.a.unpack)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
@@ -1992,6 +2006,23 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
break;
|
||||
}
|
||||
|
||||
+ case V3D_QPU_A_FMOV: {
|
||||
+ uint32_t packed;
|
||||
+
|
||||
+ if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
|
||||
+ &packed)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ raddr_b = packed;
|
||||
+
|
||||
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
|
||||
+ &packed)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ raddr_b |= packed << 2;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
default:
|
||||
if (instr->alu.add.op != V3D_QPU_A_NOP &&
|
||||
(instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,155 +0,0 @@
|
||||
From 7c7ab15b3c9def4bc3bb5be492228a933c325f8a Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 6 Oct 2021 13:58:27 +0200
|
||||
Subject: [PATCH 052/142] broadcom/compiler: handle rf0 flops storage
|
||||
restriction in v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 81 +++++++++++++++++++++++++++-
|
||||
1 file changed, 79 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index adb501e85ce..7048d9257b6 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -538,6 +538,10 @@ struct choose_scoreboard {
|
||||
int ldvary_count;
|
||||
int pending_ldtmu_count;
|
||||
bool first_ldtmu_after_thrsw;
|
||||
+
|
||||
+ /* V3D 7.x */
|
||||
+ int last_implicit_rf0_write_tick;
|
||||
+ bool has_rf0_flops_conflict;
|
||||
};
|
||||
|
||||
static bool
|
||||
@@ -1499,6 +1503,62 @@ update_scoreboard_tmu_tracking(struct choose_scoreboard *scoreboard,
|
||||
}
|
||||
}
|
||||
|
||||
+static void
|
||||
+set_has_rf0_flops_conflict(struct choose_scoreboard *scoreboard,
|
||||
+ const struct v3d_qpu_instr *inst,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick &&
|
||||
+ v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
+ !inst->sig_magic) {
|
||||
+ scoreboard->has_rf0_flops_conflict = true;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+update_scoreboard_for_rf0_flops(struct choose_scoreboard *scoreboard,
|
||||
+ const struct v3d_qpu_instr *inst,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->ver < 71)
|
||||
+ return;
|
||||
+
|
||||
+ /* Thread switch restrictions:
|
||||
+ *
|
||||
+ * At the point of a thread switch or thread end (when the actual
|
||||
+ * thread switch or thread end happens, not when the signalling
|
||||
+ * instruction is processed):
|
||||
+ *
|
||||
+ * - If the most recent write to rf0 was from a ldunif, ldunifa, or
|
||||
+ * ldvary instruction in which another signal also wrote to the
|
||||
+ * register file, and the final instruction of the thread section
|
||||
+ * contained a signal which wrote to the register file, then the
|
||||
+ * value of rf0 is undefined at the start of the new section
|
||||
+ *
|
||||
+ * Here we use the scoreboard to track if our last rf0 implicit write
|
||||
+ * happens at the same time that another signal writes the register
|
||||
+ * file (has_rf0_flops_conflict). We will use that information when
|
||||
+ * scheduling thrsw instructions to avoid putting anything in their
|
||||
+ * last delay slot which has a signal that writes to the register file.
|
||||
+ */
|
||||
+
|
||||
+ /* Reset tracking if we have an explicit rf0 write or we are starting
|
||||
+ * a new thread section.
|
||||
+ */
|
||||
+ if (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
|
||||
+ scoreboard->tick - scoreboard->last_thrsw_tick == 3) {
|
||||
+ scoreboard->last_implicit_rf0_write_tick = -10;
|
||||
+ scoreboard->has_rf0_flops_conflict = false;
|
||||
+ }
|
||||
+
|
||||
+ if (v3d_qpu_writes_rf0_implicitly(devinfo, inst)) {
|
||||
+ scoreboard->last_implicit_rf0_write_tick = inst->sig.ldvary ?
|
||||
+ scoreboard->tick + 1 : scoreboard->tick;
|
||||
+ }
|
||||
+
|
||||
+ set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
|
||||
+}
|
||||
+
|
||||
static void
|
||||
update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
|
||||
const struct qinst *qinst,
|
||||
@@ -1542,6 +1602,8 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
|
||||
if (inst->sig.ldvary)
|
||||
scoreboard->last_ldvary_tick = scoreboard->tick;
|
||||
|
||||
+ update_scoreboard_for_rf0_flops(scoreboard, inst, devinfo);
|
||||
+
|
||||
update_scoreboard_tmu_tracking(scoreboard, qinst);
|
||||
}
|
||||
|
||||
@@ -1812,6 +1874,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
*/
|
||||
static bool
|
||||
qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
|
||||
+ struct choose_scoreboard *scoreboard,
|
||||
const struct qinst *qinst,
|
||||
uint32_t slot)
|
||||
{
|
||||
@@ -1842,6 +1905,17 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
|
||||
if (v3d_qpu_writes_unifa(c->devinfo, &qinst->qpu))
|
||||
return false;
|
||||
|
||||
+ /* See comment when we set has_rf0_flops_conflict for details */
|
||||
+ if (c->devinfo->ver >= 71 &&
|
||||
+ slot == 2 &&
|
||||
+ v3d_qpu_sig_writes_address(c->devinfo, &qinst->qpu.sig) &&
|
||||
+ !qinst->qpu.sig_magic) {
|
||||
+ if (scoreboard->has_rf0_flops_conflict)
|
||||
+ return false;
|
||||
+ if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick)
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1874,7 +1948,7 @@ qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c,
|
||||
* also apply to instructions scheduled after the thrsw that we want
|
||||
* to place in its delay slots.
|
||||
*/
|
||||
- if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot))
|
||||
+ if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard, qinst, slot))
|
||||
return false;
|
||||
|
||||
/* TLB access is disallowed until scoreboard wait is executed, which
|
||||
@@ -1947,8 +2021,10 @@ valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard
|
||||
bool is_thrend)
|
||||
{
|
||||
for (int slot = 0; slot < instructions_in_sequence; slot++) {
|
||||
- if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot))
|
||||
+ if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard,
|
||||
+ qinst, slot)) {
|
||||
return false;
|
||||
+ }
|
||||
|
||||
if (is_thrend &&
|
||||
!qpu_inst_valid_in_thrend_slot(c, qinst, slot)) {
|
||||
@@ -2718,6 +2794,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
|
||||
scoreboard.last_setmsf_tick = -10;
|
||||
scoreboard.last_stallable_sfu_tick = -10;
|
||||
scoreboard.first_ldtmu_after_thrsw = true;
|
||||
+ scoreboard.last_implicit_rf0_write_tick = - 10;
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Pre-schedule instructions\n");
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,189 +0,0 @@
|
||||
From 0c6910721eb50b38b3388c2d2344b6ecfe0fee58 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 27 Oct 2021 11:35:12 +0200
|
||||
Subject: [PATCH 053/142] broadcom/compiler: enable ldvary pipelining on v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 121 ++++++++++++++++++---------
|
||||
1 file changed, 80 insertions(+), 41 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 7048d9257b6..334ffdc6d58 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -2312,46 +2312,72 @@ emit_branch(struct v3d_compile *c,
|
||||
}
|
||||
|
||||
static bool
|
||||
-alu_reads_register(struct v3d_qpu_instr *inst,
|
||||
+alu_reads_register(const struct v3d_device_info *devinfo,
|
||||
+ struct v3d_qpu_instr *inst,
|
||||
bool add, bool magic, uint32_t index)
|
||||
{
|
||||
uint32_t num_src;
|
||||
- enum v3d_qpu_mux mux_a, mux_b;
|
||||
-
|
||||
- if (add) {
|
||||
+ if (add)
|
||||
num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
|
||||
- mux_a = inst->alu.add.a.mux;
|
||||
- mux_b = inst->alu.add.b.mux;
|
||||
- } else {
|
||||
+ else
|
||||
num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
- mux_a = inst->alu.mul.a.mux;
|
||||
- mux_b = inst->alu.mul.b.mux;
|
||||
- }
|
||||
|
||||
- for (int i = 0; i < num_src; i++) {
|
||||
- if (magic) {
|
||||
- if (i == 0 && mux_a == index)
|
||||
- return true;
|
||||
- if (i == 1 && mux_b == index)
|
||||
- return true;
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ enum v3d_qpu_mux mux_a, mux_b;
|
||||
+ if (add) {
|
||||
+ mux_a = inst->alu.add.a.mux;
|
||||
+ mux_b = inst->alu.add.b.mux;
|
||||
} else {
|
||||
- if (i == 0 && mux_a == V3D_QPU_MUX_A &&
|
||||
- inst->raddr_a == index) {
|
||||
- return true;
|
||||
- }
|
||||
- if (i == 0 && mux_a == V3D_QPU_MUX_B &&
|
||||
- inst->raddr_b == index) {
|
||||
- return true;
|
||||
- }
|
||||
- if (i == 1 && mux_b == V3D_QPU_MUX_A &&
|
||||
- inst->raddr_a == index) {
|
||||
- return true;
|
||||
- }
|
||||
- if (i == 1 && mux_b == V3D_QPU_MUX_B &&
|
||||
- inst->raddr_b == index) {
|
||||
- return true;
|
||||
+ mux_a = inst->alu.mul.a.mux;
|
||||
+ mux_b = inst->alu.mul.b.mux;
|
||||
+ }
|
||||
+
|
||||
+ for (int i = 0; i < num_src; i++) {
|
||||
+ if (magic) {
|
||||
+ if (i == 0 && mux_a == index)
|
||||
+ return true;
|
||||
+ if (i == 1 && mux_b == index)
|
||||
+ return true;
|
||||
+ } else {
|
||||
+ if (i == 0 && mux_a == V3D_QPU_MUX_A &&
|
||||
+ inst->raddr_a == index) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ if (i == 0 && mux_a == V3D_QPU_MUX_B &&
|
||||
+ inst->raddr_b == index) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ if (i == 1 && mux_b == V3D_QPU_MUX_A &&
|
||||
+ inst->raddr_a == index) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ if (i == 1 && mux_b == V3D_QPU_MUX_B &&
|
||||
+ inst->raddr_b == index) {
|
||||
+ return true;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
+
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ assert(devinfo->ver >= 71);
|
||||
+ assert(!magic);
|
||||
+
|
||||
+ uint32_t raddr_a, raddr_b;
|
||||
+ if (add) {
|
||||
+ raddr_a = inst->alu.add.a.raddr;
|
||||
+ raddr_b = inst->alu.add.b.raddr;
|
||||
+ } else {
|
||||
+ raddr_a = inst->alu.mul.a.raddr;
|
||||
+ raddr_b = inst->alu.mul.b.raddr;
|
||||
+ }
|
||||
+
|
||||
+ for (int i = 0; i < num_src; i++) {
|
||||
+ if (i == 0 && raddr_a == index)
|
||||
+ return true;
|
||||
+ if (i == 1 && raddr_b == index)
|
||||
+ return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -2386,6 +2412,8 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
||||
struct qblock *block,
|
||||
struct v3d_qpu_instr *inst)
|
||||
{
|
||||
+ const struct v3d_device_info *devinfo = c->devinfo;
|
||||
+
|
||||
/* We only call this if we have successfully merged an ldvary into a
|
||||
* previous instruction.
|
||||
*/
|
||||
@@ -2398,9 +2426,9 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
||||
* the ldvary destination, if it does, then moving the ldvary before
|
||||
* it would overwrite it.
|
||||
*/
|
||||
- if (alu_reads_register(inst, true, ldvary_magic, ldvary_index))
|
||||
+ if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
|
||||
return false;
|
||||
- if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
|
||||
+ if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
|
||||
return false;
|
||||
|
||||
/* The implicit ldvary destination may not be written to by a signal
|
||||
@@ -2436,13 +2464,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
||||
}
|
||||
|
||||
/* The previous instruction cannot have a conflicting signal */
|
||||
- if (v3d_qpu_sig_writes_address(c->devinfo, &prev->qpu.sig))
|
||||
+ if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
|
||||
return false;
|
||||
|
||||
uint32_t sig;
|
||||
struct v3d_qpu_sig new_sig = prev->qpu.sig;
|
||||
new_sig.ldvary = true;
|
||||
- if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig))
|
||||
+ if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
|
||||
return false;
|
||||
|
||||
/* The previous instruction cannot use flags since ldvary uses the
|
||||
@@ -2471,14 +2499,25 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
||||
inst->sig_magic = false;
|
||||
inst->sig_addr = 0;
|
||||
|
||||
- /* By moving ldvary to the previous instruction we make it update
|
||||
- * r5 in the current one, so nothing else in it should write r5.
|
||||
- * This should've been prevented by our dependency tracking, which
|
||||
+ /* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
|
||||
+ if (devinfo->ver >= 71) {
|
||||
+ scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
|
||||
+ set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
|
||||
+ }
|
||||
+
|
||||
+ /* By moving ldvary to the previous instruction we make it update r5
|
||||
+ * (rf0 for ver >= 71) in the current one, so nothing else in it
|
||||
+ * should write this register.
|
||||
+ *
|
||||
+ * This should've been prevented by our dependency tracking, which
|
||||
* would not allow ldvary to be paired up with an instruction that
|
||||
- * writes r5 (since our dependency tracking doesn't know that the
|
||||
- * ldvary write r5 happens in the next instruction).
|
||||
+ * writes r5/rf0 (since our dependency tracking doesn't know that the
|
||||
+ * ldvary write to r5/rf0 happens in the next instruction).
|
||||
*/
|
||||
- assert(!v3d_qpu_writes_r5(c->devinfo, inst));
|
||||
+ assert(!v3d_qpu_writes_r5(devinfo, inst));
|
||||
+ assert(devinfo->ver <= 42 ||
|
||||
+ (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
|
||||
+ !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
|
||||
|
||||
return true;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,144 +0,0 @@
|
||||
From 0670d642bb91fc68ce73f2d9fb88c482295a446d Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 28 Oct 2021 14:13:29 +0200
|
||||
Subject: [PATCH 054/142] broadcom/compiler: try to use ldunif(a) instead of
|
||||
ldunif(a)rf in v71
|
||||
|
||||
The rf variants need to encode the destination in the cond bits, which
|
||||
prevents these from being merged with any other instruction that needs them.
|
||||
|
||||
In 4.x, ldunif(a) write to r5 which is a special register that only
|
||||
ldunif(a) and ldvary can write so we have a special register class for
|
||||
it and only allow it for them. Then when we need to choose a register
|
||||
for a node, if this register is available we always use it.
|
||||
|
||||
In 7.x these instructions write to rf0, which can be used by any
|
||||
instruction, so instead of restricting rf0, we track the temps that
|
||||
are used as ldunif(a) destinations and use that information to favor
|
||||
rf0 for them.
|
||||
---
|
||||
src/broadcom/compiler/v3d_compiler.h | 3 ++
|
||||
src/broadcom/compiler/vir_register_allocate.c | 34 ++++++++++++++++---
|
||||
src/broadcom/compiler/vir_to_qpu.c | 11 ++++--
|
||||
3 files changed, 41 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
|
||||
index 7e8f3bfc1a7..36adf8830b5 100644
|
||||
--- a/src/broadcom/compiler/v3d_compiler.h
|
||||
+++ b/src/broadcom/compiler/v3d_compiler.h
|
||||
@@ -613,6 +613,9 @@ struct v3d_ra_node_info {
|
||||
struct {
|
||||
uint32_t priority;
|
||||
uint8_t class_bits;
|
||||
+
|
||||
+ /* V3D 7.x */
|
||||
+ bool is_ldunif_dst;
|
||||
} *info;
|
||||
uint32_t alloc_count;
|
||||
};
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index e0adc1de7a4..1be091f8518 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -384,6 +384,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
|
||||
/* We fill the node priority after we are done inserting spills */
|
||||
c->nodes.info[node].class_bits = class_bits;
|
||||
c->nodes.info[node].priority = 0;
|
||||
+ c->nodes.info[node].is_ldunif_dst = false;
|
||||
}
|
||||
|
||||
/* The spill offset for this thread takes a bit of setup, so do it once at
|
||||
@@ -899,9 +900,22 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
|
||||
static bool
|
||||
v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
+ unsigned int node,
|
||||
BITSET_WORD *regs,
|
||||
unsigned int *out)
|
||||
{
|
||||
+ /* In V3D 7.x, try to assign rf0 to temps used as ldunif's dst
|
||||
+ * so we can avoid turning them into ldunifrf (which uses the
|
||||
+ * cond field to encode the dst and would prevent merge with
|
||||
+ * instructions that use cond flags).
|
||||
+ */
|
||||
+ if (v3d_ra->nodes->info[node].is_ldunif_dst &&
|
||||
+ BITSET_TEST(regs, v3d_ra->phys_index)) {
|
||||
+ assert(v3d_ra->devinfo->ver >= 71);
|
||||
+ *out = v3d_ra->phys_index;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
|
||||
int phys = v3d_ra->phys_index + phys_off;
|
||||
@@ -927,7 +941,7 @@ v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data)
|
||||
return reg;
|
||||
}
|
||||
|
||||
- if (v3d_ra_select_rf(v3d_ra, regs, ®))
|
||||
+ if (v3d_ra_select_rf(v3d_ra, n, regs, ®))
|
||||
return reg;
|
||||
|
||||
/* If we ran out of physical registers try to assign an accumulator
|
||||
@@ -1139,15 +1153,24 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
- /* If the instruction has an implicit write
|
||||
- * we can't allocate its dest to the same
|
||||
- * register.
|
||||
+ /* Make sure we don't allocate the ldvary's
|
||||
+ * destination to rf0, since it would clash
|
||||
+ * with its implicit write to that register.
|
||||
*/
|
||||
- if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
|
||||
+ if (inst->qpu.sig.ldvary) {
|
||||
ra_add_node_interference(c->g,
|
||||
temp_to_node(c, inst->dst.index),
|
||||
implicit_rf_nodes[0]);
|
||||
}
|
||||
+ /* Flag dst temps from ldunif(a) instructions
|
||||
+ * so we can try to assign rf0 to them and avoid
|
||||
+ * converting these to ldunif(a)rf.
|
||||
+ */
|
||||
+ if (inst->qpu.sig.ldunif || inst->qpu.sig.ldunifa) {
|
||||
+ const uint32_t dst_n =
|
||||
+ temp_to_node(c, inst->dst.index);
|
||||
+ c->nodes.info[dst_n].is_ldunif_dst = true;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1222,6 +1245,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
* without accumulators that can have implicit writes to phys regs.
|
||||
*/
|
||||
for (uint32_t i = 0; i < num_ra_nodes; i++) {
|
||||
+ c->nodes.info[i].is_ldunif_dst = false;
|
||||
if (c->devinfo->has_accumulators && i < ACC_COUNT) {
|
||||
acc_nodes[i] = i;
|
||||
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index afc4941fdb1..cbbb495592b 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -345,8 +345,15 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
|
||||
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
|
||||
|
||||
- if (!dst.magic ||
|
||||
- dst.index != V3D_QPU_WADDR_R5) {
|
||||
+ bool use_rf;
|
||||
+ if (c->devinfo->has_accumulators) {
|
||||
+ use_rf = !dst.magic ||
|
||||
+ dst.index != V3D_QPU_WADDR_R5;
|
||||
+ } else {
|
||||
+ use_rf = dst.magic || dst.index != 0;
|
||||
+ }
|
||||
+
|
||||
+ if (use_rf) {
|
||||
assert(c->devinfo->ver >= 40);
|
||||
|
||||
if (qinst->qpu.sig.ldunif) {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,82 +0,0 @@
|
||||
From cbed3b97394da09c9ae644c79e098e3ba8b5c3e8 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Fri, 29 Oct 2021 13:00:56 +0200
|
||||
Subject: [PATCH 055/142] broadcom/compiler: don't assign rf0 to temps that
|
||||
conflict with ldvary
|
||||
|
||||
ldvary writes to rf0 implicitly, so we don't want to allocate rf0 to
|
||||
any temps that are live across ldvary's rf0 live ranges.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++++++-
|
||||
1 file changed, 38 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 1be091f8518..6f7b1ca0589 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -1019,6 +1019,7 @@ static void
|
||||
update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
||||
int *acc_nodes,
|
||||
int *implicit_rf_nodes,
|
||||
+ int last_ldvary_ip,
|
||||
struct qinst *inst)
|
||||
{
|
||||
int32_t ip = inst->ip;
|
||||
@@ -1125,6 +1126,25 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
||||
}
|
||||
}
|
||||
|
||||
+ /* Don't allocate rf0 to temps that cross ranges where we have
|
||||
+ * live implicit rf0 writes from ldvary. We can identify these
|
||||
+ * by tracking the last ldvary instruction and explicit reads
|
||||
+ * of rf0.
|
||||
+ */
|
||||
+ if (c->devinfo->ver >= 71 &&
|
||||
+ ((inst->src[0].file == QFILE_REG && inst->src[0].index == 0) ||
|
||||
+ (vir_get_nsrc(inst) > 1 &&
|
||||
+ inst->src[1].file == QFILE_REG && inst->src[1].index == 0))) {
|
||||
+ for (int i = 0; i < c->num_temps; i++) {
|
||||
+ if (c->temp_start[i] < ip &&
|
||||
+ c->temp_end[i] > last_ldvary_ip) {
|
||||
+ ra_add_node_interference(c->g,
|
||||
+ temp_to_node(c, i),
|
||||
+ implicit_rf_nodes[0]);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (inst->dst.file == QFILE_TEMP) {
|
||||
/* Only a ldunif gets to write to R5, which only has a
|
||||
* single 32-bit channel of storage.
|
||||
@@ -1270,10 +1290,27 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
* interferences.
|
||||
*/
|
||||
int ip = 0;
|
||||
+ int last_ldvary_ip = -1;
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
inst->ip = ip++;
|
||||
+
|
||||
+ /* ldunif(a) always write to a temporary, so we have
|
||||
+ * liveness info available to decide if rf0 is
|
||||
+ * available for them, however, ldvary is different:
|
||||
+ * it always writes to rf0 directly so we don't have
|
||||
+ * liveness information for its implicit rf0 write.
|
||||
+ *
|
||||
+ * That means the allocator may assign rf0 to a temp
|
||||
+ * that is defined while an implicit rf0 write from
|
||||
+ * ldvary is still live. We fix that by manually
|
||||
+ * tracking rf0 live ranges from ldvary instructions.
|
||||
+ */
|
||||
+ if (inst->qpu.sig.ldvary)
|
||||
+ last_ldvary_ip = ip;
|
||||
+
|
||||
update_graph_and_reg_classes_for_inst(c, acc_nodes,
|
||||
- implicit_rf_nodes, inst);
|
||||
+ implicit_rf_nodes,
|
||||
+ last_ldvary_ip, inst);
|
||||
}
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,139 +0,0 @@
|
||||
From cbaa469c09974c1574b16f559173694904fe1bb0 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 25 Oct 2021 09:38:57 +0200
|
||||
Subject: [PATCH 056/142] broadcom/compiler: convert mul to add when needed to
|
||||
allow merge
|
||||
|
||||
V3D 7.x added 'mov' opcodes to the ADD alu, so now it is possible to
|
||||
move these to the ADD alu to facilitate merging them with other MUL
|
||||
instructions.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 102 ++++++++++++++++++++++++---
|
||||
1 file changed, 94 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 334ffdc6d58..caa84254998 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1086,6 +1086,57 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
|
||||
inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
}
|
||||
|
||||
+static bool
|
||||
+can_do_mul_as_add(const struct v3d_device_info *devinfo, enum v3d_qpu_mul_op op)
|
||||
+{
|
||||
+ switch (op) {
|
||||
+ case V3D_QPU_M_MOV:
|
||||
+ case V3D_QPU_M_FMOV:
|
||||
+ return devinfo->ver >= 71;
|
||||
+ default:
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static enum v3d_qpu_mul_op
|
||||
+mul_op_as_add_op(enum v3d_qpu_mul_op op)
|
||||
+{
|
||||
+ switch (op) {
|
||||
+ case V3D_QPU_M_MOV:
|
||||
+ return V3D_QPU_A_MOV;
|
||||
+ case V3D_QPU_M_FMOV:
|
||||
+ return V3D_QPU_A_FMOV;
|
||||
+ default:
|
||||
+ unreachable("unexpected mov opcode");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+qpu_convert_mul_to_add(struct v3d_qpu_instr *inst)
|
||||
+{
|
||||
+ STATIC_ASSERT(sizeof(inst->alu.add) == sizeof(inst->alu.mul));
|
||||
+ assert(inst->alu.mul.op != V3D_QPU_M_NOP);
|
||||
+ assert(inst->alu.add.op == V3D_QPU_A_NOP);
|
||||
+
|
||||
+ memcpy(&inst->alu.add, &inst->alu.mul, sizeof(inst->alu.add));
|
||||
+ inst->alu.add.op = mul_op_as_add_op(inst->alu.mul.op);
|
||||
+ inst->alu.mul.op = V3D_QPU_M_NOP;
|
||||
+
|
||||
+ inst->flags.ac = inst->flags.mc;
|
||||
+ inst->flags.apf = inst->flags.mpf;
|
||||
+ inst->flags.auf = inst->flags.muf;
|
||||
+ inst->flags.mc = V3D_QPU_COND_NONE;
|
||||
+ inst->flags.mpf = V3D_QPU_PF_NONE;
|
||||
+ inst->flags.muf = V3D_QPU_UF_NONE;
|
||||
+
|
||||
+ inst->alu.add.output_pack = inst->alu.mul.output_pack;
|
||||
+ inst->alu.add.a.unpack = inst->alu.mul.a.unpack;
|
||||
+ inst->alu.add.b.unpack = inst->alu.mul.b.unpack;
|
||||
+ inst->alu.mul.output_pack = V3D_QPU_PACK_NONE;
|
||||
+ inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+ inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+}
|
||||
+
|
||||
static bool
|
||||
qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
struct v3d_qpu_instr *result,
|
||||
@@ -1151,17 +1202,52 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
}
|
||||
}
|
||||
|
||||
+ struct v3d_qpu_instr add_inst;
|
||||
if (b->alu.mul.op != V3D_QPU_M_NOP) {
|
||||
- if (a->alu.mul.op != V3D_QPU_M_NOP)
|
||||
- return false;
|
||||
- merge.alu.mul = b->alu.mul;
|
||||
+ if (a->alu.mul.op == V3D_QPU_M_NOP) {
|
||||
+ merge.alu.mul = b->alu.mul;
|
||||
+
|
||||
+ merge.flags.mc = b->flags.mc;
|
||||
+ merge.flags.mpf = b->flags.mpf;
|
||||
+ merge.flags.muf = b->flags.muf;
|
||||
+
|
||||
+ mul_instr = b;
|
||||
+ add_instr = a;
|
||||
+ }
|
||||
+ /* If a's mul op is used but its add op is not, then see if we
|
||||
+ * can convert either a's mul op or b's mul op to an add op
|
||||
+ * so we can merge.
|
||||
+ */
|
||||
+ else if (a->alu.add.op == V3D_QPU_A_NOP &&
|
||||
+ can_do_mul_as_add(devinfo, b->alu.mul.op)) {
|
||||
+ add_inst = *b;
|
||||
+ qpu_convert_mul_to_add(&add_inst);
|
||||
|
||||
- merge.flags.mc = b->flags.mc;
|
||||
- merge.flags.mpf = b->flags.mpf;
|
||||
- merge.flags.muf = b->flags.muf;
|
||||
+ merge.alu.add = add_inst.alu.add;
|
||||
|
||||
- mul_instr = b;
|
||||
- add_instr = a;
|
||||
+ merge.flags.ac = b->flags.mc;
|
||||
+ merge.flags.apf = b->flags.mpf;
|
||||
+ merge.flags.auf = b->flags.muf;
|
||||
+
|
||||
+ mul_instr = a;
|
||||
+ add_instr = &add_inst;
|
||||
+ } else if (a->alu.add.op == V3D_QPU_A_NOP &&
|
||||
+ can_do_mul_as_add(devinfo, a->alu.mul.op)) {
|
||||
+ add_inst = *a;
|
||||
+ qpu_convert_mul_to_add(&add_inst);
|
||||
+
|
||||
+ merge = add_inst;
|
||||
+ merge.alu.mul = b->alu.mul;
|
||||
+
|
||||
+ merge.flags.mc = b->flags.mc;
|
||||
+ merge.flags.mpf = b->flags.mpf;
|
||||
+ merge.flags.muf = b->flags.muf;
|
||||
+
|
||||
+ mul_instr = b;
|
||||
+ add_instr = &add_inst;
|
||||
+ } else {
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,418 +0,0 @@
|
||||
From b59b3725fb16f4ab1ac0db86a5452a4ed6176074 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 3 Nov 2021 10:34:19 +0100
|
||||
Subject: [PATCH 057/142] broadcom/compiler: implement small immediates for v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 90 +++++++++++++------
|
||||
src/broadcom/compiler/qpu_validate.c | 20 ++++-
|
||||
.../compiler/vir_opt_small_immediates.c | 26 +++++-
|
||||
src/broadcom/compiler/vir_to_qpu.c | 11 ++-
|
||||
src/broadcom/qpu/qpu_disasm.c | 1 -
|
||||
src/broadcom/qpu/qpu_instr.c | 8 +-
|
||||
src/broadcom/qpu/qpu_instr.h | 2 +-
|
||||
src/broadcom/qpu/qpu_pack.c | 36 ++++----
|
||||
8 files changed, 139 insertions(+), 55 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index caa84254998..bd1c920848a 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -714,7 +714,6 @@ qpu_instruction_uses_rf(const struct v3d_device_info *devinfo,
|
||||
!inst->sig.small_imm_b && (inst->raddr_b == waddr))
|
||||
return true;
|
||||
} else {
|
||||
- /* FIXME: skip if small immediate */
|
||||
if (v3d71_qpu_reads_raddr(inst, waddr))
|
||||
return true;
|
||||
}
|
||||
@@ -948,10 +947,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
|
||||
return raddrs_used;
|
||||
}
|
||||
|
||||
-/* Take two instructions and attempt to merge their raddr fields
|
||||
- * into one merged instruction. Returns false if the two instructions
|
||||
- * access more than two different rf registers between them, or more
|
||||
- * than one rf register and one small immediate.
|
||||
+/* Takes two instructions and attempts to merge their raddr fields (including
|
||||
+ * small immediates) into one merged instruction. For V3D 4.x, returns false
|
||||
+ * if the two instructions access more than two different rf registers between
|
||||
+ * them, or more than one rf register and one small immediate. For 7.x returns
|
||||
+ * false if both instructions use small immediates.
|
||||
*/
|
||||
static bool
|
||||
qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
@@ -959,6 +959,27 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
const struct v3d_qpu_instr *mul_instr,
|
||||
const struct v3d_device_info *devinfo)
|
||||
{
|
||||
+ if (devinfo->ver >= 71) {
|
||||
+ assert(add_instr->sig.small_imm_a +
|
||||
+ add_instr->sig.small_imm_b <= 1);
|
||||
+ assert(add_instr->sig.small_imm_c +
|
||||
+ add_instr->sig.small_imm_d == 0);
|
||||
+ assert(mul_instr->sig.small_imm_a +
|
||||
+ mul_instr->sig.small_imm_b == 0);
|
||||
+ assert(mul_instr->sig.small_imm_c +
|
||||
+ mul_instr->sig.small_imm_d <= 1);
|
||||
+
|
||||
+ result->sig.small_imm_a = add_instr->sig.small_imm_a;
|
||||
+ result->sig.small_imm_b = add_instr->sig.small_imm_b;
|
||||
+ result->sig.small_imm_c = mul_instr->sig.small_imm_c;
|
||||
+ result->sig.small_imm_d = mul_instr->sig.small_imm_d;
|
||||
+
|
||||
+ return (result->sig.small_imm_a +
|
||||
+ result->sig.small_imm_b +
|
||||
+ result->sig.small_imm_c +
|
||||
+ result->sig.small_imm_d) <= 1;
|
||||
+ }
|
||||
+
|
||||
assert(devinfo->ver <= 42);
|
||||
|
||||
uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
|
||||
@@ -1060,7 +1081,8 @@ add_op_as_mul_op(enum v3d_qpu_add_op op)
|
||||
}
|
||||
|
||||
static void
|
||||
-qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
|
||||
+qpu_convert_add_to_mul(const struct v3d_device_info *devinfo,
|
||||
+ struct v3d_qpu_instr *inst)
|
||||
{
|
||||
STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add));
|
||||
assert(inst->alu.add.op != V3D_QPU_A_NOP);
|
||||
@@ -1084,6 +1106,18 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
|
||||
inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
|
||||
inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+
|
||||
+ if (devinfo->ver >= 71) {
|
||||
+ assert(!inst->sig.small_imm_c && !inst->sig.small_imm_d);
|
||||
+ assert(inst->sig.small_imm_a + inst->sig.small_imm_b <= 1);
|
||||
+ if (inst->sig.small_imm_a) {
|
||||
+ inst->sig.small_imm_c = true;
|
||||
+ inst->sig.small_imm_a = false;
|
||||
+ } else if (inst->sig.small_imm_b) {
|
||||
+ inst->sig.small_imm_d = true;
|
||||
+ inst->sig.small_imm_b = false;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -1135,6 +1169,16 @@ qpu_convert_mul_to_add(struct v3d_qpu_instr *inst)
|
||||
inst->alu.mul.output_pack = V3D_QPU_PACK_NONE;
|
||||
inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
|
||||
inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
|
||||
+
|
||||
+ assert(!inst->sig.small_imm_a && !inst->sig.small_imm_b);
|
||||
+ assert(inst->sig.small_imm_c + inst->sig.small_imm_d <= 1);
|
||||
+ if (inst->sig.small_imm_c) {
|
||||
+ inst->sig.small_imm_a = true;
|
||||
+ inst->sig.small_imm_c = false;
|
||||
+ } else if (inst->sig.small_imm_d) {
|
||||
+ inst->sig.small_imm_b = true;
|
||||
+ inst->sig.small_imm_d = false;
|
||||
+ }
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -1173,20 +1217,20 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
else if (a->alu.mul.op == V3D_QPU_M_NOP &&
|
||||
can_do_add_as_mul(b->alu.add.op)) {
|
||||
mul_inst = *b;
|
||||
- qpu_convert_add_to_mul(&mul_inst);
|
||||
+ qpu_convert_add_to_mul(devinfo, &mul_inst);
|
||||
|
||||
merge.alu.mul = mul_inst.alu.mul;
|
||||
|
||||
- merge.flags.mc = b->flags.ac;
|
||||
- merge.flags.mpf = b->flags.apf;
|
||||
- merge.flags.muf = b->flags.auf;
|
||||
+ merge.flags.mc = mul_inst.flags.mc;
|
||||
+ merge.flags.mpf = mul_inst.flags.mpf;
|
||||
+ merge.flags.muf = mul_inst.flags.muf;
|
||||
|
||||
add_instr = a;
|
||||
mul_instr = &mul_inst;
|
||||
} else if (a->alu.mul.op == V3D_QPU_M_NOP &&
|
||||
can_do_add_as_mul(a->alu.add.op)) {
|
||||
mul_inst = *a;
|
||||
- qpu_convert_add_to_mul(&mul_inst);
|
||||
+ qpu_convert_add_to_mul(devinfo, &mul_inst);
|
||||
|
||||
merge = mul_inst;
|
||||
merge.alu.add = b->alu.add;
|
||||
@@ -1225,9 +1269,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
|
||||
merge.alu.add = add_inst.alu.add;
|
||||
|
||||
- merge.flags.ac = b->flags.mc;
|
||||
- merge.flags.apf = b->flags.mpf;
|
||||
- merge.flags.auf = b->flags.muf;
|
||||
+ merge.flags.ac = add_inst.flags.ac;
|
||||
+ merge.flags.apf = add_inst.flags.apf;
|
||||
+ merge.flags.auf = add_inst.flags.auf;
|
||||
|
||||
mul_instr = a;
|
||||
add_instr = &add_inst;
|
||||
@@ -1252,17 +1296,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
|
||||
/* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
|
||||
* they have restrictions on the number of raddrs that can be adressed
|
||||
- * in a single instruction.
|
||||
- *
|
||||
- * FIXME: for V3D 7.x we can't merge instructions if they address more
|
||||
- * than one small immediate. For now, we don't support small immediates,
|
||||
- * so it is not a problem.
|
||||
+ * in a single instruction. In V3D 7.x, we don't have that restriction,
|
||||
+ * but we are still limited to a single small immediate per instruction.
|
||||
*/
|
||||
- if (devinfo->ver <= 42) {
|
||||
- if (add_instr && mul_instr &&
|
||||
- !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
|
||||
- return false;
|
||||
- }
|
||||
+ if (add_instr && mul_instr &&
|
||||
+ !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
|
||||
+ return false;
|
||||
}
|
||||
|
||||
merge.sig.thrsw |= b->sig.thrsw;
|
||||
@@ -1273,7 +1312,6 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
merge.sig.ldtmu |= b->sig.ldtmu;
|
||||
merge.sig.ldvary |= b->sig.ldvary;
|
||||
merge.sig.ldvpm |= b->sig.ldvpm;
|
||||
- merge.sig.small_imm_b |= b->sig.small_imm_b;
|
||||
merge.sig.ldtlb |= b->sig.ldtlb;
|
||||
merge.sig.ldtlbu |= b->sig.ldtlbu;
|
||||
merge.sig.ucb |= b->sig.ucb;
|
||||
@@ -1933,8 +1971,6 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
if (c->devinfo->ver >= 71) {
|
||||
/* RF2-3 might be overwritten during the delay slots by
|
||||
* fragment shader setup.
|
||||
- *
|
||||
- * FIXME: handle small immediate cases
|
||||
*/
|
||||
if (v3d71_qpu_reads_raddr(inst, 2) ||
|
||||
v3d71_qpu_reads_raddr(inst, 3)) {
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index fde6695d59b..41070484286 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -116,8 +116,24 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
return;
|
||||
|
||||
if (devinfo->ver < 71) {
|
||||
- if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d)
|
||||
- fail_instr(state, "small imm a/c/d added after V3D 7.1");
|
||||
+ if (inst->sig.small_imm_a || inst->sig.small_imm_c ||
|
||||
+ inst->sig.small_imm_d) {
|
||||
+ fail_instr(state, "small imm a/c/d added after V3D 7.1");
|
||||
+ }
|
||||
+ } else {
|
||||
+ if ((inst->sig.small_imm_a || inst->sig.small_imm_b) &&
|
||||
+ !vir_is_add(qinst)) {
|
||||
+ fail_instr(state, "small imm a/b used but no ADD inst");
|
||||
+ }
|
||||
+ if ((inst->sig.small_imm_c || inst->sig.small_imm_d) &&
|
||||
+ !vir_is_mul(qinst)) {
|
||||
+ fail_instr(state, "small imm c/d used but no MUL inst");
|
||||
+ }
|
||||
+ if (inst->sig.small_imm_a + inst->sig.small_imm_b +
|
||||
+ inst->sig.small_imm_c + inst->sig.small_imm_d > 1) {
|
||||
+ fail_instr(state, "only one small immediate can be "
|
||||
+ "enabled per instruction");
|
||||
+ }
|
||||
}
|
||||
|
||||
/* LDVARY writes r5 two instructions later and LDUNIF writes
|
||||
diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
index df0d6c36c9b..ed5bc011964 100644
|
||||
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
@@ -44,7 +44,9 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
/* The small immediate value sits in the raddr B field, so we
|
||||
* can't have 2 small immediates in one instruction (unless
|
||||
* they're the same value, but that should be optimized away
|
||||
- * elsewhere).
|
||||
+ * elsewhere). Since 7.x we can encode small immediates in
|
||||
+ * any raddr field, but each instruction can still only use
|
||||
+ * one.
|
||||
*/
|
||||
bool uses_small_imm = false;
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
@@ -80,7 +82,22 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
*/
|
||||
struct v3d_qpu_sig new_sig = inst->qpu.sig;
|
||||
uint32_t sig_packed;
|
||||
- new_sig.small_imm_b = true;
|
||||
+ if (c->devinfo->ver <= 42) {
|
||||
+ new_sig.small_imm_b = true;
|
||||
+ } else {
|
||||
+ if (vir_is_add(inst)) {
|
||||
+ if (i == 0)
|
||||
+ new_sig.small_imm_a = true;
|
||||
+ else
|
||||
+ new_sig.small_imm_b = true;
|
||||
+ } else {
|
||||
+ if (i == 0)
|
||||
+ new_sig.small_imm_c = true;
|
||||
+ else
|
||||
+ new_sig.small_imm_d = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed))
|
||||
continue;
|
||||
|
||||
@@ -89,7 +106,10 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
vir_dump_inst(c, inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
- inst->qpu.sig.small_imm_b = true;
|
||||
+ inst->qpu.sig.small_imm_a = new_sig.small_imm_a;
|
||||
+ inst->qpu.sig.small_imm_b = new_sig.small_imm_b;
|
||||
+ inst->qpu.sig.small_imm_c = new_sig.small_imm_c;
|
||||
+ inst->qpu.sig.small_imm_d = new_sig.small_imm_d;
|
||||
inst->qpu.raddr_b = packed;
|
||||
|
||||
inst->src[i].file = QFILE_SMALL_IMM;
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index cbbb495592b..4ed184cbbcb 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -89,8 +89,15 @@ new_qpu_nop_before(struct qinst *inst)
|
||||
static void
|
||||
v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
|
||||
{
|
||||
- if (src.smimm)
|
||||
- unreachable("v3d71_set_src: pending handling small immediates");
|
||||
+ /* If we have a small immediate move it from inst->raddr_b to the
|
||||
+ * corresponding raddr.
|
||||
+ */
|
||||
+ if (src.smimm) {
|
||||
+ assert(instr->sig.small_imm_a || instr->sig.small_imm_b ||
|
||||
+ instr->sig.small_imm_c || instr->sig.small_imm_d);
|
||||
+ *raddr = instr->raddr_b;
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
assert(!src.magic);
|
||||
*raddr = src.index;
|
||||
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
|
||||
index b613de781dc..c1590a760de 100644
|
||||
--- a/src/broadcom/qpu/qpu_disasm.c
|
||||
+++ b/src/broadcom/qpu/qpu_disasm.c
|
||||
@@ -113,7 +113,6 @@ v3d71_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
}
|
||||
|
||||
if (is_small_imm) {
|
||||
- unreachable("Pending handling small immediates");
|
||||
uint32_t val;
|
||||
ASSERTED bool ok =
|
||||
v3d_qpu_small_imm_unpack(disasm->devinfo,
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index f54ce7210fb..c30f4bbbccf 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -975,10 +975,10 @@ v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
|
||||
int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
|
||||
int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
|
||||
- return (add_nsrc > 0 && inst->alu.add.a.raddr == raddr) ||
|
||||
- (add_nsrc > 1 && inst->alu.add.b.raddr == raddr) ||
|
||||
- (mul_nsrc > 0 && inst->alu.mul.a.raddr == raddr) ||
|
||||
- (mul_nsrc > 1 && inst->alu.mul.b.raddr == raddr);
|
||||
+ return (add_nsrc > 0 && !inst->sig.small_imm_a && inst->alu.add.a.raddr == raddr) ||
|
||||
+ (add_nsrc > 1 && !inst->sig.small_imm_b && inst->alu.add.b.raddr == raddr) ||
|
||||
+ (mul_nsrc > 0 && !inst->sig.small_imm_c && inst->alu.mul.a.raddr == raddr) ||
|
||||
+ (mul_nsrc > 1 && !inst->sig.small_imm_d && inst->alu.mul.b.raddr == raddr);
|
||||
}
|
||||
|
||||
bool
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index dece45c5c54..d408fb426fa 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -402,7 +402,7 @@ struct v3d_qpu_instr {
|
||||
uint8_t sig_addr;
|
||||
bool sig_magic; /* If the signal writes to a magic address */
|
||||
uint8_t raddr_a; /* V3D 4.x */
|
||||
- uint8_t raddr_b; /* V3D 4.x*/
|
||||
+ uint8_t raddr_b; /* V3D 4.x (holds packed small immediate in 7.x too) */
|
||||
struct v3d_qpu_flags flags;
|
||||
|
||||
union {
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index ed5a8bc667d..7984712d527 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -1218,16 +1218,11 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
|
||||
|
||||
instr->alu.add.op = desc->op;
|
||||
|
||||
- /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
|
||||
+ /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
|
||||
* operands.
|
||||
*/
|
||||
- /* FIXME: for now hardcoded values, until we got the small_imm support
|
||||
- * in place
|
||||
- */
|
||||
- uint32_t small_imm_a = 0;
|
||||
- uint32_t small_imm_b = 0;
|
||||
- if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
|
||||
- small_imm_b *256 + (op & 3) * 64 + raddr_b) {
|
||||
+ if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
|
||||
+ instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
|
||||
if (instr->alu.add.op == V3D_QPU_A_FMIN)
|
||||
instr->alu.add.op = V3D_QPU_A_FMAX;
|
||||
if (instr->alu.add.op == V3D_QPU_A_FADD)
|
||||
@@ -1858,11 +1853,6 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
uint32_t output_pack;
|
||||
uint32_t a_unpack;
|
||||
uint32_t b_unpack;
|
||||
- /* FIXME: for now hardcoded values, until we got the small_imm
|
||||
- * support in place
|
||||
- */
|
||||
- uint32_t small_imm_a = 0;
|
||||
- uint32_t small_imm_b = 0;
|
||||
|
||||
if (instr->alu.add.op != V3D_QPU_A_FCMP) {
|
||||
if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
|
||||
@@ -1886,8 +1876,8 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
* distinguished by which order their operands come in.
|
||||
*/
|
||||
bool ordering =
|
||||
- small_imm_a * 256 + a_unpack * 64 + raddr_a >
|
||||
- small_imm_b * 256 + b_unpack * 64 + raddr_b;
|
||||
+ instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
|
||||
+ instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
|
||||
if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
|
||||
instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
|
||||
((instr->alu.add.op == V3D_QPU_A_FMAX ||
|
||||
@@ -1901,6 +1891,22 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
|
||||
temp = raddr_a;
|
||||
raddr_a = raddr_b;
|
||||
raddr_b = temp;
|
||||
+
|
||||
+ /* If we are swapping raddr_a/b we also need to swap
|
||||
+ * small_imm_a/b.
|
||||
+ */
|
||||
+ if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
|
||||
+ assert(instr->sig.small_imm_a !=
|
||||
+ instr->sig.small_imm_b);
|
||||
+ struct v3d_qpu_sig new_sig = instr->sig;
|
||||
+ new_sig.small_imm_a = !instr->sig.small_imm_a;
|
||||
+ new_sig.small_imm_b = !instr->sig.small_imm_b;
|
||||
+ uint32_t sig;
|
||||
+ if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
|
||||
+ return false;
|
||||
+ *packed_instr &= ~V3D_QPU_SIG_MASK;
|
||||
+ *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
|
||||
+ }
|
||||
}
|
||||
|
||||
opcode |= a_unpack << 2;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,61 +0,0 @@
|
||||
From 3af87d2672da7c928ecf8a0a1cd1bef8a6729364 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 22 Nov 2021 12:56:03 +0100
|
||||
Subject: [PATCH 058/142] broadcom/compiler: update thread end restrictions for
|
||||
v7.x
|
||||
|
||||
In 4.x it is not allowed to write to the register file in the last
|
||||
3 instructions, but in 7.x we only have this restriction in the
|
||||
thread end instruction itself, and only if the write comes from
|
||||
the ALU ports.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 31 ++++++++++++++++++++--------
|
||||
1 file changed, 22 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index bd1c920848a..cba16c77d67 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1938,17 +1938,30 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
return false;
|
||||
}
|
||||
|
||||
- /* No writing physical registers at the end. */
|
||||
- bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
|
||||
- bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
|
||||
- if ((!add_is_nop && !inst->alu.add.magic_write) ||
|
||||
- (!mul_is_nop && !inst->alu.mul.magic_write)) {
|
||||
- return false;
|
||||
+ if (c->devinfo->ver <= 42) {
|
||||
+ /* No writing physical registers at the end. */
|
||||
+ bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
|
||||
+ bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
|
||||
+ if ((!add_is_nop && !inst->alu.add.magic_write) ||
|
||||
+ (!mul_is_nop && !inst->alu.mul.magic_write)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
|
||||
+ !inst->sig_magic) {
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
|
||||
- if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
|
||||
- !inst->sig_magic) {
|
||||
- return false;
|
||||
+ if (c->devinfo->ver >= 71) {
|
||||
+ /* The thread end instruction must not write to the
|
||||
+ * register file via the add/mul ALUs.
|
||||
+ */
|
||||
+ if (slot == 0 &&
|
||||
+ (!inst->alu.add.magic_write ||
|
||||
+ !inst->alu.mul.magic_write)) {
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,112 +0,0 @@
|
||||
From 7cfd5b808bb2f1cb17f57435cb5d411c4ac3aa6c Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 23 Nov 2021 10:04:49 +0100
|
||||
Subject: [PATCH 059/142] broadcom/compiler: update ldvary thread switch delay
|
||||
slot restriction for v7.x
|
||||
|
||||
In V3D 7.x we don't have accumulators, which would not survive a thread
|
||||
switch, so the only restriction is that ldvary can't be placed in the second
|
||||
delay slot of a thread switch.
|
||||
|
||||
shader-db results for UnrealEngine4 shaders:
|
||||
|
||||
total instructions in shared programs: 446458 -> 446401 (-0.01%)
|
||||
instructions in affected programs: 13492 -> 13435 (-0.42%)
|
||||
helped: 58
|
||||
HURT: 3
|
||||
Instructions are helped.
|
||||
|
||||
total nops in shared programs: 19571 -> 19541 (-0.15%)
|
||||
nops in affected programs: 161 -> 131 (-18.63%)
|
||||
helped: 30
|
||||
HURT: 0
|
||||
Nops are helped.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 33 +++++++++++++++++++++-------
|
||||
src/broadcom/compiler/qpu_validate.c | 10 +++++++--
|
||||
2 files changed, 33 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index cba16c77d67..32f651851cf 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1491,11 +1491,20 @@ retry:
|
||||
* ldvary now if the follow-up fixup would place
|
||||
* it in the delay slots of a thrsw, which is not
|
||||
* allowed and would prevent the fixup from being
|
||||
- * successful.
|
||||
+ * successful. In V3D 7.x we can allow this to happen
|
||||
+ * as long as it is not the last delay slot.
|
||||
*/
|
||||
- if (inst->sig.ldvary &&
|
||||
- scoreboard->last_thrsw_tick + 2 >= scoreboard->tick - 1) {
|
||||
- continue;
|
||||
+ if (inst->sig.ldvary) {
|
||||
+ if (c->devinfo->ver <= 42 &&
|
||||
+ scoreboard->last_thrsw_tick + 2 >=
|
||||
+ scoreboard->tick - 1) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (c->devinfo->ver >= 71 &&
|
||||
+ scoreboard->last_thrsw_tick + 2 ==
|
||||
+ scoreboard->tick - 1) {
|
||||
+ continue;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* We can emit a new tmu lookup with a previous ldtmu
|
||||
@@ -2020,8 +2029,12 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
|
||||
if (slot > 0 && v3d_qpu_instr_is_legacy_sfu(&qinst->qpu))
|
||||
return false;
|
||||
|
||||
- if (slot > 0 && qinst->qpu.sig.ldvary)
|
||||
- return false;
|
||||
+ if (qinst->qpu.sig.ldvary) {
|
||||
+ if (c->devinfo->ver <= 42 && slot > 0)
|
||||
+ return false;
|
||||
+ if (c->devinfo->ver >= 71 && slot == 2)
|
||||
+ return false;
|
||||
+ }
|
||||
|
||||
/* unifa and the following 3 instructions can't overlap a
|
||||
* thread switch/end. The docs further clarify that this means
|
||||
@@ -2618,9 +2631,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
||||
|
||||
/* We can't put an ldvary in the delay slots of a thrsw. We should've
|
||||
* prevented this when pairing up the ldvary with another instruction
|
||||
- * and flagging it for a fixup.
|
||||
+ * and flagging it for a fixup. In V3D 7.x this is limited only to the
|
||||
+ * second delay slot.
|
||||
*/
|
||||
- assert(scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1);
|
||||
+ assert((devinfo->ver <= 42 &&
|
||||
+ scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) ||
|
||||
+ (devinfo->ver >= 71 &&
|
||||
+ scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1));
|
||||
|
||||
/* Move the ldvary to the previous instruction and remove it from the
|
||||
* current one.
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index 41070484286..4f09aa8aef4 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -215,8 +215,14 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
"SFU write started during THRSW delay slots ");
|
||||
}
|
||||
|
||||
- if (inst->sig.ldvary)
|
||||
- fail_instr(state, "LDVARY during THRSW delay slots");
|
||||
+ if (inst->sig.ldvary) {
|
||||
+ if (devinfo->ver <= 42)
|
||||
+ fail_instr(state, "LDVARY during THRSW delay slots");
|
||||
+ if (devinfo->ver >= 71 &&
|
||||
+ state->ip - state->last_thrsw_ip == 2) {
|
||||
+ fail_instr(state, "LDVARY in 2nd THRSW delay slot");
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
(void)qpu_magic_waddr_matches; /* XXX */
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,30 +0,0 @@
|
||||
From ca4063d627cd31c589a8e8688f2876dd8211d1bc Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 25 Nov 2021 08:31:02 +0100
|
||||
Subject: [PATCH 060/142] broadcom/compiler: lift restriction for branch +
|
||||
msfign after setmsf for v7.x
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 32f651851cf..476eae691ab 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -2373,10 +2373,11 @@ emit_branch(struct v3d_compile *c,
|
||||
assert(scoreboard->last_branch_tick + 3 < branch_tick);
|
||||
assert(scoreboard->last_unifa_write_tick + 3 < branch_tick);
|
||||
|
||||
- /* Can't place a branch with msfign != 0 and cond != 0,2,3 after
|
||||
+ /* V3D 4.x can't place a branch with msfign != 0 and cond != 0,2,3 after
|
||||
* setmsf.
|
||||
*/
|
||||
bool is_safe_msf_branch =
|
||||
+ c->devinfo->ver >= 71 ||
|
||||
inst->qpu.branch.msfign == V3D_QPU_MSFIGN_NONE ||
|
||||
inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS ||
|
||||
inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_A0 ||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,38 +0,0 @@
|
||||
From 167510aa43bbcf06e57a64495cee40e8cdaf5f8b Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Fri, 26 Nov 2021 10:37:05 +0100
|
||||
Subject: [PATCH 061/142] broadcom/compiler: start allocating from RF 4 in V7.x
|
||||
|
||||
In V3D 4.x we start at RF3 so that we allocate RF0-2 only if there
|
||||
aren't any other RFs available. This is useful with small shaders
|
||||
to ensure that our TLB writes don't use these registers because
|
||||
these are the last instructions we emit in fragment shaders and
|
||||
the last instructions in a program can't write to these registers,
|
||||
so if we do, we need to emit NOPs.
|
||||
|
||||
In V3D 7.x the registers affected by this restriction are RF2-3,
|
||||
so we choose to start at RF4.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 6f7b1ca0589..440b093a636 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -1234,9 +1234,10 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
.phys_index = phys_index,
|
||||
.next_acc = 0,
|
||||
/* Start at RF3, to try to keep the TLB writes from using
|
||||
- * RF0-2.
|
||||
+ * RF0-2. Start at RF4 in 7.x to prevent TLB writes from
|
||||
+ * using RF2-3.
|
||||
*/
|
||||
- .next_phys = 3,
|
||||
+ .next_phys = c->devinfo->ver <= 42 ? 3 : 4,
|
||||
.nodes = &c->nodes,
|
||||
.devinfo = c->devinfo,
|
||||
};
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,71 +0,0 @@
|
||||
From d47ea903b96e43b07bdef21f8026da818e30fcd1 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 25 Nov 2021 13:00:34 +0100
|
||||
Subject: [PATCH 062/142] broadcom/compiler: validate restrictions after TLB Z
|
||||
write
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_validate.c | 28 ++++++++++++++++++++++++++++
|
||||
1 file changed, 28 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index 4f09aa8aef4..1082fb7d50a 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -41,6 +41,7 @@ struct v3d_qpu_validate_state {
|
||||
int last_sfu_write;
|
||||
int last_branch_ip;
|
||||
int last_thrsw_ip;
|
||||
+ int first_tlb_z_write;
|
||||
|
||||
/* Set when we've found the last-THRSW signal, or if we were started
|
||||
* in single-segment mode.
|
||||
@@ -110,11 +111,37 @@ static void
|
||||
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
{
|
||||
const struct v3d_device_info *devinfo = state->c->devinfo;
|
||||
+
|
||||
+ if (qinst->is_tlb_z_write && state->ip < state->first_tlb_z_write)
|
||||
+ state->first_tlb_z_write = state->ip;
|
||||
+
|
||||
const struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||
|
||||
+ if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
|
||||
+ state->first_tlb_z_write >= 0 &&
|
||||
+ state->ip > state->first_tlb_z_write &&
|
||||
+ inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
|
||||
+ inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
|
||||
+ inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
|
||||
+ inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
|
||||
+ fail_instr(state, "Implicit branch MSF read after TLB Z write");
|
||||
+ }
|
||||
+
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
return;
|
||||
|
||||
+ if (inst->alu.add.op == V3D_QPU_A_SETMSF &&
|
||||
+ state->first_tlb_z_write >= 0 &&
|
||||
+ state->ip > state->first_tlb_z_write) {
|
||||
+ fail_instr(state, "SETMSF after TLB Z write");
|
||||
+ }
|
||||
+
|
||||
+ if (state->first_tlb_z_write >= 0 &&
|
||||
+ state->ip > state->first_tlb_z_write &&
|
||||
+ inst->alu.add.op == V3D_QPU_A_MSF) {
|
||||
+ fail_instr(state, "MSF read after TLB Z write");
|
||||
+ }
|
||||
+
|
||||
if (devinfo->ver < 71) {
|
||||
if (inst->sig.small_imm_a || inst->sig.small_imm_c ||
|
||||
inst->sig.small_imm_d) {
|
||||
@@ -348,6 +375,7 @@ qpu_validate(struct v3d_compile *c)
|
||||
.last_sfu_write = -10,
|
||||
.last_thrsw_ip = -10,
|
||||
.last_branch_ip = -10,
|
||||
+ .first_tlb_z_write = INT_MAX,
|
||||
.ip = 0,
|
||||
|
||||
.last_thrsw_found = !c->last_thrsw,
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,26 +0,0 @@
|
||||
From 6cdf01fad49489b5fc66d231b527de5245d5de32 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 29 Nov 2021 13:23:11 +0100
|
||||
Subject: [PATCH 063/142] broadcom/compiler: lift restriction on vpmwt in last
|
||||
instruction for V3D 7.x
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 476eae691ab..77fb6a794e6 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -1934,7 +1934,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
if (slot > 0 && qinst->uniform != ~0)
|
||||
return false;
|
||||
|
||||
- if (v3d_qpu_waits_vpm(inst))
|
||||
+ if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst))
|
||||
return false;
|
||||
|
||||
if (inst->sig.ldvary)
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,134 +0,0 @@
|
||||
From acc54637f0787ba4dc887130c25c628ccdaf4e38 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 9 Nov 2021 11:34:59 +0100
|
||||
Subject: [PATCH 064/142] broadcom/compiler: fix up copy propagation for v71
|
||||
|
||||
Update rules for unsafe copy propagations to match v7.x.
|
||||
---
|
||||
.../compiler/vir_opt_copy_propagate.c | 83 +++++++++++++------
|
||||
1 file changed, 56 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c
|
||||
index c4aa7255a17..1260838ca05 100644
|
||||
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
|
||||
+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
|
||||
@@ -35,7 +35,7 @@
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
static bool
|
||||
-is_copy_mov(struct qinst *inst)
|
||||
+is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
{
|
||||
if (!inst)
|
||||
return false;
|
||||
@@ -62,36 +62,65 @@ is_copy_mov(struct qinst *inst)
|
||||
return false;
|
||||
}
|
||||
|
||||
- switch (inst->src[0].file) {
|
||||
- case QFILE_MAGIC:
|
||||
- /* No copy propagating from R3/R4/R5 -- the MOVs from those
|
||||
- * are there to register allocate values produced into R3/4/5
|
||||
- * to other regs (though hopefully r3/4/5).
|
||||
- */
|
||||
- switch (inst->src[0].index) {
|
||||
- case V3D_QPU_WADDR_R3:
|
||||
- case V3D_QPU_WADDR_R4:
|
||||
- case V3D_QPU_WADDR_R5:
|
||||
- return false;
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ switch (inst->src[0].file) {
|
||||
+ case QFILE_MAGIC:
|
||||
+ /* No copy propagating from R3/R4/R5 -- the MOVs from
|
||||
+ * those are there to register allocate values produced
|
||||
+ * into R3/4/5 to other regs (though hopefully r3/4/5).
|
||||
+ */
|
||||
+ switch (inst->src[0].index) {
|
||||
+ case V3D_QPU_WADDR_R3:
|
||||
+ case V3D_QPU_WADDR_R4:
|
||||
+ case V3D_QPU_WADDR_R5:
|
||||
+ return false;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
+ case QFILE_REG:
|
||||
+ switch (inst->src[0].index) {
|
||||
+ case 0:
|
||||
+ case 1:
|
||||
+ case 2:
|
||||
+ /* MOVs from rf0/1/2 are only to track the live
|
||||
+ * intervals for W/centroid W/Z.
|
||||
+ */
|
||||
+ return false;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
break;
|
||||
}
|
||||
- break;
|
||||
-
|
||||
- case QFILE_REG:
|
||||
- switch (inst->src[0].index) {
|
||||
- case 0:
|
||||
- case 1:
|
||||
- case 2:
|
||||
- /* MOVs from rf0/1/2 are only to track the live
|
||||
+ } else {
|
||||
+ assert(devinfo->ver >= 71);
|
||||
+ switch (inst->src[0].file) {
|
||||
+ case QFILE_REG:
|
||||
+ switch (inst->src[0].index) {
|
||||
+ /* MOVs from rf1/2/3 are only to track the live
|
||||
* intervals for W/centroid W/Z.
|
||||
+ *
|
||||
+ * Note: rf0 can be implicitly written by ldvary
|
||||
+ * (no temp involved), so it is not an SSA value and
|
||||
+ * could clash with writes to other temps that are
|
||||
+ * also allocated to rf0. In theory, that would mean
|
||||
+ * that we can't copy propagate from it, but we handle
|
||||
+ * this at register allocation time, preventing temps
|
||||
+ * from being allocated to rf0 while the rf0 value from
|
||||
+ * ldvary is still live.
|
||||
*/
|
||||
- return false;
|
||||
- }
|
||||
- break;
|
||||
+ case 1:
|
||||
+ case 2:
|
||||
+ case 3:
|
||||
+ return false;
|
||||
+ }
|
||||
+ break;
|
||||
|
||||
- default:
|
||||
- break;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -135,7 +164,7 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
|
||||
*/
|
||||
struct qinst *mov = movs[inst->src[i].index];
|
||||
if (!mov) {
|
||||
- if (!is_copy_mov(c->defs[inst->src[i].index]))
|
||||
+ if (!is_copy_mov(c->devinfo, c->defs[inst->src[i].index]))
|
||||
continue;
|
||||
mov = c->defs[inst->src[i].index];
|
||||
|
||||
@@ -245,7 +274,7 @@ vir_opt_copy_propagate(struct v3d_compile *c)
|
||||
|
||||
apply_kills(c, movs, inst);
|
||||
|
||||
- if (is_copy_mov(inst))
|
||||
+ if (is_copy_mov(c->devinfo, inst))
|
||||
movs[inst->dst.index] = inst;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,150 +0,0 @@
|
||||
From c340f7f1eb4a1e5c0fafe1ea2f801f2ebaf82d8d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 26 Nov 2021 01:24:12 +0100
|
||||
Subject: [PATCH 065/142] broadcom/qpu: new packing/conversion v71 instructions
|
||||
|
||||
This commit adds the qpu definitions for several new v71
|
||||
instructions.
|
||||
|
||||
Packing:
|
||||
* vpack does a 2x32 to 2x16 bit integer pack
|
||||
* v8pack: Pack 2 x 2x16 bit integers into 4x8 bits
|
||||
* v10pack packs parts of 2 2x16 bit integer into r10g10b10a2.
|
||||
* v11fpack packs parts of 2 2x16 bit float into r11g11b10 rounding
|
||||
to nearest
|
||||
|
||||
Conversion to unorm/snorm:
|
||||
* vftounorm8/vftosnorm8: converts from 2x16-bit floating point
|
||||
to 2x8 bit unorm/snorm.
|
||||
* ftounorm16/ftosnorm16: converts floating point to 16-bit
|
||||
unorm/snorm
|
||||
* vftounorm10lo: Convert 2x16-bit floating point to 2x10-bit unorm
|
||||
* vftounorm10hi: Convert 2x16-bit floating point to one 2-bit and one 10-bit unorm
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 20 ++++++++++++++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 12 ++++++++++++
|
||||
src/broadcom/qpu/qpu_pack.c | 12 ++++++++++++
|
||||
3 files changed, 44 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index c30f4bbbccf..44f20618a5a 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -179,6 +179,10 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
|
||||
[V3D_QPU_A_UTOF] = "utof",
|
||||
[V3D_QPU_A_MOV] = "mov",
|
||||
[V3D_QPU_A_FMOV] = "fmov",
|
||||
+ [V3D_QPU_A_VPACK] = "vpack",
|
||||
+ [V3D_QPU_A_V8PACK] = "v8pack",
|
||||
+ [V3D_QPU_A_V10PACK] = "v10pack",
|
||||
+ [V3D_QPU_A_V11FPACK] = "v11fpack",
|
||||
};
|
||||
|
||||
if (op >= ARRAY_SIZE(op_names))
|
||||
@@ -201,6 +205,12 @@ v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
|
||||
[V3D_QPU_M_MOV] = "mov",
|
||||
[V3D_QPU_M_NOP] = "nop",
|
||||
[V3D_QPU_M_FMUL] = "fmul",
|
||||
+ [V3D_QPU_M_FTOUNORM16] = "ftounorm16",
|
||||
+ [V3D_QPU_M_FTOSNORM16] = "ftosnorm16",
|
||||
+ [V3D_QPU_M_VFTOUNORM8] = "vftounorm8",
|
||||
+ [V3D_QPU_M_VFTOSNORM8] = "vftosnorm8",
|
||||
+ [V3D_QPU_M_VFTOUNORM10LO] = "vftounorm10lo",
|
||||
+ [V3D_QPU_M_VFTOUNORM10HI] = "vftounorm10hi",
|
||||
};
|
||||
|
||||
if (op >= ARRAY_SIZE(op_names))
|
||||
@@ -463,6 +473,10 @@ static const uint8_t add_op_args[] = {
|
||||
|
||||
[V3D_QPU_A_MOV] = D | A,
|
||||
[V3D_QPU_A_FMOV] = D | A,
|
||||
+ [V3D_QPU_A_VPACK] = D | A | B,
|
||||
+ [V3D_QPU_A_V8PACK] = D | A | B,
|
||||
+ [V3D_QPU_A_V10PACK] = D | A | B,
|
||||
+ [V3D_QPU_A_V11FPACK] = D | A | B,
|
||||
};
|
||||
|
||||
static const uint8_t mul_op_args[] = {
|
||||
@@ -476,6 +490,12 @@ static const uint8_t mul_op_args[] = {
|
||||
[V3D_QPU_M_NOP] = 0,
|
||||
[V3D_QPU_M_MOV] = D | A,
|
||||
[V3D_QPU_M_FMUL] = D | A | B,
|
||||
+ [V3D_QPU_M_FTOUNORM16] = D | A,
|
||||
+ [V3D_QPU_M_FTOSNORM16] = D | A,
|
||||
+ [V3D_QPU_M_VFTOUNORM8] = D | A,
|
||||
+ [V3D_QPU_M_VFTOSNORM8] = D | A,
|
||||
+ [V3D_QPU_M_VFTOUNORM10LO] = D | A,
|
||||
+ [V3D_QPU_M_VFTOUNORM10HI] = D | A,
|
||||
};
|
||||
|
||||
bool
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index d408fb426fa..56eee9f9cac 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -231,6 +231,10 @@ enum v3d_qpu_add_op {
|
||||
/* V3D 7.x */
|
||||
V3D_QPU_A_FMOV,
|
||||
V3D_QPU_A_MOV,
|
||||
+ V3D_QPU_A_VPACK,
|
||||
+ V3D_QPU_A_V8PACK,
|
||||
+ V3D_QPU_A_V10PACK,
|
||||
+ V3D_QPU_A_V11FPACK,
|
||||
};
|
||||
|
||||
enum v3d_qpu_mul_op {
|
||||
@@ -244,6 +248,14 @@ enum v3d_qpu_mul_op {
|
||||
V3D_QPU_M_MOV,
|
||||
V3D_QPU_M_NOP,
|
||||
V3D_QPU_M_FMUL,
|
||||
+
|
||||
+ /* V3D 7.x */
|
||||
+ V3D_QPU_M_FTOUNORM16,
|
||||
+ V3D_QPU_M_FTOSNORM16,
|
||||
+ V3D_QPU_M_VFTOUNORM8,
|
||||
+ V3D_QPU_M_VFTOSNORM8,
|
||||
+ V3D_QPU_M_VFTOUNORM10LO,
|
||||
+ V3D_QPU_M_VFTOUNORM10HI,
|
||||
};
|
||||
|
||||
enum v3d_qpu_output_pack {
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 7984712d527..6cd75adac6d 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -783,6 +783,9 @@ static const struct opcode_desc add_ops_v71[] = {
|
||||
{ 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
|
||||
{ 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
|
||||
|
||||
+ { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
|
||||
+ { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },
|
||||
+
|
||||
{ 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 },
|
||||
{ 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 },
|
||||
{ 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 },
|
||||
@@ -797,6 +800,8 @@ static const struct opcode_desc add_ops_v71[] = {
|
||||
{ 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
|
||||
{ 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
|
||||
|
||||
+ { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
|
||||
+ { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },
|
||||
};
|
||||
|
||||
static const struct opcode_desc mul_ops_v71[] = {
|
||||
@@ -822,6 +827,13 @@ static const struct opcode_desc mul_ops_v71[] = {
|
||||
{ 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
|
||||
{ 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },
|
||||
|
||||
+ { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
|
||||
+ { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
|
||||
+ { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
|
||||
+ { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
|
||||
+ { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
|
||||
+ { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },
|
||||
+
|
||||
{ 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },
|
||||
|
||||
{ 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,68 +0,0 @@
|
||||
From f6082e941a3454c8735df2ff2713ae49b3daa74f Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 18 Apr 2023 08:50:13 +0200
|
||||
Subject: [PATCH 068/142] broadcom/compiler: don't allocate spill base to rf0
|
||||
in V3D 7.x
|
||||
|
||||
Otherwise it can be stomped by instructions doing implicit rf0 writes.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 21 +++++++++++++++----
|
||||
1 file changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 440b093a636..121c9b2794f 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -582,7 +582,8 @@ interferes(int32_t t0_start, int32_t t0_end, int32_t t1_start, int32_t t1_end)
|
||||
}
|
||||
|
||||
static void
|
||||
-v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
+v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int *implicit_rf_nodes,
|
||||
+ int spill_temp)
|
||||
{
|
||||
c->spill_start_num_temps = c->num_temps;
|
||||
c->spilling = true;
|
||||
@@ -594,8 +595,20 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
spill_offset = c->spill_size;
|
||||
c->spill_size += V3D_CHANNELS * sizeof(uint32_t);
|
||||
|
||||
- if (spill_offset == 0)
|
||||
+ if (spill_offset == 0) {
|
||||
v3d_setup_spill_base(c);
|
||||
+
|
||||
+ /* Don't allocate our spill base to rf0 to avoid
|
||||
+ * conflicts with instructions doing implicit writes
|
||||
+ * to that register.
|
||||
+ */
|
||||
+ if (!c->devinfo->has_accumulators) {
|
||||
+ ra_add_node_interference(
|
||||
+ c->g,
|
||||
+ temp_to_node(c, c->spill_base.index),
|
||||
+ implicit_rf_nodes[0]);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
struct qinst *last_thrsw = c->last_thrsw;
|
||||
@@ -1346,7 +1359,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
int node = v3d_choose_spill_node(c);
|
||||
uint32_t temp = node_to_temp(c, node);
|
||||
if (node != -1) {
|
||||
- v3d_spill_reg(c, acc_nodes, temp);
|
||||
+ v3d_spill_reg(c, acc_nodes, implicit_rf_nodes, temp);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -1363,7 +1376,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
enum temp_spill_type spill_type =
|
||||
get_spill_type_for_temp(c, temp);
|
||||
if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
|
||||
- v3d_spill_reg(c, acc_nodes, temp);
|
||||
+ v3d_spill_reg(c, acc_nodes, implicit_rf_nodes, temp);
|
||||
if (c->spills + c->fills > c->max_tmu_spills)
|
||||
goto spill_fail;
|
||||
} else {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,186 +0,0 @@
|
||||
From 0e9577fbb18a026390f653ca22f5a98a69a5fe59 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 2 May 2023 10:12:37 +0200
|
||||
Subject: [PATCH 069/142] broadcom/compiler: improve allocation for final
|
||||
program instructions
|
||||
|
||||
The last 3 instructions can't use specific registers so flag all the
|
||||
nodes for temps used in the last program instructions and try to
|
||||
avoid assigning any of these. This may help us avoid injecting nops
|
||||
for the last thread switch instruction.
|
||||
|
||||
Because register allocation needs to happen before QPU scheduling
|
||||
and instruction merging we can't tell exactly what the last 3
|
||||
instructions will be, so we do this for a few more instructions than
|
||||
just 3.
|
||||
|
||||
We only do this for fragment shaders because other shader stages
|
||||
always end with VPM store instructions that take a small immediate
|
||||
and therefore will never allow us to merge the final thread switch
|
||||
earlier, so limiting allocation for these shaders will never improve
|
||||
anything and might instead be detrimental.
|
||||
|
||||
total instructions in shared programs: 11471389 -> 11464335 (-0.06%)
|
||||
instructions in affected programs: 582908 -> 575854 (-1.21%)
|
||||
helped: 4669
|
||||
HURT: 578
|
||||
Instructions are helped.
|
||||
|
||||
total max-temps in shared programs: 2230497 -> 2230150 (-0.02%)
|
||||
max-temps in affected programs: 5662 -> 5315 (-6.13%)
|
||||
helped: 344
|
||||
HURT: 44
|
||||
Max-temps are helped.
|
||||
|
||||
total sfu-stalls in shared programs: 18068 -> 18077 (0.05%)
|
||||
sfu-stalls in affected programs: 264 -> 273 (3.41%)
|
||||
helped: 37
|
||||
HURT: 48
|
||||
Inconclusive result (value mean confidence interval includes 0).
|
||||
|
||||
total inst-and-stalls in shared programs: 11489457 -> 11482412 (-0.06%)
|
||||
inst-and-stalls in affected programs: 585180 -> 578135 (-1.20%)
|
||||
helped: 4659
|
||||
HURT: 588
|
||||
Inst-and-stalls are helped.
|
||||
|
||||
total nops in shared programs: 301738 -> 298140 (-1.19%)
|
||||
nops in affected programs: 14680 -> 11082 (-24.51%)
|
||||
helped: 3252
|
||||
HURT: 108
|
||||
Nops are helped.
|
||||
---
|
||||
src/broadcom/compiler/v3d_compiler.h | 1 +
|
||||
src/broadcom/compiler/vir_register_allocate.c | 69 +++++++++++++++++--
|
||||
2 files changed, 66 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
|
||||
index 425ab0cdf9d..2642d23b629 100644
|
||||
--- a/src/broadcom/compiler/v3d_compiler.h
|
||||
+++ b/src/broadcom/compiler/v3d_compiler.h
|
||||
@@ -613,6 +613,7 @@ struct v3d_ra_node_info {
|
||||
struct {
|
||||
uint32_t priority;
|
||||
uint8_t class_bits;
|
||||
+ bool is_program_end;
|
||||
|
||||
/* V3D 7.x */
|
||||
bool is_ldunif_dst;
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 121c9b2794f..495644bb557 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -385,6 +385,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
|
||||
c->nodes.info[node].class_bits = class_bits;
|
||||
c->nodes.info[node].priority = 0;
|
||||
c->nodes.info[node].is_ldunif_dst = false;
|
||||
+ c->nodes.info[node].is_program_end = false;
|
||||
}
|
||||
|
||||
/* The spill offset for this thread takes a bit of setup, so do it once at
|
||||
@@ -929,6 +930,17 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
return true;
|
||||
}
|
||||
|
||||
+ /* The last 3 instructions in a shader can't use some specific registers
|
||||
+ * (usually early rf registers, depends on v3d version) so try to
|
||||
+ * avoid allocating these to registers used by the last instructions
|
||||
+ * in the shader.
|
||||
+ */
|
||||
+ const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4;
|
||||
+ if (v3d_ra->nodes->info[node].is_program_end &&
|
||||
+ v3d_ra->next_phys < safe_rf_start) {
|
||||
+ v3d_ra->next_phys = safe_rf_start;
|
||||
+ }
|
||||
+
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
|
||||
int phys = v3d_ra->phys_index + phys_off;
|
||||
@@ -1218,6 +1230,44 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
||||
}
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Walks the exit block backwards and sets is_program_end on the RA node of
|
||||
+ * every QFILE_TEMP that is read (add or mul sources) or written (dst) by the
|
||||
+ * last few ALU instructions found there.
|
||||
+ */
|
||||
+static void
|
||||
+flag_program_end_nodes(struct v3d_compile *c)
|
||||
+{
|
||||
+        /* Only look for registers used in this many instructions */
|
||||
+        uint32_t last_set_count = 6;
|
||||
+
|
||||
+        struct qblock *last_block = vir_exit_block(c);
|
||||
+        list_for_each_entry_rev(struct qinst, inst, &last_block->instructions, link) {
|
||||
+                /* Skip anything that is not an ALU instruction. This used to
|
||||
+                 * be written as "!inst->qpu.type == ...", which parses as
|
||||
+                 * "(!inst->qpu.type) == ..." and only gives the intended
|
||||
+                 * result if the ALU enum constant is 0.
|
||||
+                 */
|
||||
+                if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
+                        continue;
|
||||
+
|
||||
+                /* Sources consumed by the add operation */
|
||||
+                int num_src = v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
|
||||
+                for (int i = 0; i < num_src; i++) {
|
||||
+                        if (inst->src[i].file == QFILE_TEMP) {
|
||||
+                                int node = temp_to_node(c, inst->src[i].index);
|
||||
+                                c->nodes.info[node].is_program_end = true;
|
||||
+                        }
|
||||
+                }
|
||||
+
|
||||
+                /* Sources consumed by the mul operation (same src[] array) */
|
||||
+                num_src = v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
|
||||
+                for (int i = 0; i < num_src; i++) {
|
||||
+                        if (inst->src[i].file == QFILE_TEMP) {
|
||||
+                                int node = temp_to_node(c, inst->src[i].index);
|
||||
+                                c->nodes.info[node].is_program_end = true;
|
||||
+                        }
|
||||
+                }
|
||||
+
|
||||
+                /* Destination temp */
|
||||
+                if (inst->dst.file == QFILE_TEMP) {
|
||||
+                        int node = temp_to_node(c, inst->dst.index);
|
||||
+                        c->nodes.info[node].is_program_end = true;
|
||||
+                }
|
||||
+
|
||||
+                /* Instructions skipped by the continue above do not consume
|
||||
+                 * the budget; only the ones that reach this point do.
|
||||
+                 */
|
||||
+                if (--last_set_count == 0)
|
||||
+                        break;
|
||||
+        }
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
|
||||
*
|
||||
@@ -1280,17 +1330,16 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
*/
|
||||
for (uint32_t i = 0; i < num_ra_nodes; i++) {
|
||||
c->nodes.info[i].is_ldunif_dst = false;
|
||||
+ c->nodes.info[i].is_program_end = false;
|
||||
+ c->nodes.info[i].priority = 0;
|
||||
+ c->nodes.info[i].class_bits = 0;
|
||||
if (c->devinfo->has_accumulators && i < ACC_COUNT) {
|
||||
acc_nodes[i] = i;
|
||||
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
|
||||
- c->nodes.info[i].priority = 0;
|
||||
- c->nodes.info[i].class_bits = 0;
|
||||
} else if (!c->devinfo->has_accumulators &&
|
||||
i < ARRAY_SIZE(implicit_rf_nodes)) {
|
||||
implicit_rf_nodes[i] = i;
|
||||
ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i);
|
||||
- c->nodes.info[i].priority = 0;
|
||||
- c->nodes.info[i].class_bits = 0;
|
||||
} else {
|
||||
uint32_t t = node_to_temp(c, i);
|
||||
c->nodes.info[i].priority =
|
||||
@@ -1327,6 +1376,18 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
last_ldvary_ip, inst);
|
||||
}
|
||||
|
||||
+ /* Flag the nodes that are used in the last instructions of the program
|
||||
+ * (there are some registers that cannot be used in the last 3
|
||||
+ * instructions). We only do this for fragment shaders, because the idea
|
||||
+ * is that by avoiding this conflict we may be able to emit the last
|
||||
+ * thread switch earlier in some cases, however, in non-fragment shaders
|
||||
+ * this won't happen because the last instructions are always VPM stores
|
||||
+ * with a small immediate, which conflicts with other signals,
|
||||
+ * preventing us from ever moving the thrsw earlier.
|
||||
+ */
|
||||
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT)
|
||||
+ flag_program_end_nodes(c);
|
||||
+
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
ra_set_node_class(c->g, temp_to_node(c, i),
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,105 +0,0 @@
|
||||
From 645fe451bcecbe3345a144222306d06fb39f6b9f Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 2 May 2023 10:17:47 +0200
|
||||
Subject: [PATCH 070/142] broadcom/compiler: don't assign registers to unused
|
||||
nodes/temps
|
||||
|
||||
In programs with a lot of unused temps, if we don't do this, we may
|
||||
end up recycling previously used rfs more often, which can be
|
||||
detrimental to instruction pairing.
|
||||
|
||||
total instructions in shared programs: 11464335 -> 11444136 (-0.18%)
|
||||
instructions in affected programs: 8976743 -> 8956544 (-0.23%)
|
||||
helped: 33196
|
||||
HURT: 33778
|
||||
Inconclusive result
|
||||
|
||||
total max-temps in shared programs: 2230150 -> 2229445 (-0.03%)
|
||||
max-temps in affected programs: 86413 -> 85708 (-0.82%)
|
||||
helped: 2217
|
||||
HURT: 1523
|
||||
Max-temps are helped.
|
||||
|
||||
total sfu-stalls in shared programs: 18077 -> 17104 (-5.38%)
|
||||
sfu-stalls in affected programs: 8669 -> 7696 (-11.22%)
|
||||
helped: 2657
|
||||
HURT: 2182
|
||||
Sfu-stalls are helped.
|
||||
|
||||
total inst-and-stalls in shared programs: 11482412 -> 11461240 (-0.18%)
|
||||
inst-and-stalls in affected programs: 8995697 -> 8974525 (-0.24%)
|
||||
helped: 33319
|
||||
HURT: 33708
|
||||
Inconclusive result
|
||||
|
||||
total nops in shared programs: 298140 -> 296185 (-0.66%)
|
||||
nops in affected programs: 52805 -> 50850 (-3.70%)
|
||||
helped: 3797
|
||||
HURT: 2662
|
||||
Inconclusive result
|
||||
---
|
||||
src/broadcom/compiler/v3d_compiler.h | 1 +
|
||||
src/broadcom/compiler/vir_register_allocate.c | 14 ++++++++++++++
|
||||
2 files changed, 15 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
|
||||
index 2642d23b629..f1a807e38fd 100644
|
||||
--- a/src/broadcom/compiler/v3d_compiler.h
|
||||
+++ b/src/broadcom/compiler/v3d_compiler.h
|
||||
@@ -614,6 +614,7 @@ struct v3d_ra_node_info {
|
||||
uint32_t priority;
|
||||
uint8_t class_bits;
|
||||
bool is_program_end;
|
||||
+ bool unused;
|
||||
|
||||
/* V3D 7.x */
|
||||
bool is_ldunif_dst;
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 495644bb557..0ab0474424f 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -386,6 +386,7 @@ add_node(struct v3d_compile *c, uint32_t temp, uint8_t class_bits)
|
||||
c->nodes.info[node].priority = 0;
|
||||
c->nodes.info[node].is_ldunif_dst = false;
|
||||
c->nodes.info[node].is_program_end = false;
|
||||
+ c->nodes.info[node].unused = false;
|
||||
}
|
||||
|
||||
/* The spill offset for this thread takes a bit of setup, so do it once at
|
||||
@@ -918,6 +919,12 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
BITSET_WORD *regs,
|
||||
unsigned int *out)
|
||||
{
|
||||
+ /* If this node is for an unused temp, ignore. */
|
||||
+ if (v3d_ra->nodes->info[node].unused) {
|
||||
+ *out = 0;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
/* In V3D 7.x, try to assign rf0 to temps used as ldunif's dst
|
||||
* so we can avoid turning them into ldunifrf (which uses the
|
||||
* cond field to encode the dst and would prevent merge with
|
||||
@@ -1331,6 +1338,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
for (uint32_t i = 0; i < num_ra_nodes; i++) {
|
||||
c->nodes.info[i].is_ldunif_dst = false;
|
||||
c->nodes.info[i].is_program_end = false;
|
||||
+ c->nodes.info[i].unused = false;
|
||||
c->nodes.info[i].priority = 0;
|
||||
c->nodes.info[i].class_bits = 0;
|
||||
if (c->devinfo->has_accumulators && i < ACC_COUNT) {
|
||||
@@ -1396,6 +1404,12 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
|
||||
/* Add register interferences based on liveness data */
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
+ /* And while we are here, let's also flag nodes for
|
||||
+ * unused temps.
|
||||
+ */
|
||||
+ if (c->temp_start[i] > c->temp_end[i])
|
||||
+ c->nodes.info[temp_to_node(c, i)].unused = true;
|
||||
+
|
||||
for (uint32_t j = i + 1; j < c->num_temps; j++) {
|
||||
if (interferes(c->temp_start[i], c->temp_end[i],
|
||||
c->temp_start[j], c->temp_end[j])) {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,83 +0,0 @@
|
||||
From 851704169d59e28c5429b06d05e5ef952be893a2 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 15 May 2023 10:02:10 +0200
|
||||
Subject: [PATCH 071/142] broadcom/compiler: only assign rf0 as last resort in
|
||||
V3D 7.x
|
||||
|
||||
So we can use it for ldunif(a) and avoid generating ldunif(a)rf which
|
||||
can't be paired with conditional instructions.
|
||||
|
||||
shader-db (pi5):
|
||||
|
||||
total instructions in shared programs: 11357802 -> 11338883 (-0.17%)
|
||||
instructions in affected programs: 7117889 -> 7098970 (-0.27%)
|
||||
helped: 24264
|
||||
HURT: 17574
|
||||
Instructions are helped.
|
||||
|
||||
total uniforms in shared programs: 3857808 -> 3857815 (<.01%)
|
||||
uniforms in affected programs: 92 -> 99 (7.61%)
|
||||
helped: 0
|
||||
HURT: 1
|
||||
|
||||
total max-temps in shared programs: 2230904 -> 2230199 (-0.03%)
|
||||
max-temps in affected programs: 52309 -> 51604 (-1.35%)
|
||||
helped: 1219
|
||||
HURT: 725
|
||||
Max-temps are helped.
|
||||
|
||||
total sfu-stalls in shared programs: 15021 -> 15236 (1.43%)
|
||||
sfu-stalls in affected programs: 6848 -> 7063 (3.14%)
|
||||
helped: 1866
|
||||
HURT: 1704
|
||||
Inconclusive result
|
||||
|
||||
total inst-and-stalls in shared programs: 11372823 -> 11354119 (-0.16%)
|
||||
inst-and-stalls in affected programs: 7149177 -> 7130473 (-0.26%)
|
||||
helped: 24315
|
||||
HURT: 17561
|
||||
Inst-and-stalls are helped.
|
||||
|
||||
total nops in shared programs: 273624 -> 273711 (0.03%)
|
||||
nops in affected programs: 31562 -> 31649 (0.28%)
|
||||
helped: 1619
|
||||
HURT: 1854
|
||||
Inconclusive result (value mean confidence interval includes 0).
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 13 +++++++++++++
|
||||
1 file changed, 13 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index 0ab0474424f..8eac2b75bd7 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -950,6 +950,11 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
|
||||
+
|
||||
+ /* Try to keep rf0 available for ldunif in 7.x (see above). */
|
||||
+ if (v3d_ra->devinfo->ver >= 71 && phys_off == 0)
|
||||
+ continue;
|
||||
+
|
||||
int phys = v3d_ra->phys_index + phys_off;
|
||||
|
||||
if (BITSET_TEST(regs, phys)) {
|
||||
@@ -959,6 +964,14 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
}
|
||||
}
|
||||
|
||||
+ /* If we couldn't allocate, do try to assign rf0 if it is available. */
|
||||
+ if (v3d_ra->devinfo->ver >= 71 &&
|
||||
+ BITSET_TEST(regs, v3d_ra->phys_index)) {
|
||||
+ v3d_ra->next_phys = 1;
|
||||
+ *out = v3d_ra->phys_index;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
return false;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,30 +0,0 @@
|
||||
From 0d3fd30d67ffc0195b0783e30ab6afbbe403310a Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 28 Apr 2021 14:31:38 +0200
|
||||
Subject: [PATCH 072/142] v3dv: recover non-conformant warning for not fully
|
||||
supported hw
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_device.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
|
||||
index d5de3517670..d29ffad3531 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_device.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_device.c
|
||||
@@ -1212,6 +1212,12 @@ create_physical_device(struct v3dv_instance *instance,
|
||||
|
||||
list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
|
||||
|
||||
+ if (device->devinfo.ver != 42) {
|
||||
+ fprintf(stderr, "WARNING: v3dv support for hw version %i is neither "
|
||||
+ "a complete nor a conformant Vulkan implementation. Testing "
|
||||
+ "use only.\n", device->devinfo.ver);
|
||||
+ }
|
||||
+
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,504 +0,0 @@
|
||||
From 52b5ac62b367ae89574c8031fdcf7c1dae05c942 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 29 Jun 2021 11:59:53 +0200
|
||||
Subject: [PATCH 073/142] v3dv/meson: add v71 hw generation
|
||||
|
||||
Starting point for v71 version inclusion.
|
||||
|
||||
This just adds it as one of the versions to be compiled (on meson),
|
||||
updates the v3dX/v3dv_X macros, and updates the code enough to get it
|
||||
compiling when building using the two versions. For any packet not
|
||||
available on v71 we just provide a generic asserted placeholder of
|
||||
generation not supported.
|
||||
|
||||
Any real v71 support will be implemented on following commits.
|
||||
---
|
||||
src/broadcom/vulkan/meson.build | 6 +-
|
||||
src/broadcom/vulkan/v3dv_private.h | 7 +++
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 75 +++++++++++++++++++++++--
|
||||
src/broadcom/vulkan/v3dvx_image.c | 16 +++++-
|
||||
src/broadcom/vulkan/v3dvx_meta_common.c | 32 +++++++++++
|
||||
src/broadcom/vulkan/v3dvx_pipeline.c | 5 ++
|
||||
src/broadcom/vulkan/v3dvx_queue.c | 11 ++++
|
||||
7 files changed, 142 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/meson.build b/src/broadcom/vulkan/meson.build
|
||||
index ad032d832ad..3da7364686f 100644
|
||||
--- a/src/broadcom/vulkan/meson.build
|
||||
+++ b/src/broadcom/vulkan/meson.build
|
||||
@@ -27,6 +27,7 @@ v3dv_entrypoints = custom_target(
|
||||
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'v3dv',
|
||||
'--beta', with_vulkan_beta.to_string(),
|
||||
'--device-prefix', 'ver42',
|
||||
+ '--device-prefix', 'ver71',
|
||||
],
|
||||
depend_files : vk_entrypoints_gen_depend_files,
|
||||
)
|
||||
@@ -67,10 +68,7 @@ files_per_version = files(
|
||||
'v3dvx_queue.c',
|
||||
)
|
||||
|
||||
-# The vulkan driver only supports version >= 42, which is the version present in
|
||||
-# Rpi4. We need to explicitly set it as we are reusing pieces from the GL v3d
|
||||
-# driver.
|
||||
-v3d_versions = ['42']
|
||||
+v3d_versions = ['42', '71']
|
||||
|
||||
v3dv_flags = []
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
|
||||
index c6707211529..6bdf338c67b 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dv_private.h
|
||||
@@ -2608,6 +2608,9 @@ u64_compare(const void *key1, const void *key2)
|
||||
case 42: \
|
||||
v3d_X_thing = &v3d42_##thing; \
|
||||
break; \
|
||||
+ case 71: \
|
||||
+ v3d_X_thing = &v3d71_##thing; \
|
||||
+ break; \
|
||||
default: \
|
||||
unreachable("Unsupported hardware generation"); \
|
||||
} \
|
||||
@@ -2626,6 +2629,10 @@ u64_compare(const void *key1, const void *key2)
|
||||
# define v3dX(x) v3d42_##x
|
||||
# include "v3dvx_private.h"
|
||||
# undef v3dX
|
||||
+
|
||||
+# define v3dX(x) v3d71_##x
|
||||
+# include "v3dvx_private.h"
|
||||
+# undef v3dX
|
||||
#endif
|
||||
|
||||
#ifdef ANDROID
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index f182b790d36..b958e634c82 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -56,10 +56,15 @@ v3dX(job_emit_enable_double_buffer)(struct v3dv_job *job)
|
||||
};
|
||||
config.width_in_pixels = tiling->width;
|
||||
config.height_in_pixels = tiling->height;
|
||||
+#if V3D_VERSION == 42
|
||||
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
uint8_t *rewrite_addr = (uint8_t *)job->bcl_tile_binning_mode_ptr;
|
||||
cl_packet_pack(TILE_BINNING_MODE_CFG)(NULL, rewrite_addr, &config);
|
||||
@@ -82,10 +87,15 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
|
||||
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
|
||||
config.width_in_pixels = tiling->width;
|
||||
config.height_in_pixels = tiling->height;
|
||||
+#if V3D_VERSION == 42
|
||||
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
|
||||
/* There's definitely nothing in the VCD cache we want. */
|
||||
@@ -649,10 +659,15 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
* bit and instead we have to emit a single clear of all tile buffers.
|
||||
*/
|
||||
if (use_global_zs_clear || use_global_rt_clear) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = use_global_zs_clear;
|
||||
clear.clear_all_render_targets = use_global_rt_clear;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -824,7 +839,12 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
config.number_of_render_targets = MAX2(subpass->color_count, 1);
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
+#if V3D_VERSION == 42
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
|
||||
const struct v3dv_image_view *iview =
|
||||
@@ -920,7 +940,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
const struct v3d_resource_slice *slice =
|
||||
&image->planes[plane].slices[iview->vk.base_mip_level];
|
||||
|
||||
- const uint32_t *clear_color =
|
||||
+ UNUSED const uint32_t *clear_color =
|
||||
&state->attachments[attachment_idx].clear_value.color[0];
|
||||
|
||||
uint32_t clear_pad = 0;
|
||||
@@ -937,13 +957,19 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
|
||||
clear.clear_color_low_32_bits = clear_color[0];
|
||||
clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
|
||||
clear.clear_color_mid_low_32_bits =
|
||||
((clear_color[1] >> 24) | (clear_color[2] << 8));
|
||||
@@ -951,17 +977,28 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
|
||||
if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
|
||||
clear.uif_padded_height_in_uif_blocks = clear_pad;
|
||||
clear.clear_color_high_16_bits = clear_color[3] >> 16;
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
v3dX(cmd_buffer_render_pass_setup_render_target)
|
||||
(cmd_buffer, 0, &rt.render_target_0_internal_bpp,
|
||||
@@ -976,6 +1013,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
(cmd_buffer, 3, &rt.render_target_3_internal_bpp,
|
||||
&rt.render_target_3_internal_type, &rt.render_target_3_clamp);
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
/* Ends rendering mode config. */
|
||||
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
|
||||
@@ -1036,10 +1077,15 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
if (cmd_buffer->state.tile_aligned_render_area &&
|
||||
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = !job->early_zs_clear;
|
||||
clear.clear_all_render_targets = true;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
cl_emit(rcl, END_OF_TILE_MARKER, end);
|
||||
}
|
||||
@@ -1065,7 +1111,9 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
* now, would need to change if we allow multiple viewports
|
||||
*/
|
||||
float *vptranslate = dynamic->viewport.translate[0];
|
||||
+#if V3D_VERSION == 42
|
||||
float *vpscale = dynamic->viewport.scale[0];
|
||||
+#endif
|
||||
|
||||
struct v3dv_job *job = cmd_buffer->state.job;
|
||||
assert(job);
|
||||
@@ -1078,10 +1126,15 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
|
||||
clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
|
||||
clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
float translate_z, scale_z;
|
||||
v3dv_cmd_buffer_state_get_viewport_z_xform(&cmd_buffer->state, 0,
|
||||
@@ -1591,16 +1644,20 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
assert(pipeline);
|
||||
|
||||
- bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
|
||||
-
|
||||
v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
+ bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
|
||||
cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
|
||||
config.early_z_enable = enable_ez;
|
||||
config.early_z_updates_enable = config.early_z_enable &&
|
||||
pipeline->z_updates_enable;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
|
||||
void
|
||||
@@ -2031,10 +2088,12 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
pipeline->vpm_cfg.Gv);
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
struct v3dv_bo *default_attribute_values =
|
||||
pipeline->default_attribute_values != NULL ?
|
||||
pipeline->default_attribute_values :
|
||||
pipeline->device->default_attribute_float;
|
||||
+#endif
|
||||
|
||||
cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
|
||||
pipeline->shader_state_record, shader) {
|
||||
@@ -2060,8 +2119,10 @@ v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
|
||||
shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
shader.address_of_default_attribute_values =
|
||||
v3dv_cl_address(default_attribute_values, 0);
|
||||
+#endif
|
||||
|
||||
shader.any_shader_reads_hardware_written_primitive_id =
|
||||
(pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid;
|
||||
@@ -2399,11 +2460,17 @@ v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buf
|
||||
|
||||
assert(iview->plane_count == 1);
|
||||
*rt_bpp = iview->planes[0].internal_bpp;
|
||||
- *rt_type = iview->planes[0].internal_type;
|
||||
if (vk_format_is_int(iview->vk.view_format))
|
||||
+#if V3D_VERSION == 42
|
||||
+ *rt_type = iview->planes[0].internal_type;
|
||||
+ if (vk_format_is_int(iview->vk.format))
|
||||
*rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
|
||||
else if (vk_format_is_srgb(iview->vk.view_format))
|
||||
*rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
|
||||
else
|
||||
*rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c
|
||||
index 80a3e5bfde8..dac6ff2741f 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_image.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_image.c
|
||||
@@ -76,8 +76,6 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
|
||||
tex.swizzle_b = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[2]);
|
||||
tex.swizzle_a = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[3]);
|
||||
|
||||
- tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
|
||||
-
|
||||
tex.texture_type = image_view->format->planes[plane].tex_type;
|
||||
|
||||
if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
|
||||
@@ -110,7 +108,16 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
|
||||
|
||||
tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64;
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
+ tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
|
||||
+#endif
|
||||
+
|
||||
+#if V3D_VERSION == 42
|
||||
tex.srgb = vk_format_is_srgb(image_view->vk.view_format);
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
/* At this point we don't have the job. That's the reason the first
|
||||
* parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
|
||||
@@ -166,7 +173,12 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
|
||||
|
||||
assert(buffer_view->format->plane_count == 1);
|
||||
tex.texture_type = buffer_view->format->planes[0].tex_type;
|
||||
+#if V3D_VERSION == 42
|
||||
tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
/* At this point we don't have the job. That's the reason the first
|
||||
* parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
index 04147b82cbd..2db07ea7427 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
@@ -58,7 +58,12 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
config.number_of_render_targets = 1;
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
+#if V3D_VERSION == 42
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
config.internal_depth_type = fb->internal_depth_type;
|
||||
}
|
||||
|
||||
@@ -88,14 +93,20 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
}
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
const uint32_t *color = &clear_info->clear_value->color[0];
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
|
||||
clear.clear_color_low_32_bits = color[0];
|
||||
clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
|
||||
clear.clear_color_mid_low_32_bits =
|
||||
((color[1] >> 24) | (color[2] << 8));
|
||||
@@ -103,22 +114,37 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
((color[2] >> 24) | ((color[3] & 0xffff) << 8));
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
|
||||
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
|
||||
clear.uif_padded_height_in_uif_blocks = clear_pad;
|
||||
clear.clear_color_high_16_bits = color[3] >> 16;
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
rt.render_target_0_internal_bpp = tiling->internal_bpp;
|
||||
rt.render_target_0_internal_type = fb->internal_type;
|
||||
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||||
clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
|
||||
@@ -179,10 +205,16 @@ emit_frame_setup(struct v3dv_job *job,
|
||||
*/
|
||||
if (clear_value &&
|
||||
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = true;
|
||||
clear.clear_all_render_targets = true;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
cl_emit(rcl, END_OF_TILE_MARKER, end);
|
||||
}
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
index 5d32d414ed8..922698b08a2 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
@@ -447,10 +447,15 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
|
||||
/* FIXME: Use combined input/output size flag in the common case (also
|
||||
* on v3d, see v3dx_draw).
|
||||
*/
|
||||
+#if V3D_VERSION == 42
|
||||
shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
|
||||
prog_data_vs_bin->separate_segments;
|
||||
shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
|
||||
prog_data_vs->separate_segments;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
|
||||
shader.coordinate_shader_input_vpm_segment_size =
|
||||
prog_data_vs_bin->separate_segments ?
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c
|
||||
index efe63de425c..1a26d04aef7 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_queue.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_queue.c
|
||||
@@ -42,14 +42,25 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
|
||||
config.image_height_pixels = 1;
|
||||
config.number_of_render_targets = 1;
|
||||
config.multisample_mode_4x = false;
|
||||
+#if V3D_VERSION == 42
|
||||
config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("HW generation 71 not supported yet.");
|
||||
+#endif
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
|
||||
rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
|
||||
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
}
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ unreachable("Hardware generation 71 not supported yet.");
|
||||
+#endif
|
||||
+
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||||
clear.z_clear_value = 1.0f;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,29 +0,0 @@
|
||||
From 7aa016bca8bb1bf449ea79505692353c0bd174b8 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 10 Nov 2021 10:06:50 +0100
|
||||
Subject: [PATCH 074/142] v3dv: expose V3D revision number in device name
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_device.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
|
||||
index d29ffad3531..3034b561480 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_device.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_device.c
|
||||
@@ -1123,8 +1123,10 @@ create_physical_device(struct v3dv_instance *instance,
|
||||
device->next_program_id = 0;
|
||||
|
||||
ASSERTED int len =
|
||||
- asprintf(&device->name, "V3D %d.%d",
|
||||
- device->devinfo.ver / 10, device->devinfo.ver % 10);
|
||||
+ asprintf(&device->name, "V3D %d.%d.%d",
|
||||
+ device->devinfo.ver / 10,
|
||||
+ device->devinfo.ver % 10,
|
||||
+ device->devinfo.rev);
|
||||
assert(len != -1);
|
||||
|
||||
v3dv_physical_device_init_disk_cache(device);
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,54 +0,0 @@
|
||||
From fb9e95b7e1d5987fd25e914635c4e09d81ea9561 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 10 Nov 2021 07:54:35 +0100
|
||||
Subject: [PATCH 075/142] v3dv/device: handle new rpi5 device (bcm2712)
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This includes both master and primary devices.
|
||||
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_device.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
|
||||
index 3034b561480..c8719d33f15 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_device.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_device.c
|
||||
@@ -1287,7 +1287,8 @@ enumerate_devices(struct vk_instance *vk_instance)
|
||||
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) {
|
||||
char **compat = devices[i]->deviceinfo.platform->compatible;
|
||||
while (*compat) {
|
||||
- if (strncmp(*compat, "brcm,2711-v3d", 13) == 0) {
|
||||
+ if (strncmp(*compat, "brcm,2711-v3d", 13) == 0 ||
|
||||
+ strncmp(*compat, "brcm,2712-v3d", 13) == 0) {
|
||||
v3d_idx = i;
|
||||
break;
|
||||
}
|
||||
@@ -1296,8 +1297,9 @@ enumerate_devices(struct vk_instance *vk_instance)
|
||||
} else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
|
||||
char **compat = devices[i]->deviceinfo.platform->compatible;
|
||||
while (*compat) {
|
||||
- if (strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
|
||||
- strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0 ) {
|
||||
+ if (strncmp(*compat, "brcm,bcm2712-vc6", 16) == 0 ||
|
||||
+ strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
|
||||
+ strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0) {
|
||||
vc4_idx = i;
|
||||
break;
|
||||
}
|
||||
@@ -1334,6 +1336,8 @@ v3dv_physical_device_device_id(struct v3dv_physical_device *dev)
|
||||
switch (dev->devinfo.ver) {
|
||||
case 42:
|
||||
return 0xBE485FD3; /* Broadcom deviceID for 2711 */
|
||||
+ case 71:
|
||||
+ return 0x55701C33; /* Broadcom deviceID for 2712 */
|
||||
default:
|
||||
unreachable("Unsupported V3D version");
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,32 +0,0 @@
|
||||
From c4f957af4fb0e10abf0a7ffad4f7a468633b7d99 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 20 Jul 2021 14:00:44 +0200
|
||||
Subject: [PATCH 076/142] v3dv/cmd_buffer: emit TILE_BINNING_MODE_CFG for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index b958e634c82..17b2f46850d 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -94,7 +94,14 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
+ config.log2_tile_width = log2_tile_size(tiling->tile_width);
|
||||
+ config.log2_tile_height = log2_tile_size(tiling->tile_height);
|
||||
+ /* FIXME: ideally we would like next assert on the packet header (as is
|
||||
+ * general, so also applies to GL). We would need to expand
|
||||
+ * gen_pack_header for that.
|
||||
+ */
|
||||
+ assert(config.log2_tile_width == config.log2_tile_height ||
|
||||
+ config.log2_tile_width == config.log2_tile_height + 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 1934ac07df73cb685f6550b8b0f5b4f2ead11396 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 20 Jul 2021 14:33:00 +0200
|
||||
Subject: [PATCH 077/142] v3dv: emit TILE_RENDERING_MODE_CFG_COMMON for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 ++++++++-
|
||||
src/broadcom/vulkan/v3dvx_meta_common.c | 9 ++++++++-
|
||||
2 files changed, 16 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index 17b2f46850d..7837b460051 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -850,7 +850,14 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
+ config.log2_tile_width = log2_tile_size(tiling->tile_width);
|
||||
+ config.log2_tile_height = log2_tile_size(tiling->tile_height);
|
||||
+ /* FIXME: ideally we would like next assert on the packet header (as is
|
||||
+ * general, so also applies to GL). We would need to expand
|
||||
+ * gen_pack_header for that.
|
||||
+ */
|
||||
+ assert(config.log2_tile_width == config.log2_tile_height ||
|
||||
+ config.log2_tile_width == config.log2_tile_height + 1);
|
||||
#endif
|
||||
|
||||
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
index 2db07ea7427..e4084d851fc 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
@@ -62,7 +62,14 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ config.log2_tile_width = log2_tile_size(tiling->tile_width);
|
||||
+ config.log2_tile_height = log2_tile_size(tiling->tile_height);
|
||||
+ /* FIXME: ideally we would like next assert on the packet header (as is
|
||||
+ * general, so also applies to GL). We would need to expand
|
||||
+ * gen_pack_header for that.
|
||||
+ */
|
||||
+ assert(config.log2_tile_width == config.log2_tile_height ||
|
||||
+ config.log2_tile_width == config.log2_tile_height + 1);
|
||||
#endif
|
||||
config.internal_depth_type = fb->internal_depth_type;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,315 +0,0 @@
|
||||
From f0f9eea3cad83ed8824c6a7686150327407a5286 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 22 Jul 2021 14:26:13 +0200
|
||||
Subject: [PATCH 078/142] v3dv/cmd_buffer: emit
|
||||
TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1 for v71
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 186 +++++++++++++++++-------
|
||||
src/broadcom/vulkan/v3dvx_meta_common.c | 12 +-
|
||||
src/broadcom/vulkan/v3dvx_private.h | 11 +-
|
||||
3 files changed, 147 insertions(+), 62 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index 7837b460051..c6307890da5 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -800,6 +800,103 @@ set_rcl_early_z_config(struct v3dv_job *job,
|
||||
}
|
||||
}
|
||||
|
||||
+/* Note that for v71, render target cfg packets have just one field that
|
||||
+ * combines the internal type and clamp mode. For simplicity we keep just one
|
||||
+ * helper.
|
||||
+ *
|
||||
+ * Note: rt_type is in fact an "enum V3DX(Internal_Type)".
|
||||
+ *
|
||||
+ * FIXME: for v71 we are not returning all the possible combinations for
|
||||
+ * render target internal type and clamp. For example for int types we are
|
||||
+ * always using clamp int, and for 16f we are using clamp none or pos (that
|
||||
+ * seems the equivalent for no-clamp on 4.2), but not pq or hlg. In summary
|
||||
+ * right now we are just porting what we were doing on 4.2
|
||||
+ */
|
||||
+uint32_t
|
||||
+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
|
||||
+ VkFormat vk_format)
|
||||
+{
|
||||
+#if V3D_VERSION == 42
|
||||
+ if (vk_format_is_int(vk_format))
|
||||
+ return V3D_RENDER_TARGET_CLAMP_INT;
|
||||
+ else if (vk_format_is_srgb(vk_format))
|
||||
+ return V3D_RENDER_TARGET_CLAMP_NORM;
|
||||
+ else
|
||||
+ return V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ switch (rt_type) {
|
||||
+ case V3D_INTERNAL_TYPE_8I:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
|
||||
+ case V3D_INTERNAL_TYPE_8UI:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
|
||||
+ case V3D_INTERNAL_TYPE_8:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_8;
|
||||
+ case V3D_INTERNAL_TYPE_16I:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
|
||||
+ case V3D_INTERNAL_TYPE_16UI:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
|
||||
+ case V3D_INTERNAL_TYPE_16F:
|
||||
+ return vk_format_is_srgb(vk_format) ?
|
||||
+ V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
|
||||
+ V3D_RENDER_TARGET_TYPE_CLAMP_16F;
|
||||
+ case V3D_INTERNAL_TYPE_32I:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
|
||||
+ case V3D_INTERNAL_TYPE_32UI:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
|
||||
+ case V3D_INTERNAL_TYPE_32F:
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
|
||||
+ default:
|
||||
+ unreachable("Unknown internal render target type");
|
||||
+ }
|
||||
+
|
||||
+ return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+cmd_buffer_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
+ int rt,
|
||||
+ uint32_t *rt_bpp,
|
||||
+#if V3D_VERSION == 42
|
||||
+ uint32_t *rt_type,
|
||||
+ uint32_t *rt_clamp)
|
||||
+#else
|
||||
+ uint32_t *rt_type_clamp)
|
||||
+#endif
|
||||
+{
|
||||
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
+
|
||||
+ assert(state->subpass_idx < state->pass->subpass_count);
|
||||
+ const struct v3dv_subpass *subpass =
|
||||
+ &state->pass->subpasses[state->subpass_idx];
|
||||
+
|
||||
+ if (rt >= subpass->color_count)
|
||||
+ return;
|
||||
+
|
||||
+ struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
|
||||
+ const uint32_t attachment_idx = attachment->attachment;
|
||||
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
|
||||
+ return;
|
||||
+
|
||||
+ assert(attachment_idx < state->framebuffer->attachment_count &&
|
||||
+ attachment_idx < state->attachment_alloc_count);
|
||||
+ struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view;
|
||||
+ assert(vk_format_is_color(iview->vk.format));
|
||||
+
|
||||
+ assert(iview->plane_count == 1);
|
||||
+ *rt_bpp = iview->planes[0].internal_bpp;
|
||||
+#if V3D_VERSION == 42
|
||||
+ *rt_type = iview->planes[0].internal_type;
|
||||
+ *rt_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type,
|
||||
+ iview->vk.format);
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ *rt_type_clamp = v3dX(clamp_for_format_and_type)(iview->planes[0].internal_type,
|
||||
+ iview->vk.format);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
void
|
||||
v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
@@ -939,10 +1036,20 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
*/
|
||||
job->early_zs_clear = do_early_zs_clear;
|
||||
|
||||
+#if V3D_VERSION >= 71
|
||||
+ uint32_t base_addr = 0;
|
||||
+#endif
|
||||
for (uint32_t i = 0; i < subpass->color_count; i++) {
|
||||
uint32_t attachment_idx = subpass->color_attachments[i].attachment;
|
||||
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
|
||||
+ if (attachment_idx == VK_ATTACHMENT_UNUSED) {
|
||||
+#if V3D_VERSION >= 71
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
+ rt.render_target_number = i;
|
||||
+ rt.stride = 1; /* Unused */
|
||||
+ }
|
||||
+#endif
|
||||
continue;
|
||||
+ }
|
||||
|
||||
struct v3dv_image_view *iview =
|
||||
state->attachments[attachment_idx].image_view;
|
||||
@@ -978,9 +1085,6 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
-#endif
|
||||
|
||||
if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
#if V3D_VERSION == 42
|
||||
@@ -1010,27 +1114,44 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
unreachable("HW generation 71 not supported yet.");
|
||||
#endif
|
||||
}
|
||||
+
|
||||
+#if V3D_VERSION >= 71
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
+ rt.clear_color_low_bits = clear_color[0];
|
||||
+ cmd_buffer_render_pass_setup_render_target(cmd_buffer, i, &rt.internal_bpp,
|
||||
+ &rt.internal_type_and_clamping);
|
||||
+ rt.stride =
|
||||
+ v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
|
||||
+ v3d_internal_bpp_words(rt.internal_bpp));
|
||||
+ rt.base_address = base_addr;
|
||||
+ rt.render_target_number = i;
|
||||
+
|
||||
+ /* base_addr in multiples of 512 bits. We divide by 8 because stride
|
||||
+ * is in 128-bit units, but it is packing 2 rows worth of data, so we
|
||||
+ * need to divide it by 2 so it is only 1 row, and then again by 4 so
|
||||
+ * it is in 512-bit units.
|
||||
+ */
|
||||
+ base_addr += (tiling->tile_height * rt.stride) / 8;
|
||||
+ }
|
||||
+#endif
|
||||
}
|
||||
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
- v3dX(cmd_buffer_render_pass_setup_render_target)
|
||||
+ cmd_buffer_render_pass_setup_render_target
|
||||
(cmd_buffer, 0, &rt.render_target_0_internal_bpp,
|
||||
&rt.render_target_0_internal_type, &rt.render_target_0_clamp);
|
||||
- v3dX(cmd_buffer_render_pass_setup_render_target)
|
||||
+ cmd_buffer_render_pass_setup_render_target
|
||||
(cmd_buffer, 1, &rt.render_target_1_internal_bpp,
|
||||
&rt.render_target_1_internal_type, &rt.render_target_1_clamp);
|
||||
- v3dX(cmd_buffer_render_pass_setup_render_target)
|
||||
+ cmd_buffer_render_pass_setup_render_target
|
||||
(cmd_buffer, 2, &rt.render_target_2_internal_bpp,
|
||||
&rt.render_target_2_internal_type, &rt.render_target_2_clamp);
|
||||
- v3dX(cmd_buffer_render_pass_setup_render_target)
|
||||
+ cmd_buffer_render_pass_setup_render_target
|
||||
(cmd_buffer, 3, &rt.render_target_3_internal_bpp,
|
||||
&rt.render_target_3_internal_type, &rt.render_target_3_clamp);
|
||||
}
|
||||
#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
-#endif
|
||||
|
||||
/* Ends rendering mode config. */
|
||||
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
|
||||
@@ -2445,46 +2566,3 @@ v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
buffer->mem_offset + offset);
|
||||
}
|
||||
}
|
||||
-
|
||||
-void
|
||||
-v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
- int rt,
|
||||
- uint32_t *rt_bpp,
|
||||
- uint32_t *rt_type,
|
||||
- uint32_t *rt_clamp)
|
||||
-{
|
||||
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
-
|
||||
- assert(state->subpass_idx < state->pass->subpass_count);
|
||||
- const struct v3dv_subpass *subpass =
|
||||
- &state->pass->subpasses[state->subpass_idx];
|
||||
-
|
||||
- if (rt >= subpass->color_count)
|
||||
- return;
|
||||
-
|
||||
- struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
|
||||
- const uint32_t attachment_idx = attachment->attachment;
|
||||
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
|
||||
- return;
|
||||
-
|
||||
- assert(attachment_idx < state->framebuffer->attachment_count &&
|
||||
- attachment_idx < state->attachment_alloc_count);
|
||||
- struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view;
|
||||
- assert(vk_format_is_color(iview->vk.format));
|
||||
-
|
||||
- assert(iview->plane_count == 1);
|
||||
- *rt_bpp = iview->planes[0].internal_bpp;
|
||||
- if (vk_format_is_int(iview->vk.view_format))
|
||||
-#if V3D_VERSION == 42
|
||||
- *rt_type = iview->planes[0].internal_type;
|
||||
- if (vk_format_is_int(iview->vk.format))
|
||||
- *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
|
||||
- else if (vk_format_is_srgb(iview->vk.view_format))
|
||||
- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
|
||||
- else
|
||||
- *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
-#endif
|
||||
-}
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
index e4084d851fc..c6391bc6d83 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include "broadcom/common/v3d_macros.h"
|
||||
#include "broadcom/common/v3d_tfu.h"
|
||||
+#include "broadcom/common/v3d_util.h"
|
||||
#include "broadcom/cle/v3dx_pack.h"
|
||||
#include "broadcom/compiler/v3d_compiler.h"
|
||||
|
||||
@@ -150,7 +151,16 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
+ rt.internal_bpp = tiling->internal_bpp;
|
||||
+ rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type,
|
||||
+ fb->vk_format);
|
||||
+ rt.stride =
|
||||
+ v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
|
||||
+ v3d_internal_bpp_words(rt.internal_bpp));
|
||||
+ rt.base_address = 0;
|
||||
+ rt.render_target_number = 0;
|
||||
+ }
|
||||
#endif
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h
|
||||
index ad8ddfa5731..a4157d11c7c 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dvx_private.h
|
||||
@@ -125,13 +125,6 @@ v3dX(get_hw_clear_color)(const VkClearColorValue *color,
|
||||
uint32_t internal_size,
|
||||
uint32_t *hw_color);
|
||||
|
||||
-void
|
||||
-v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
- int rt,
|
||||
- uint32_t *rt_bpp,
|
||||
- uint32_t *rt_type,
|
||||
- uint32_t *rt_clamp);
|
||||
-
|
||||
/* Used at v3dv_device */
|
||||
|
||||
void
|
||||
@@ -325,3 +318,7 @@ uint32_t v3dX(max_descriptor_bo_size)(void);
|
||||
uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane);
|
||||
|
||||
uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane);
|
||||
+
|
||||
+uint32_t
|
||||
+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
|
||||
+ VkFormat vk_format);
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,25 +0,0 @@
|
||||
From 7c89d8026fd550282d54933f37ffc2773869326f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Mon, 26 Jul 2021 15:08:11 +0200
|
||||
Subject: [PATCH 079/142] v3dvx/cmd_buffer: emit CLEAR_RENDER_TARGETS for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index c6307890da5..ae1c21ae00b 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -1219,7 +1219,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
+ cl_emit(rcl, CLEAR_RENDER_TARGETS, clear_rt);
|
||||
#endif
|
||||
}
|
||||
cl_emit(rcl, END_OF_TILE_MARKER, end);
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,38 +0,0 @@
|
||||
From 2eb29b57fde2acda76e12953b3a1050f3056b39d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Sun, 19 Sep 2021 23:37:32 +0200
|
||||
Subject: [PATCH 080/142] v3dv/cmd_buffer: emit CLIPPER_XY_SCALING for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 7 ++++---
|
||||
1 file changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index ae1c21ae00b..2e525a11619 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -1246,9 +1246,7 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
* now, would need to change if we allow multiple viewports
|
||||
*/
|
||||
float *vptranslate = dynamic->viewport.translate[0];
|
||||
-#if V3D_VERSION == 42
|
||||
float *vpscale = dynamic->viewport.scale[0];
|
||||
-#endif
|
||||
|
||||
struct v3dv_job *job = cmd_buffer->state.job;
|
||||
assert(job);
|
||||
@@ -1268,7 +1266,10 @@ v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
|
||||
+ clip.viewport_half_width_in_1_64th_of_pixel = vpscale[0] * 64.0f;
|
||||
+ clip.viewport_half_height_in_1_64th_of_pixel = vpscale[1] * 64.0f;
|
||||
+ }
|
||||
#endif
|
||||
|
||||
float translate_z, scale_z;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,97 +0,0 @@
|
||||
From 611bf6a7445837c7e20416ff9f11a6dad9c543d7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 14 Sep 2021 10:08:19 +0200
|
||||
Subject: [PATCH 081/142] v3dv/uniforms: update VIEWPORT_X/Y_SCALE uniforms for
|
||||
v71
|
||||
|
||||
As with the CLIPPER_XY_SCALING packet, this needs to be computed in 1/64ths
|
||||
of a pixel, instead of 1/256ths of a pixel.
|
||||
|
||||
As these are the kind of values that we usually get from macros, we manually
|
||||
add a v42 and a v71 macro, and define a new helper (V3DV_X) to get the value
|
||||
for the current hw version.
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_private.h | 17 +++++++++++++++++
|
||||
src/broadcom/vulkan/v3dv_uniforms.c | 7 ++++---
|
||||
src/broadcom/vulkan/v3dvx_private.h | 9 +++++++++
|
||||
3 files changed, 30 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
|
||||
index 6bdf338c67b..cd6811b19c2 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dv_private.h
|
||||
@@ -2617,6 +2617,23 @@ u64_compare(const void *key1, const void *key2)
|
||||
v3d_X_thing; \
|
||||
})
|
||||
|
||||
+/* Helper to get hw-specific macro values */
|
||||
+#define V3DV_X(device, thing) ({ \
|
||||
+ __typeof(V3D42_##thing) V3D_X_THING; \
|
||||
+ switch (device->devinfo.ver) { \
|
||||
+ case 42: \
|
||||
+ V3D_X_THING = V3D42_##thing; \
|
||||
+ break; \
|
||||
+ case 71: \
|
||||
+ V3D_X_THING = V3D71_##thing; \
|
||||
+ break; \
|
||||
+ default: \
|
||||
+ unreachable("Unsupported hardware generation"); \
|
||||
+ } \
|
||||
+ V3D_X_THING; \
|
||||
+})
|
||||
+
|
||||
+
|
||||
|
||||
/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
|
||||
* define v3dX for each version supported, because when we compile code that
|
||||
diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c
|
||||
index 72fa9a1b39c..0e681cc4ee2 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_uniforms.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_uniforms.c
|
||||
@@ -497,7 +497,8 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);
|
||||
|
||||
struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
|
||||
-
|
||||
+ float clipper_xy_granularity =
|
||||
+ V3DV_X(cmd_buffer->device, CLIPPER_XY_GRANULARITY);
|
||||
for (int i = 0; i < uinfo->count; i++) {
|
||||
uint32_t data = uinfo->data[i];
|
||||
|
||||
@@ -520,11 +521,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
break;
|
||||
|
||||
case QUNIFORM_VIEWPORT_X_SCALE:
|
||||
- cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
|
||||
+ cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * clipper_xy_granularity);
|
||||
break;
|
||||
|
||||
case QUNIFORM_VIEWPORT_Y_SCALE:
|
||||
- cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
|
||||
+ cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * clipper_xy_granularity);
|
||||
break;
|
||||
|
||||
case QUNIFORM_VIEWPORT_Z_OFFSET: {
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h
|
||||
index a4157d11c7c..ff9ba75cf93 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dvx_private.h
|
||||
@@ -319,6 +319,15 @@ uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane);
|
||||
|
||||
uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane);
|
||||
|
||||
+/* General utils */
|
||||
+
|
||||
+uint32_t
|
||||
+v3dX(clamp_for_format_and_type)(uint32_t rt_type,
|
||||
+ VkFormat vk_format);
|
||||
+
|
||||
+#define V3D42_CLIPPER_XY_GRANULARITY 256.0f
|
||||
+#define V3D71_CLIPPER_XY_GRANULARITY 64.0f
|
||||
+
|
||||
uint32_t
|
||||
v3dX(clamp_for_format_and_type)(uint32_t rt_type,
|
||||
VkFormat vk_format);
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,40 +0,0 @@
|
||||
From 3819efaf2bb6fd8bd9cd45d54fb7254377b2296a Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Tue, 27 Jul 2021 14:02:30 +0200
|
||||
Subject: [PATCH 082/142] v3dv/cmd_buffer: just don't fill up early-z fields
|
||||
for CFG_BITS for v71
|
||||
|
||||
For v71 early_z_enable/early_z_updates_enable is configured with
|
||||
packet 121.
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 9 +++------
|
||||
1 file changed, 3 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index 2e525a11619..fe9f7e43596 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -1783,17 +1783,14 @@ v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
-#if V3D_VERSION == 42
|
||||
- bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
|
||||
cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
|
||||
+#if V3D_VERSION == 42
|
||||
+ bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer);
|
||||
config.early_z_enable = enable_ez;
|
||||
config.early_z_updates_enable = config.early_z_enable &&
|
||||
pipeline->z_updates_enable;
|
||||
- }
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
#endif
|
||||
+ }
|
||||
}
|
||||
|
||||
void
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,219 +0,0 @@
|
||||
From e3b1a578f45ea830d790970115b6de978d56edb8 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 28 Jul 2021 12:01:38 +0200
|
||||
Subject: [PATCH 083/142] v3dv: default vertex attribute values are gen
|
||||
dependent
|
||||
|
||||
Content, structure and size depend on the generation, and so does whether
|
||||
it is needed at all.
|
||||
|
||||
So let's move it to the v3dvx files.
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_device.c | 2 +-
|
||||
src/broadcom/vulkan/v3dv_pipeline.c | 61 ++-------------------------
|
||||
src/broadcom/vulkan/v3dv_private.h | 4 --
|
||||
src/broadcom/vulkan/v3dvx_pipeline.c | 63 ++++++++++++++++++++++++++++
|
||||
src/broadcom/vulkan/v3dvx_private.h | 8 ++++
|
||||
5 files changed, 75 insertions(+), 63 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
|
||||
index c8719d33f15..01e2dd7ac2d 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_device.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_device.c
|
||||
@@ -2043,7 +2043,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
|
||||
device->instance->default_pipeline_cache_enabled);
|
||||
device->default_attribute_float =
|
||||
- v3dv_pipeline_create_default_attribute_values(device, NULL);
|
||||
+ v3dv_X(device, create_default_attribute_values)(device, NULL);
|
||||
|
||||
device->device_address_mem_ctx = ralloc_context(NULL);
|
||||
util_dynarray_init(&device->device_address_bo_list,
|
||||
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
|
||||
index 22f01bdf64b..d012ff8f948 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
|
||||
@@ -2802,62 +2802,6 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
|
||||
}
|
||||
}
|
||||
|
||||
-static bool
|
||||
-pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
|
||||
-{
|
||||
- for (uint8_t i = 0; i < pipeline->va_count; i++) {
|
||||
- if (vk_format_is_int(pipeline->va[i].vk_format))
|
||||
- return true;
|
||||
- }
|
||||
- return false;
|
||||
-}
|
||||
-
|
||||
-/* @pipeline can be NULL. We assume in that case that all the attributes have
|
||||
- * a float format (we only create an all-float BO once and we reuse it with
|
||||
- * all float pipelines), otherwise we look at the actual type of each
|
||||
- * attribute used with the specific pipeline passed in.
|
||||
- */
|
||||
-struct v3dv_bo *
|
||||
-v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
|
||||
- struct v3dv_pipeline *pipeline)
|
||||
-{
|
||||
- uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
|
||||
- struct v3dv_bo *bo;
|
||||
-
|
||||
- bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
|
||||
-
|
||||
- if (!bo) {
|
||||
- fprintf(stderr, "failed to allocate memory for the default "
|
||||
- "attribute values\n");
|
||||
- return NULL;
|
||||
- }
|
||||
-
|
||||
- bool ok = v3dv_bo_map(device, bo, size);
|
||||
- if (!ok) {
|
||||
- fprintf(stderr, "failed to map default attribute values buffer\n");
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
- uint32_t *attrs = bo->map;
|
||||
- uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
|
||||
- for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
|
||||
- attrs[i * 4 + 0] = 0;
|
||||
- attrs[i * 4 + 1] = 0;
|
||||
- attrs[i * 4 + 2] = 0;
|
||||
- VkFormat attr_format =
|
||||
- pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
|
||||
- if (i < va_count && vk_format_is_int(attr_format)) {
|
||||
- attrs[i * 4 + 3] = 1;
|
||||
- } else {
|
||||
- attrs[i * 4 + 3] = fui(1.0);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- v3dv_bo_unmap(device, bo);
|
||||
-
|
||||
- return bo;
|
||||
-}
|
||||
-
|
||||
static void
|
||||
pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
|
||||
const VkPipelineMultisampleStateCreateInfo *ms_info)
|
||||
@@ -2992,9 +2936,10 @@ pipeline_init(struct v3dv_pipeline *pipeline,
|
||||
|
||||
v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
|
||||
|
||||
- if (pipeline_has_integer_vertex_attrib(pipeline)) {
|
||||
+ if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
|
||||
pipeline->default_attribute_values =
|
||||
- v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
|
||||
+ v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
|
||||
+
|
||||
if (!pipeline->default_attribute_values)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
} else {
|
||||
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
|
||||
index cd6811b19c2..a9fab24d19e 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dv_private.h
|
||||
@@ -2500,10 +2500,6 @@ void
|
||||
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache);
|
||||
|
||||
-struct v3dv_bo *
|
||||
-v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
|
||||
- struct v3dv_pipeline *pipeline);
|
||||
-
|
||||
VkResult
|
||||
v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
|
||||
nir_shader *nir,
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
index 922698b08a2..e235220cb14 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
@@ -664,3 +664,66 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
|
||||
}
|
||||
}
|
||||
}
|
||||
+
|
||||
+static bool
|
||||
+pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
|
||||
+{
|
||||
+ for (uint8_t i = 0; i < pipeline->va_count; i++) {
|
||||
+ if (vk_format_is_int(pipeline->va[i].vk_format))
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+bool
|
||||
+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
|
||||
+{
|
||||
+ return pipeline_has_integer_vertex_attrib(pipeline);
|
||||
+}
|
||||
+
|
||||
+/* @pipeline can be NULL. In that case we assume the most common case. For
|
||||
+ * example, for v42 we assume in that case that all the attributes have a
|
||||
+ * float format (we only create an all-float BO once and we reuse it with all
|
||||
+ * float pipelines), otherwise we look at the actual type of each attribute
|
||||
+ * used with the specific pipeline passed in.
|
||||
+ */
|
||||
+struct v3dv_bo *
|
||||
+v3dX(create_default_attribute_values)(struct v3dv_device *device,
|
||||
+ struct v3dv_pipeline *pipeline)
|
||||
+{
|
||||
+ uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
|
||||
+ struct v3dv_bo *bo;
|
||||
+
|
||||
+ bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
|
||||
+
|
||||
+ if (!bo) {
|
||||
+ fprintf(stderr, "failed to allocate memory for the default "
|
||||
+ "attribute values\n");
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ bool ok = v3dv_bo_map(device, bo, size);
|
||||
+ if (!ok) {
|
||||
+ fprintf(stderr, "failed to map default attribute values buffer\n");
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ uint32_t *attrs = bo->map;
|
||||
+ uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
|
||||
+ for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
|
||||
+ attrs[i * 4 + 0] = 0;
|
||||
+ attrs[i * 4 + 1] = 0;
|
||||
+ attrs[i * 4 + 2] = 0;
|
||||
+ VkFormat attr_format =
|
||||
+ pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
|
||||
+ if (i < va_count && vk_format_is_int(attr_format)) {
|
||||
+ attrs[i * 4 + 3] = 1;
|
||||
+ } else {
|
||||
+ attrs[i * 4 + 3] = fui(1.0);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ v3dv_bo_unmap(device, bo);
|
||||
+
|
||||
+ return bo;
|
||||
+}
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h
|
||||
index ff9ba75cf93..036ce11b455 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dvx_private.h
|
||||
@@ -306,6 +306,14 @@ void
|
||||
v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
|
||||
const VkPipelineVertexInputStateCreateInfo *vi_info,
|
||||
const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info);
|
||||
+
|
||||
+bool
|
||||
+v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline);
|
||||
+
|
||||
+struct v3dv_bo *
|
||||
+v3dX(create_default_attribute_values)(struct v3dv_device *device,
|
||||
+ struct v3dv_pipeline *pipeline);
|
||||
+
|
||||
/* Used at v3dv_queue */
|
||||
void
|
||||
v3dX(job_emit_noop)(struct v3dv_job *job);
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,87 +0,0 @@
|
||||
From 8464dc8869f3d2eccfecac7b4358cc0ffe05f081 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 28 Jul 2021 12:05:26 +0200
|
||||
Subject: [PATCH 084/142] v3dv/pipeline: default vertex attributes values are
|
||||
not needed for v71
|
||||
|
||||
They are not part of the shader state record.
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_private.h | 10 +++++++++-
|
||||
src/broadcom/vulkan/v3dvx_pipeline.c | 10 ++++++++++
|
||||
2 files changed, 19 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
|
||||
index a9fab24d19e..300a1ec8ae1 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_private.h
|
||||
+++ b/src/broadcom/vulkan/v3dv_private.h
|
||||
@@ -581,6 +581,10 @@ struct v3dv_device {
|
||||
* being float being float, allowing us to reuse the same BO for all
|
||||
* pipelines matching this requirement. Pipelines that need integer
|
||||
* attributes will create their own BO.
|
||||
+ *
|
||||
+ * Note that since v71 the default attribute values are not needed, so this
|
||||
+ * can be NULL.
|
||||
+ *
|
||||
*/
|
||||
struct v3dv_bo *default_attribute_float;
|
||||
|
||||
@@ -2289,11 +2293,15 @@ struct v3dv_pipeline {
|
||||
unsigned char sha1[20];
|
||||
|
||||
/* In general we can reuse v3dv_device->default_attribute_float, so note
|
||||
- * that the following can be NULL.
|
||||
+ * that the following can be NULL. In 7.x this is not used, so it will be
|
||||
+ * NULL.
|
||||
*
|
||||
* FIXME: the content of this BO will be small, so it could be improved to
|
||||
* be uploaded to a common BO. But as in most cases it will be NULL, it is
|
||||
* not a priority.
|
||||
+ *
|
||||
+ * Note that since v71 the default attribute values are not needed, so this
|
||||
+ * can be NULL.
|
||||
*/
|
||||
struct v3dv_bo *default_attribute_values;
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
index e235220cb14..4dc6d70efe1 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
@@ -665,6 +665,7 @@ v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
|
||||
}
|
||||
}
|
||||
|
||||
+#if V3D_VERSION == 42
|
||||
static bool
|
||||
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
|
||||
{
|
||||
@@ -674,11 +675,16 @@ pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
|
||||
}
|
||||
return false;
|
||||
}
|
||||
+#endif
|
||||
|
||||
bool
|
||||
v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
|
||||
{
|
||||
+#if V3D_VERSION == 42
|
||||
return pipeline_has_integer_vertex_attrib(pipeline);
|
||||
+#endif
|
||||
+
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* @pipeline can be NULL. In that case we assume the most common case. For
|
||||
@@ -691,6 +697,10 @@ struct v3dv_bo *
|
||||
v3dX(create_default_attribute_values)(struct v3dv_device *device,
|
||||
struct v3dv_pipeline *pipeline)
|
||||
{
|
||||
+#if V3D_VERSION >= 71
|
||||
+ return NULL;
|
||||
+#endif
|
||||
+
|
||||
uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
|
||||
struct v3dv_bo *bo;
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,39 +0,0 @@
|
||||
From 339096598660ec34be8087007dd4d66581de1c4e Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 28 Jul 2021 13:45:52 +0200
|
||||
Subject: [PATCH 085/142] v3dv/pipeline: handle GL_SHADER_STATE_RECORD changed
|
||||
size on v71
|
||||
|
||||
It is likely that we would need more changes, as this packet changed,
|
||||
but this is enough to get basic tests running. Any additional support
|
||||
will be handled with new commits.
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_pipeline.c | 5 +----
|
||||
1 file changed, 1 insertion(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
index 4dc6d70efe1..a640c1d084a 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
@@ -360,7 +360,7 @@ v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
|
||||
static void
|
||||
pack_shader_state_record(struct v3dv_pipeline *pipeline)
|
||||
{
|
||||
- assert(sizeof(pipeline->shader_state_record) ==
|
||||
+ assert(sizeof(pipeline->shader_state_record) >=
|
||||
cl_packet_length(GL_SHADER_STATE_RECORD));
|
||||
|
||||
struct v3d_fs_prog_data *prog_data_fs =
|
||||
@@ -453,9 +453,6 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
|
||||
shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
|
||||
prog_data_vs->separate_segments;
|
||||
#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
-#endif
|
||||
|
||||
shader.coordinate_shader_input_vpm_segment_size =
|
||||
prog_data_vs_bin->separate_segments ?
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,89 +0,0 @@
|
||||
From 5b1342eb1e255d17619b1a7b33eaf7b31f5e50a5 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 22 Sep 2021 12:03:58 +0200
|
||||
Subject: [PATCH 086/142] v3dv: setup render pass color clears for any format
|
||||
bpp in v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 33 ++++++++++++++++----------
|
||||
1 file changed, 20 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index fe9f7e43596..1b39e230580 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -1064,7 +1064,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
UNUSED const uint32_t *clear_color =
|
||||
&state->attachments[attachment_idx].clear_value.color[0];
|
||||
|
||||
- uint32_t clear_pad = 0;
|
||||
+ UNUSED uint32_t clear_pad = 0;
|
||||
if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
|
||||
slice->tiling == V3D_TILING_UIF_XOR) {
|
||||
int uif_block_height = v3d_utile_height(image->planes[plane].cpp) * 2;
|
||||
@@ -1084,10 +1084,8 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
-#endif
|
||||
|
||||
if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
-#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
|
||||
clear.clear_color_mid_low_32_bits =
|
||||
((clear_color[1] >> 24) | (clear_color[2] << 8));
|
||||
@@ -1095,25 +1093,16 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
-#endif
|
||||
-
|
||||
}
|
||||
|
||||
if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
|
||||
-#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
|
||||
clear.uif_padded_height_in_uif_blocks = clear_pad;
|
||||
clear.clear_color_high_16_bits = clear_color[3] >> 16;
|
||||
clear.render_target_number = i;
|
||||
};
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
-#endif
|
||||
}
|
||||
+#endif
|
||||
|
||||
#if V3D_VERSION >= 71
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
@@ -1133,6 +1122,24 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
*/
|
||||
base_addr += (tiling->tile_height * rt.stride) / 8;
|
||||
}
|
||||
+
|
||||
+ if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
|
||||
+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
|
||||
+ ((uint64_t) clear_color[1]) |
|
||||
+ (((uint64_t) (clear_color[2] & 0xff)) << 32);
|
||||
+ rt.render_target_number = i;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128) {
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
|
||||
+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */
|
||||
+ (((uint64_t) (clear_color[2] & 0xffffff00)) >> 8) |
|
||||
+ (((uint64_t) (clear_color[3])) << 24);
|
||||
+ rt.render_target_number = i;
|
||||
+ }
|
||||
+ }
|
||||
#endif
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,126 +0,0 @@
|
||||
From ff5b5d4405b1d5600d7f1c4355202fd303f56700 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 22 Sep 2021 12:04:21 +0200
|
||||
Subject: [PATCH 087/142] v3dv: setup TLB clear color for meta operations in
|
||||
v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_meta_common.c | 46 +++++++++++++++----------
|
||||
1 file changed, 27 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
index c6391bc6d83..09ebcfa97c1 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
|
||||
@@ -75,8 +75,9 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
config.internal_depth_type = fb->internal_depth_type;
|
||||
}
|
||||
|
||||
+ const uint32_t *color = NULL;
|
||||
if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
|
||||
- uint32_t clear_pad = 0;
|
||||
+ UNUSED uint32_t clear_pad = 0;
|
||||
if (clear_info->image) {
|
||||
const struct v3dv_image *image = clear_info->image;
|
||||
|
||||
@@ -101,20 +102,16 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
}
|
||||
}
|
||||
|
||||
+ color = &clear_info->clear_value->color[0];
|
||||
+
|
||||
#if V3D_VERSION == 42
|
||||
- const uint32_t *color = &clear_info->clear_value->color[0];
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
|
||||
clear.clear_color_low_32_bits = color[0];
|
||||
clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
-#endif
|
||||
|
||||
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
-#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
|
||||
clear.clear_color_mid_low_32_bits =
|
||||
((color[1] >> 24) | (color[2] << 8));
|
||||
@@ -122,25 +119,16 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
((color[2] >> 24) | ((color[3] & 0xffff) << 8));
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
-#endif
|
||||
-
|
||||
}
|
||||
|
||||
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
|
||||
-#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
|
||||
clear.uif_padded_height_in_uif_blocks = clear_pad;
|
||||
clear.clear_color_high_16_bits = color[3] >> 16;
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
-#endif
|
||||
-#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
-#endif
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
#if V3D_VERSION == 42
|
||||
@@ -150,8 +138,11 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
}
|
||||
#endif
|
||||
+
|
||||
#if V3D_VERSION >= 71
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
+ if (color)
|
||||
+ rt.clear_color_low_bits = color[0];
|
||||
rt.internal_bpp = tiling->internal_bpp;
|
||||
rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type,
|
||||
fb->vk_format);
|
||||
@@ -161,6 +152,24 @@ emit_rcl_prologue(struct v3dv_job *job,
|
||||
rt.base_address = 0;
|
||||
rt.render_target_number = 0;
|
||||
}
|
||||
+
|
||||
+ if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
|
||||
+ rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
|
||||
+ ((uint64_t) color[1]) |
|
||||
+ (((uint64_t) (color[2] & 0xff)) << 32);
|
||||
+ rt.render_target_number = 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_128) {
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
|
||||
+ rt.clear_color_top_bits = /* 56 bits (24 + 32) */
|
||||
+ (((uint64_t) (color[2] & 0xffffff00)) >> 8) |
|
||||
+ (((uint64_t) (color[3])) << 24);
|
||||
+ rt.render_target_number = 0;
|
||||
+ }
|
||||
+ }
|
||||
#endif
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||||
@@ -229,9 +238,8 @@ emit_frame_setup(struct v3dv_job *job,
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ cl_emit(rcl, CLEAR_RENDER_TARGETS, clear);
|
||||
#endif
|
||||
-
|
||||
}
|
||||
cl_emit(rcl, END_OF_TILE_MARKER, end);
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,49 +0,0 @@
|
||||
From 1e9d7d69849fa646b331f7661c74ee138badc4bb Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 25 Oct 2021 01:37:12 +0200
|
||||
Subject: [PATCH 088/142] v3dv: fix up texture shader state for v71
|
||||
|
||||
There are some new fields for YCbCr with pointers for the various
|
||||
planes in multi-planar formats. These need to match the base address
|
||||
pointer in the texture state, or the hardware will assume this is a
|
||||
multi-planar texture.
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_image.c | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c
|
||||
index dac6ff2741f..848290c2a47 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_image.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_image.c
|
||||
@@ -129,6 +129,14 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
|
||||
v3dv_layer_offset(image, 0, image_view->vk.base_array_layer,
|
||||
iplane);
|
||||
tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
|
||||
+
|
||||
+#if V3D_VERSION >= 71
|
||||
+ tex.chroma_offset_x = 1;
|
||||
+ tex.chroma_offset_y = 1;
|
||||
+ /* See comment in XML field definition for rationale of the shifts */
|
||||
+ tex.texture_base_pointer_cb = base_offset >> 6;
|
||||
+ tex.texture_base_pointer_cr = base_offset >> 6;
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -191,5 +199,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
|
||||
buffer_view->offset;
|
||||
|
||||
tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
|
||||
+
|
||||
+#if V3D_VERSION >= 71
|
||||
+ tex.chroma_offset_x = 1;
|
||||
+ tex.chroma_offset_y = 1;
|
||||
+ /* See comment in XML field definition for rationale of the shifts */
|
||||
+ tex.texture_base_pointer_cb = base_offset >> 6;
|
||||
+ tex.texture_base_pointer_cr = base_offset >> 6;
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,52 +0,0 @@
|
||||
From 1f150a3a92741f7654a13626bd5b27b5575f2b76 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Mon, 25 Oct 2021 01:38:31 +0200
|
||||
Subject: [PATCH 089/142] v3dv: handle new texture state transfer functions in
|
||||
v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_image.c | 11 +++++++----
|
||||
1 file changed, 7 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c
|
||||
index 848290c2a47..437d4588c7e 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_image.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_image.c
|
||||
@@ -108,15 +108,16 @@ pack_texture_shader_state_helper(struct v3dv_device *device,
|
||||
|
||||
tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64;
|
||||
|
||||
+ bool is_srgb = vk_format_is_srgb(image_view->vk.format);
|
||||
#if V3D_VERSION == 42
|
||||
tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse;
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION == 42
|
||||
- tex.srgb = vk_format_is_srgb(image_view->vk.view_format);
|
||||
+ tex.srgb = is_srgb;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
|
||||
#endif
|
||||
|
||||
/* At this point we don't have the job. That's the reason the first
|
||||
@@ -181,11 +182,13 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
|
||||
|
||||
assert(buffer_view->format->plane_count == 1);
|
||||
tex.texture_type = buffer_view->format->planes[0].tex_type;
|
||||
+
|
||||
+ bool is_srgb = vk_format_is_srgb(buffer_view->vk_format);
|
||||
#if V3D_VERSION == 42
|
||||
- tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
|
||||
+ tex.srgb = is_srgb;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
|
||||
#endif
|
||||
|
||||
/* At this point we don't have the job. That's the reason the first
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,42 +0,0 @@
|
||||
From 45de9f019ee92635de9a505db58439f0f4561281 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 08:14:11 +0200
|
||||
Subject: [PATCH 090/142] v3dv: implement noop job for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_queue.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_queue.c b/src/broadcom/vulkan/v3dvx_queue.c
|
||||
index 1a26d04aef7..f8cee36e3bf 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_queue.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_queue.c
|
||||
@@ -46,7 +46,8 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
|
||||
config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("HW generation 71 not supported yet.");
|
||||
+ config.log2_tile_width = 3; /* Tile size 64 */
|
||||
+ config.log2_tile_height = 3; /* Tile size 64 */
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -58,10 +59,13 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
+ rt.internal_bpp = V3D_INTERNAL_BPP_32;
|
||||
+ rt.internal_type_and_clamping = V3D_RENDER_TARGET_TYPE_CLAMP_8;
|
||||
+ rt.stride = 1; /* Unused RT */
|
||||
+ }
|
||||
#endif
|
||||
|
||||
-
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||||
clear.z_clear_value = 1.0f;
|
||||
clear.stencil_clear_value = 0;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,117 +0,0 @@
|
||||
From 3e607bb28056bb52242be6878281efae84026813 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 08:23:48 +0200
|
||||
Subject: [PATCH 091/142] v3dv: handle render pass global clear for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 66 ++++++++++++++++----------
|
||||
1 file changed, 41 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index 1b39e230580..48b2e319e51 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -362,6 +362,11 @@ cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
iview->vk.base_array_layer + layer,
|
||||
image_plane);
|
||||
|
||||
+ /* The Clear Buffer bit is not supported for Z/Stencil stores in 7.x and it
|
||||
+ * is broken in earlier V3D versions.
|
||||
+ */
|
||||
+ assert((buffer != Z && buffer != STENCIL && buffer != ZSTENCIL) || !clear);
|
||||
+
|
||||
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = buffer;
|
||||
store.address = v3dv_cl_address(image->planes[image_plane].mem->bo, layer_offset);
|
||||
@@ -484,6 +489,30 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
const VkImageAspectFlags aspects =
|
||||
vk_format_aspects(ds_attachment->desc.format);
|
||||
|
||||
+#if V3D_VERSION <= 42
|
||||
+ /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
|
||||
+ * for depth/stencil.
|
||||
+ *
|
||||
+ * There used to be some confusion regarding the Clear Tile Buffers
|
||||
+ * Z/S bit also being broken, but we confirmed with Broadcom that this
|
||||
+ * is not the case, it was just that some other hardware bugs (that we
|
||||
+ * need to work around, such as GFXH-1461) could cause this bit to behave
|
||||
+ * incorrectly.
|
||||
+ *
|
||||
+ * There used to be another issue where the RTs bit in the Clear Tile
|
||||
+ * Buffers packet also cleared Z/S, but Broadcom confirmed this is
|
||||
+ * fixed since V3D 4.1.
|
||||
+ *
|
||||
+ * So if we have to emit a clear of depth or stencil we don't use
|
||||
+ * the per-buffer store clear bit, even if we need to store the buffers,
|
||||
+ * instead we always have to use the Clear Tile Buffers Z/S bit.
|
||||
+ * If we have configured the job to do early Z/S clearing, then we
|
||||
+ * don't want to emit any Clear Tile Buffers command at all here.
|
||||
+ *
|
||||
+ * Note that GFXH-1689 is not reproduced in the simulator, where
|
||||
+ * using the clear buffer bit in depth/stencil stores works fine.
|
||||
+ */
|
||||
+
|
||||
/* Only clear once on the first subpass that uses the attachment */
|
||||
uint32_t ds_first_subpass = !state->pass->multiview_enabled ?
|
||||
ds_attachment->first_subpass :
|
||||
@@ -503,6 +532,17 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
ds_attachment->desc.stencilLoadOp,
|
||||
subpass->do_stencil_clear_with_draw);
|
||||
|
||||
+ use_global_zs_clear = !state->job->early_zs_clear &&
|
||||
+ (needs_depth_clear || needs_stencil_clear);
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ /* The store command's clear buffer bit cannot be used for Z/S stencil:
|
||||
+ * since V3D 4.5.6 Z/S buffers are automatically cleared between tiles,
|
||||
+ * so we don't want to emit redundant clears here.
|
||||
+ */
|
||||
+ use_global_zs_clear = false;
|
||||
+#endif
|
||||
+
|
||||
/* Skip the last store if it is not required */
|
||||
uint32_t ds_last_subpass = !pass->multiview_enabled ?
|
||||
ds_attachment->last_subpass :
|
||||
@@ -545,30 +585,6 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
needs_stencil_store = subpass->resolve_stencil;
|
||||
}
|
||||
|
||||
- /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
|
||||
- * for depth/stencil.
|
||||
- *
|
||||
- * There used to be some confusion regarding the Clear Tile Buffers
|
||||
- * Z/S bit also being broken, but we confirmed with Broadcom that this
|
||||
- * is not the case, it was just that some other hardware bugs (that we
|
||||
- * need to work around, such as GFXH-1461) could cause this bit to behave
|
||||
- * incorrectly.
|
||||
- *
|
||||
- * There used to be another issue where the RTs bit in the Clear Tile
|
||||
- * Buffers packet also cleared Z/S, but Broadcom confirmed this is
|
||||
- * fixed since V3D 4.1.
|
||||
- *
|
||||
- * So if we have to emit a clear of depth or stencil we don't use
|
||||
- * the per-buffer store clear bit, even if we need to store the buffers,
|
||||
- * instead we always have to use the Clear Tile Buffers Z/S bit.
|
||||
- * If we have configured the job to do early Z/S clearing, then we
|
||||
- * don't want to emit any Clear Tile Buffers command at all here.
|
||||
- *
|
||||
- * Note that GFXH-1689 is not reproduced in the simulator, where
|
||||
- * using the clear buffer bit in depth/stencil stores works fine.
|
||||
- */
|
||||
- use_global_zs_clear = !state->job->early_zs_clear &&
|
||||
- (needs_depth_clear || needs_stencil_clear);
|
||||
if (needs_depth_store || needs_stencil_store) {
|
||||
const uint32_t zs_buffer =
|
||||
v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
|
||||
@@ -673,7 +689,7 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
- unreachable("Hardware generation 71 not supported yet.");
|
||||
+ cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,32 +0,0 @@
|
||||
From 3794f6f08c559c4e442b57e992d501fb7d515b9b Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 08:31:04 +0200
|
||||
Subject: [PATCH 092/142] v3dv: GFXH-1461 does not affect V3D 7.x
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dv_pass.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c
|
||||
index 20f5014268d..3e82c15df88 100644
|
||||
--- a/src/broadcom/vulkan/v3dv_pass.c
|
||||
+++ b/src/broadcom/vulkan/v3dv_pass.c
|
||||
@@ -236,11 +236,13 @@ v3dv_CreateRenderPass2(VkDevice _device,
|
||||
|
||||
/* GFXH-1461: if depth is cleared but stencil is loaded (or vice versa),
|
||||
* the clear might get lost. If a subpass has this then we can't emit
|
||||
- * the clear using the TLB and we have to do it as a draw call.
|
||||
+ * the clear using the TLB and we have to do it as a draw call. This
|
||||
+ * issue is fixed since V3D 4.3.18.
|
||||
*
|
||||
* FIXME: separate stencil.
|
||||
*/
|
||||
- if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
|
||||
+ if (device->devinfo.ver == 42 &&
|
||||
+ subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
|
||||
struct v3dv_render_pass_attachment *att =
|
||||
&pass->attachments[subpass->ds_attachment.attachment];
|
||||
if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,69 +0,0 @@
|
||||
From 5be7f484210103e40b77fa3135042da4a8406659 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 08:59:08 +0200
|
||||
Subject: [PATCH 093/142] v3dv: update thread end restrictions validation for
|
||||
v71
|
||||
|
||||
---
|
||||
src/broadcom/compiler/qpu_validate.c | 37 +++++++++++++++++++++++++---
|
||||
1 file changed, 34 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index 1082fb7d50a..0466ee5d0b6 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -316,17 +316,48 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if ((inst->alu.add.op != V3D_QPU_A_NOP &&
|
||||
!inst->alu.add.magic_write)) {
|
||||
- fail_instr(state, "RF write after THREND");
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ fail_instr(state, "RF write after THREND");
|
||||
+ } else if (devinfo->ver >= 71) {
|
||||
+ if (state->last_thrsw_ip - state->ip == 0) {
|
||||
+ fail_instr(state,
|
||||
+ "ADD RF write at THREND");
|
||||
+ }
|
||||
+ if (inst->alu.add.waddr == 2 ||
|
||||
+ inst->alu.add.waddr == 3) {
|
||||
+ fail_instr(state,
|
||||
+ "RF2-3 write after THREND");
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
|
||||
!inst->alu.mul.magic_write)) {
|
||||
- fail_instr(state, "RF write after THREND");
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ fail_instr(state, "RF write after THREND");
|
||||
+ } else if (devinfo->ver >= 71) {
|
||||
+ if (state->last_thrsw_ip - state->ip == 0) {
|
||||
+ fail_instr(state,
|
||||
+ "MUL RF write at THREND");
|
||||
+ }
|
||||
+
|
||||
+ if (inst->alu.mul.waddr == 2 ||
|
||||
+ inst->alu.mul.waddr == 3) {
|
||||
+ fail_instr(state,
|
||||
+ "RF2-3 write after THREND");
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
!inst->sig_magic) {
|
||||
- fail_instr(state, "RF write after THREND");
|
||||
+ if (devinfo->ver <= 42) {
|
||||
+ fail_instr(state, "RF write after THREND");
|
||||
+ } else if (devinfo->ver >= 71 &&
|
||||
+ (inst->sig_addr == 2 ||
|
||||
+ inst->sig_addr == 3)) {
|
||||
+ fail_instr(state, "RF2-3 write after THREND");
|
||||
+ }
|
||||
}
|
||||
|
||||
/* GFXH-1625: No TMUWT in the last instruction */
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,68 +0,0 @@
|
||||
From a751dff57b6d769f5b031054cc65415cc3b44c08 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 29 Sep 2021 08:22:59 +0200
|
||||
Subject: [PATCH 094/142] v3dv: handle early Z/S clears for v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 30 ++++++++++++++++++++------
|
||||
1 file changed, 23 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index 48b2e319e51..4580e2a4650 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -998,6 +998,10 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
* Early-Z/S clearing is independent of Early Z/S testing, so it is
|
||||
* possible to enable one but not the other so long as their
|
||||
* respective requirements are met.
|
||||
+ *
|
||||
+ * From V3D 4.5.6, Z/S buffers are always cleared automatically
|
||||
+ * between tiles, but we still want to enable early ZS clears
|
||||
+ * when Z/S are not loaded or stored.
|
||||
*/
|
||||
struct v3dv_render_pass_attachment *ds_attachment =
|
||||
&pass->attachments[ds_attachment_idx];
|
||||
@@ -1005,21 +1009,33 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
const VkImageAspectFlags ds_aspects =
|
||||
vk_format_aspects(ds_attachment->desc.format);
|
||||
|
||||
- bool needs_depth_clear =
|
||||
- check_needs_clear(state,
|
||||
- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
- ds_attachment->first_subpass,
|
||||
- ds_attachment->desc.loadOp,
|
||||
- subpass->do_depth_clear_with_draw);
|
||||
-
|
||||
bool needs_depth_store =
|
||||
v3dv_cmd_buffer_check_needs_store(state,
|
||||
ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
ds_attachment->last_subpass,
|
||||
ds_attachment->desc.storeOp) ||
|
||||
subpass->resolve_depth;
|
||||
+#if V3D_VERSION <= 42
|
||||
+ bool needs_depth_clear =
|
||||
+ check_needs_clear(state,
|
||||
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
+ ds_attachment->first_subpass,
|
||||
+ ds_attachment->desc.loadOp,
|
||||
+ subpass->do_depth_clear_with_draw);
|
||||
|
||||
do_early_zs_clear = needs_depth_clear && !needs_depth_store;
|
||||
+#endif
|
||||
+#if V3D_VERSION >= 71
|
||||
+ bool needs_depth_load =
|
||||
+ v3dv_cmd_buffer_check_needs_load(state,
|
||||
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
+ ds_attachment->first_subpass,
|
||||
+ ds_attachment->desc.loadOp,
|
||||
+ ds_attachment->last_subpass,
|
||||
+ ds_attachment->desc.storeOp);
|
||||
+ do_early_zs_clear = !needs_depth_load && !needs_depth_store;
|
||||
+#endif
|
||||
+
|
||||
if (do_early_zs_clear &&
|
||||
vk_format_has_stencil(ds_attachment->desc.format)) {
|
||||
bool needs_stencil_load =
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,34 +0,0 @@
|
||||
From 2add46ebce4760bf8349606201324ee0e6b1f9da Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Wed, 29 Sep 2021 09:07:28 +0200
|
||||
Subject: [PATCH 095/142] v3dv: handle RTs with no color targets in v71
|
||||
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_cmd_buffer.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
index 4580e2a4650..750486a6ccf 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
|
||||
@@ -1175,6 +1175,17 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
||||
#endif
|
||||
}
|
||||
|
||||
+#if V3D_VERSION >= 71
|
||||
+ /* If we don't have any color RTs, we still need to emit one and flag
|
||||
+ * it as not used using stride = 1.
|
||||
+ */
|
||||
+ if (subpass->color_count == 0) {
|
||||
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
|
||||
+ rt.stride = 1;
|
||||
+ }
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
cmd_buffer_render_pass_setup_render_target
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,85 +0,0 @@
|
||||
From 019abbd34d2d904d6bb33f9fa4433cb53ca7899c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 1 Oct 2021 15:18:38 +0200
|
||||
Subject: [PATCH 096/142] v3dv: no specific separate_segments flag for V3D 7.1
|
||||
|
||||
On V3D 7.1 there is not a flag on the Shader State Record to specify
|
||||
if we are using shared or separate segments. This is done by setting
|
||||
the vpm input size to 0 (so we need to ensure that the output would be
|
||||
the max needed for input/output).
|
||||
|
||||
We were already doing the latter on the prog_data_vs, so we just need
|
||||
to use those values, instead of assigning default values.
|
||||
|
||||
As we are here, we also add some comments on the compiler part.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 4 ++++
|
||||
src/broadcom/compiler/vir.c | 4 ++++
|
||||
src/broadcom/vulkan/v3dvx_pipeline.c | 15 +++++++++++++--
|
||||
3 files changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 77fb6a794e6..4f767296860 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -297,6 +297,10 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
||||
/* If the input and output segments are shared, then all VPM reads to
|
||||
* a location need to happen before all writes. We handle this by
|
||||
* serializing all VPM operations for now.
|
||||
+ *
|
||||
+ * FIXME: we are assuming that the segments are shared. That is
|
||||
+ * correct right now as we are only using shared, but technically you
|
||||
+ * can choose.
|
||||
*/
|
||||
bool separate_vpm_segment = false;
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
|
||||
index 7612eed7130..dd0aa761c43 100644
|
||||
--- a/src/broadcom/compiler/vir.c
|
||||
+++ b/src/broadcom/compiler/vir.c
|
||||
@@ -745,6 +745,10 @@ v3d_vs_set_prog_data(struct v3d_compile *c,
|
||||
|
||||
/* Set us up for shared input/output segments. This is apparently
|
||||
* necessary for our VCM setup to avoid varying corruption.
|
||||
+ *
|
||||
+ * FIXME: initially testing on V3D 7.1 seems to work fine when using
|
||||
+ * separate segments. So we could try to reevaluate in the future, if
|
||||
+ * there is any advantage of using separate segments.
|
||||
*/
|
||||
prog_data->separate_segments = false;
|
||||
prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
index a640c1d084a..a72ca3c241b 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
@@ -452,14 +452,25 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
|
||||
prog_data_vs_bin->separate_segments;
|
||||
shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
|
||||
prog_data_vs->separate_segments;
|
||||
-#endif
|
||||
-
|
||||
shader.coordinate_shader_input_vpm_segment_size =
|
||||
prog_data_vs_bin->separate_segments ?
|
||||
prog_data_vs_bin->vpm_input_size : 1;
|
||||
shader.vertex_shader_input_vpm_segment_size =
|
||||
prog_data_vs->separate_segments ?
|
||||
prog_data_vs->vpm_input_size : 1;
|
||||
+#endif
|
||||
+
|
||||
+ /* On V3D 7.1 there isn't a specific flag to set if we are using
|
||||
+ * shared/separate segments or not. We just set the value of
|
||||
+ * vpm_input_size to 0, and set output to the max needed. That should be
|
||||
+ * already properly set on prog_data_vs_bin
|
||||
+ */
|
||||
+#if V3D_VERSION == 71
|
||||
+ shader.coordinate_shader_input_vpm_segment_size =
|
||||
+ prog_data_vs_bin->vpm_input_size;
|
||||
+ shader.vertex_shader_input_vpm_segment_size =
|
||||
+ prog_data_vs->vpm_input_size;
|
||||
+#endif
|
||||
|
||||
shader.coordinate_shader_output_vpm_segment_size =
|
||||
prog_data_vs_bin->vpm_output_size;
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,39 +0,0 @@
|
||||
From 4f6b4f91577ec04aab907d59d836d0c17731a9d0 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Thu, 7 Oct 2021 12:43:49 +0200
|
||||
Subject: [PATCH 097/142] v3dv: don't convert floating point border colors in
|
||||
v71
|
||||
|
||||
The TMU does this for us now.
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_device.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c
|
||||
index e235983864c..72daefadb08 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_device.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_device.c
|
||||
@@ -118,7 +118,11 @@ static union pipe_color_union encode_border_color(
|
||||
(1 << (desc->channel[i].size - 1)) - 1);
|
||||
}
|
||||
|
||||
- /* convert from float to expected format */
|
||||
+#if V3D_VERSION <= 42
|
||||
+ /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
|
||||
+ * for us. In V3D 4.x we need to manually convert floating point color
|
||||
+ * values to the expected format.
|
||||
+ */
|
||||
if (vk_format_is_srgb(bc_info->format) ||
|
||||
vk_format_is_compressed(bc_info->format)) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
@@ -170,6 +174,7 @@ static union pipe_color_union encode_border_color(
|
||||
}
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
|
||||
return border;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
@ -1,60 +0,0 @@
|
||||
From d8083cb8f104e0f035f5b812e000a500fa52d66f Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Fri, 15 Oct 2021 13:06:31 +0200
|
||||
Subject: [PATCH 098/142] v3dv: handle Z clipping in v71
|
||||
|
||||
Fixes the following tests:
|
||||
|
||||
dEQP-VK.clipping.clip_volume.*
|
||||
dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_* (except deltazero)
|
||||
---
|
||||
src/broadcom/vulkan/v3dvx_pipeline.c | 33 ++++++++++++++++++++++++++++
|
||||
1 file changed, 33 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
index a72ca3c241b..7b1133f8173 100644
|
||||
--- a/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
+++ b/src/broadcom/vulkan/v3dvx_pipeline.c
|
||||
@@ -227,6 +227,39 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline,
|
||||
ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
|
||||
|
||||
pipeline->z_updates_enable = config.z_updates_enable;
|
||||
+
|
||||
+#if V3D_VERSION >= 71
|
||||
+ /* From the Vulkan spec:
|
||||
+ *
|
||||
+ * "depthClampEnable controls whether to clamp the fragment’s depth
|
||||
+ * values as described in Depth Test. If the pipeline is not created
|
||||
+ * with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
|
||||
+ * then enabling depth clamp will also disable clipping primitives to
|
||||
+ * the z planes of the frustrum as described in Primitive Clipping.
|
||||
+ * Otherwise depth clipping is controlled by the state set in
|
||||
+ * VkPipelineRasterizationDepthClipStateCreateInfoEXT."
|
||||
+ *
|
||||
+ * Note: neither depth clamping nor VK_EXT_depth_clip_enable are actually
|
||||
+ * supported in the driver yet, so in practice we are always enabling Z
|
||||
+ * clipping for now.
|
||||
+ */
|
||||
+ bool z_clip_enable = false;
|
||||
+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
|
||||
+ ds_info ? vk_find_struct_const(ds_info->pNext,
|
||||
+ PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
|
||||
+ NULL;
|
||||
+ if (clip_info)
|
||||
+ z_clip_enable = clip_info->depthClipEnable;
|
||||
+ else if (!(rs_info && rs_info->depthClampEnable))
|
||||
+ z_clip_enable = true;
|
||||
+
|
||||
+ if (z_clip_enable) {
|
||||
+ config.z_clipping_mode = pipeline->negative_one_to_one ?
|
||||
+ V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
|
||||
+ } else {
|
||||
+ config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
|
||||
+ }
|
||||
+#endif
|
||||
};
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user