RPi5: update mesa patches

- Remove RPiOS Wayland/X specific patches
- Keep the RP1 kmsro patch
- Add latest version of v71 packing/conversion opcodes MR which was
  present in original 23.2.x patch series
  https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25726

Signed-off-by: Matthias Reichl <hias@horus.com>
This commit is contained in:
Matthias Reichl 2023-11-05 15:38:23 +01:00 committed by Rudi Heitbaum
parent be613bfbaf
commit a6928cabcb
6 changed files with 669 additions and 526 deletions

View File

@ -1,7 +1,7 @@
From 3322c102282cf726ae575b122358060abd5b24db Mon Sep 17 00:00:00 2001
From 54cc206be2d48916862d7e264e886f58b27dd653 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Thu, 5 Oct 2023 19:32:10 +0100
Subject: [PATCH 142/142] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC
Subject: [PATCH 1/3] gallium: Add kmsro drivers for RP1 DSI, DPI, and VEC
devices
Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
@ -11,7 +11,7 @@ Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
2 files changed, 6 insertions(+)
diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build
index fbec1da957b..59daf3b6fb6 100644
index 66619bba0db..443923772e8 100644
--- a/src/gallium/targets/dri/meson.build
+++ b/src/gallium/targets/dri/meson.build
@@ -68,6 +68,9 @@ libgallium_dri = shared_library(
@ -22,10 +22,10 @@ index fbec1da957b..59daf3b6fb6 100644
+ 'drm-rp1-dsi_dri.so',
+ 'drm-rp1-vec_dri.so',
'exynos_dri.so',
'hdlcd_dri.so',
'hx8357d_dri.so',
'ili9225_dri.so',
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index d506869cbb4..ecb25edd03b 100644
index 9d3069eb004..79f60a7224a 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -98,6 +98,9 @@ DEFINE_LOADER_DRM_ENTRYPOINT(tegra);
@ -36,8 +36,8 @@ index d506869cbb4..ecb25edd03b 100644
+DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_dsi)
+DEFINE_LOADER_DRM_ENTRYPOINT(drm_rp1_vec)
DEFINE_LOADER_DRM_ENTRYPOINT(exynos)
DEFINE_LOADER_DRM_ENTRYPOINT(hdlcd)
DEFINE_LOADER_DRM_ENTRYPOINT(hx8357d)
DEFINE_LOADER_DRM_ENTRYPOINT(ili9225)
--
2.39.2

View File

@ -0,0 +1,214 @@
From 80050d6960a688d061eac9798c6f5f1b0eb3e960 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Tue, 30 Nov 2021 02:39:20 +0100
Subject: [PATCH 2/3] nir: add new opcodes to map new v71 packing/conversion
instructions
Since v71, Broadcom hardware includes specific packing/conversion
instructions, so this commit adds opcodes to be able to make use of
them, especially for image stores:
* vftounorm8/vftosnorm8: 2x16-bit floating point to 2x8-bit
unorm/snorm
* ftounorm16/ftosnorm16: floating point to 16-bit unorm/snorm
* vftounorm10lo/vftounorm10hi: used to convert a floating point to
a r10g10b10a2 unorm
* v11fpack: packs 2 2x16 FP into R11G11B10.
* v10pack: pack 2 2x16 integer into R10G10B10A2
* v8pack: packs 2 2x16 bit integer into 4x8 bits.
* vpack: 2x32 bit to 2x16 integer pack
The latter can easily be confused with the existing, general
pack_32_2x16_split. Note, however, that pack_32_2x16_split receives two
16-bit integers and packs them into a 32-bit integer, whereas the Broadcom
opcode takes two 32-bit integers, takes the lower halfword of each, and
packs them as 2x16 into a 32-bit integer.
Interestingly broadcom also defines a similar one that packs the
higher halfword. Not used yet.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
src/compiler/nir/nir_constant_expressions.py | 94 ++++++++++++++++++++
src/compiler/nir/nir_opcodes.py | 52 +++++++++++
2 files changed, 146 insertions(+)
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index e6383b67737..0d0797526a9 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -62,6 +62,8 @@ template = """\
#include "util/softfloat.h"
#include "util/bigmath.h"
#include "util/format/format_utils.h"
+#include "util/format_r11g11b10f.h"
+#include "util/u_math.h"
#include "nir_constant_expressions.h"
/**
@@ -277,6 +279,98 @@ unpack_half_1x16(uint16_t u)
return _mesa_half_to_float(u);
}
+/* Broadcom v3d specific instructions */
+/**
+ * Packs 2 2x16 floating split into a r11g11b10f
+ */
+static uint32_t v11fpack_v3d(const uint32_t src0,
+ const uint32_t src1)
+{
+ float rgb[3] = {
+ unpack_half_1x16((src0 & 0xffff)),
+ unpack_half_1x16((src0 >> 16)),
+ unpack_half_1x16((src1 & 0xffff)),
+ };
+
+ return float3_to_r11g11b10f(rgb);
+}
+
+/**
+ * The three methods below are basically wrappers over pack_s/unorm_1x8/1x16,
+ * as they receive the raw bits of the value as an integer instead of a float
+ */
+static inline uint8_t _mesa_half_to_snorm8(uint16_t val)
+{
+ return pack_snorm_1x8(_mesa_half_to_float(val));
+}
+
+static uint16_t _mesa_float_to_snorm16(uint32_t val)
+{
+ union fi aux;
+ aux.ui = val;
+ return pack_snorm_1x16(aux.f);
+}
+
+static uint16_t _mesa_float_to_unorm16(uint32_t val)
+{
+ union fi aux;
+ aux.ui = val;
+ return pack_unorm_1x16(aux.f);
+}
+
+static inline uint32_t float_pack16_v3d(uint32_t f32)
+{
+ return _mesa_float_to_half(uif(f32));
+}
+
+static inline uint32_t float_unpack16_v3d(uint32_t f16)
+{
+ return fui(_mesa_half_to_float(f16));
+}
+
+static inline uint32_t vfpack_v3d(uint32_t a, uint32_t b)
+{
+ return float_pack16_v3d(b) << 16 | float_pack16_v3d(a);
+}
+
+static inline uint32_t vfsat_v3d(uint32_t a)
+{
+ const uint32_t low = fui(SATURATE(_mesa_half_to_float(a & 0xffff)));
+ const uint32_t high = fui(SATURATE(_mesa_half_to_float(a >> 16)));
+
+ return vfpack_v3d(low, high);
+}
+
+static inline uint32_t fmul_v3d(uint32_t a, uint32_t b)
+{
+ return fui(uif(a) * uif(b));
+}
+
+static uint32_t vfmul_v3d(uint32_t a, uint32_t b)
+{
+ const uint32_t low = fmul_v3d(float_unpack16_v3d(a & 0xffff),
+ float_unpack16_v3d(b & 0xffff));
+ const uint32_t high = fmul_v3d(float_unpack16_v3d(a >> 16),
+ float_unpack16_v3d(b >> 16));
+
+ return vfpack_v3d(low, high);
+}
+
+/* Convert 2x16-bit floating point to 2x10-bit unorm */
+static uint32_t vftounorm10lo(uint32_t src0)
+{
+ return vfmul_v3d(vfsat_v3d(src0), 0x03ff03ff);
+}
+
+/*
+ * Convert 2x16-bit floating point to one 2-bit and one
+ * 10-bit unorm
+ */
+static uint32_t vftounorm10hi(uint32_t src0)
+{
+ return vfmul_v3d(vfsat_v3d(src0), 0x000303ff);
+}
+
/* Some typed vector structures to make things like src0.y work */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 0f81328f441..b70d9567cd6 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1413,6 +1413,58 @@ for (int i = 0; i < 32; i += 8) {
}
""")
+# v3d-specific opcodes
+
+# v3d-specific (v71) instruction that packs bits of 2 2x16 floating point into
+# r11g11b10 bits, rounding to nearest even, so
+# dst[10:0] = float16_to_float11 (src0[15:0])
+# dst[21:11] = float16_to_float11 (src0[31:16])
+# dst[31:22] = float16_to_float10 (src1[15:0])
+binop_convert("v11fpack_v3d", tuint32, tuint32, "",
+ "v11fpack_v3d(src0, src1)")
+
+# v3d-specific (v71) instruction that packs 2x32 bit to 2x16 bit integer. The
+# difference with pack_32_2x16_split is that the sources are 32bit too. So it
+# receives 2 32-bit integer, and packs the lower halfword as 2x16 on a 32-bit
+# integer.
+binop_horiz("vpack_v3d", 1, tuint32, 1, tuint32, 1, tuint32,
+ "(src0.x & 0xffff) | (src1.x << 16)")
+
+# v3d-specific (v71) instruction that packs bits of 2 2x16 integers into
+# r10g10b10a2:
+# dst[9:0] = src0[9:0]
+# dst[19:10] = src0[25:16]
+# dst[29:20] = src1[9:0]
+# dst[31:30] = src1[17:16]
+binop_convert("v10pack_v3d", tuint32, tuint32, "",
+ "(src0 & 0x3ff) | ((src0 >> 16) & 0x3ff) << 10 | (src1 & 0x3ff) << 20 | ((src1 >> 16) & 0x3ff) << 30")
+
+# v3d-specific (v71) instruction that packs 2 2x16 bit integers into 4x8 bits:
+# dst[7:0] = src0[7:0]
+# dst[15:8] = src0[23:16]
+# dst[23:16] = src1[7:0]
+# dst[31:24] = src1[23:16]
+opcode("v8pack_v3d", 0, tuint32, [0, 0], [tuint32, tuint32],
+ False, "",
+ "(src0 & 0x000000ff) | (src0 & 0x00ff0000) >> 8 | (src1 & 0x000000ff) << 16 | (src1 & 0x00ff0000) << 8")
+
+# v3d-specific (v71) instructions to convert 2x16 floating point to 2x8 bit unorm/snorm
+unop("vftounorm8_v3d", tuint32,
+ "_mesa_half_to_unorm(src0 & 0xffff, 8) | (_mesa_half_to_unorm(src0 >> 16, 8) << 16)")
+unop("vftosnorm8_v3d", tuint32,
+ "_mesa_half_to_snorm(src0 & 0xffff, 8) | (_mesa_half_to_snorm(src0 >> 16, 8) << 16)")
+
+# v3d-specific (v71) instructions to convert 32-bit floating point to 16 bit unorm/snorm
+unop("ftounorm16_v3d", tuint32, "_mesa_float_to_unorm16(src0)")
+unop("ftosnorm16_v3d", tuint32, "_mesa_float_to_snorm16(src0)")
+
+# v3d-specific (v71) instructions to convert 2x16 bit floating points to 2x10 bit unorm
+unop("vftounorm10lo_v3d", tuint32, "vftounorm10lo(src0)")
+
+# v3d-specific (v71) instructions to convert 2x16 bit floating points to one 2-bit
+# and one 10 bit unorm
+unop("vftounorm10hi_v3d", tuint32, "vftounorm10hi(src0)")
+
# Mali-specific opcodes
unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))
--
2.39.2

View File

@ -0,0 +1,449 @@
From 7e151fd3a213848c8022c9f48e10f2aec76c3e4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 2 Dec 2021 13:26:43 +0100
Subject: [PATCH 3/3] broadcom/compiler: update image store lowering to use v71
new packing/conversion instructions
Vulkan shaderdb stats with pattern dEQP-VK.image.*.with_format.*.*:
total instructions in shared programs: 35993 -> 33245 (-7.63%)
instructions in affected programs: 21153 -> 18405 (-12.99%)
helped: 394
HURT: 1
Instructions are helped.
total uniforms in shared programs: 8550 -> 7418 (-13.24%)
uniforms in affected programs: 5136 -> 4004 (-22.04%)
helped: 399
HURT: 0
Uniforms are helped.
total max-temps in shared programs: 6014 -> 5905 (-1.81%)
max-temps in affected programs: 473 -> 364 (-23.04%)
helped: 58
HURT: 0
Max-temps are helped.
total nops in shared programs: 1515 -> 1504 (-0.73%)
nops in affected programs: 46 -> 35 (-23.91%)
helped: 14
HURT: 2
Inconclusive result (%-change mean confidence interval includes 0).
FWIW, that one HURT on the instructions count is for just one
instruction.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
src/broadcom/compiler/nir_to_vir.c | 40 +++
src/broadcom/compiler/v3d_compiler.h | 16 +-
.../compiler/v3d_nir_lower_image_load_store.c | 239 +++++++++++++++++-
src/broadcom/compiler/vir.c | 2 +-
4 files changed, 288 insertions(+), 9 deletions(-)
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 220c864a056..4329d4c85f6 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1688,6 +1688,22 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
result = vir_VFPACK(c, src[0], src[1]);
break;
+ case nir_op_vpack_v3d:
+ result = vir_VPACK(c, src[0], src[1]);
+ break;
+
+ case nir_op_v11fpack_v3d:
+ result = vir_V11FPACK(c, src[0], src[1]);
+ break;
+
+ case nir_op_v10pack_v3d:
+ result = vir_V10PACK(c, src[0], src[1]);
+ break;
+
+ case nir_op_v8pack_v3d:
+ result = vir_V8PACK(c, src[0], src[1]);
+ break;
+
case nir_op_unpack_half_2x16_split_x:
result = vir_FMOV(c, src[0]);
vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_L);
@@ -1698,6 +1714,30 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_H);
break;
+ case nir_op_vftounorm8_v3d:
+ result = vir_VFTOUNORM8(c, src[0]);
+ break;
+
+ case nir_op_vftosnorm8_v3d:
+ result = vir_VFTOSNORM8(c, src[0]);
+ break;
+
+ case nir_op_vftounorm10lo_v3d:
+ result = vir_VFTOUNORM10LO(c, src[0]);
+ break;
+
+ case nir_op_vftounorm10hi_v3d:
+ result = vir_VFTOUNORM10HI(c, src[0]);
+ break;
+
+ case nir_op_ftounorm16_v3d:
+ result = vir_FTOUNORM16(c, src[0]);
+ break;
+
+ case nir_op_ftosnorm16_v3d:
+ result = vir_FTOSNORM16(c, src[0]);
+ break;
+
default:
fprintf(stderr, "unknown NIR ALU inst: ");
nir_print_instr(&instr->instr, stderr);
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 095b33c03b8..5714e85d2b8 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -1180,7 +1180,7 @@ bool v3d_nir_lower_line_smooth(nir_shader *shader);
bool v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
bool v3d_nir_lower_scratch(nir_shader *s);
bool v3d_nir_lower_txf_ms(nir_shader *s);
-bool v3d_nir_lower_image_load_store(nir_shader *s);
+bool v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c);
bool v3d_nir_lower_load_store_bitsize(nir_shader *s);
void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
@@ -1421,6 +1421,20 @@ VIR_SFU(LOG)
VIR_SFU(SIN)
VIR_SFU(RSQRT2)
+VIR_A_ALU2(VPACK)
+VIR_A_ALU2(V8PACK)
+VIR_A_ALU2(V10PACK)
+VIR_A_ALU2(V11FPACK)
+
+VIR_M_ALU1(FTOUNORM16)
+VIR_M_ALU1(FTOSNORM16)
+
+VIR_M_ALU1(VFTOUNORM8)
+VIR_M_ALU1(VFTOSNORM8)
+
+VIR_M_ALU1(VFTOUNORM10LO)
+VIR_M_ALU1(VFTOUNORM10HI)
+
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg dest, struct qreg src)
diff --git a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
index 5f8363377cb..ec43f834897 100644
--- a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
+++ b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
@@ -40,6 +40,10 @@
* calculations and load/store using the TMU general memory access path.
*/
+static const unsigned bits_8[4] = {8, 8, 8, 8};
+static const unsigned bits_16[4] = {16, 16, 16, 16};
+static const unsigned bits_1010102[4] = {10, 10, 10, 2};
+
bool
v3d_gl_format_is_return_32(enum pipe_format format)
{
@@ -59,6 +63,8 @@ v3d_gl_format_is_return_32(enum pipe_format format)
/* Packs a 32-bit vector of colors in the range [0, (1 << bits[i]) - 1] to a
* 32-bit SSA value, with as many channels as necessary to store all the bits
+ *
+ * This is the generic helper, using all common nir operations.
*/
static nir_def *
pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
@@ -91,8 +97,180 @@ pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
}
+/* Utility wrapper as half_2x16_split is mapped to vfpack, and sometimes it is
+ * just easier to read vfpack in the code, especially while using the PRM as
+ * reference
+ */
+static inline nir_def *
+nir_vfpack(nir_builder *b, nir_def *p1, nir_def *p2)
+{
+ return nir_pack_half_2x16_split(b, p1, p2);
+}
+
+static inline nir_def *
+pack_11f11f10f(nir_builder *b, nir_def *color)
+{
+ nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
+ nir_channel(b, color, 1));
+ nir_def *undef = nir_undef(b, 1, color->bit_size);
+ nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef);
+
+ return nir_v11fpack_v3d(b, p1, p2);
+}
+
+static inline nir_def *
+pack_r10g10b10a2_uint(nir_builder *b, nir_def *color)
+{
+ nir_def *p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
+ nir_channel(b, color, 1));
+ nir_def *p2 = nir_vpack_v3d(b, nir_channel(b, color, 2),
+ nir_channel(b, color, 3));
+
+ return nir_v10pack_v3d(b, p1, p2);
+}
+
+static inline nir_def *
+pack_r10g10b10a2_unorm(nir_builder *b, nir_def *color)
+{
+ nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
+ nir_channel(b, color, 1));
+ p1 = nir_vftounorm10lo_v3d(b, p1);
+
+ nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2),
+ nir_channel(b, color, 3));
+ p2 = nir_vftounorm10hi_v3d(b, p2);
+
+ return nir_v10pack_v3d(b, p1, p2);
+}
+
+enum hw_conversion {
+ NONE,
+ TO_SNORM,
+ TO_UNORM
+};
+
+static inline nir_def *
+pack_8bit(nir_builder *b, nir_def *color,
+ unsigned num_components,
+ enum hw_conversion conversion)
+{
+ /* Note that usually you should not use this method (that relies on
+ * custom packing) for 1 component if we are not doing any
+ * conversion. But we support also that case, and let the caller
+ * decide which method to use.
+ */
+ nir_def *p1;
+ nir_def *p2;
+
+ if (conversion == NONE) {
+ p1 = nir_vpack_v3d(b, nir_channel(b, color, 0),
+ nir_channel(b, color, num_components == 1 ? 0 : 1));
+ } else {
+ p1 = nir_vfpack(b, nir_channel(b, color, 0),
+ nir_channel(b, color, num_components == 1 ? 0 : 1));
+ p1 = (conversion == TO_UNORM) ?
+ nir_vftounorm8_v3d(b, p1) : nir_vftosnorm8_v3d(b, p1);
+ }
+ if (num_components == 4) {
+ if (conversion == NONE) {
+ p2 = nir_vpack_v3d(b, nir_channel(b, color, 2),
+ nir_channel(b, color, 3));
+ } else {
+ p2 = nir_vfpack(b, nir_channel(b, color, 2),
+ nir_channel(b, color, 3));
+ p2 = (conversion == TO_UNORM) ?
+ nir_vftounorm8_v3d(b, p2) : nir_vftosnorm8_v3d(b, p2);
+ }
+ } else {
+ /* Using an undef here would be more correct. But for this
+ * case we are getting worse shader-db values with some CTS
+ * tests, so we just reuse the first packing.
+ */
+ p2 = p1;
+ }
+
+ return nir_v8pack_v3d(b, p1, p2);
+}
+
+static inline nir_def *
+pack_16bit(nir_builder *b, nir_def *color,
+ unsigned num_components,
+ enum hw_conversion conversion)
+{
+ nir_def *results[2];
+ nir_def *channels[4];
+
+ /* Note that usually you should not use this method (that relies on
+ * custom packing) if we are not doing any conversion. But we support
+ * also that case, and let the caller decide which method to use.
+ */
+
+ for (unsigned i = 0; i < num_components; i++) {
+ channels[i] = nir_channel(b, color, i);
+ switch (conversion) {
+ case TO_SNORM:
+ channels[i] = nir_ftosnorm16_v3d(b, channels[i]);
+ break;
+ case TO_UNORM:
+ channels[i] = nir_ftounorm16_v3d(b, channels[i]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (num_components) {
+ case 1:
+ results[0] = channels[0];
+ break;
+ case 4:
+ results[1] = nir_vpack_v3d(b, channels[2], channels[3]);
+ FALLTHROUGH;
+ case 2:
+ results[0] = nir_vpack_v3d(b, channels[0], channels[1]);
+ break;
+ }
+
+ return nir_vec(b, results, DIV_ROUND_UP(num_components, 2));
+}
+
+static inline nir_def *
+pack_xbit(nir_builder *b, nir_def *color,
+ unsigned num_components,
+ const struct util_format_channel_description *r_chan)
+{
+ bool pack_mask = (r_chan->type == UTIL_FORMAT_TYPE_SIGNED);
+ enum hw_conversion conversion = NONE;
+ if (r_chan->normalized) {
+ conversion =
+ (r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) ? TO_UNORM : TO_SNORM;
+ }
+
+ switch (r_chan->size) {
+ case 8:
+ if (conversion == NONE && num_components < 2)
+ return pack_bits(b, color, bits_8, num_components, pack_mask);
+ else
+ return pack_8bit(b, color, num_components, conversion);
+ break;
+ case 16:
+ /* pack_mask implies that the generic packing method would
+ * need to include extra operations to handle negative values,
+ * so in that case, even without a conversion, it is better to
+ * use the packing using custom hw operations.
+ */
+ if (conversion == NONE && !pack_mask)
+ return pack_bits(b, color, bits_16, num_components, pack_mask);
+ else
+ return pack_16bit(b, color, num_components, conversion);
+ break;
+ default:
+ unreachable("unrecognized bits");
+ }
+}
+
static bool
-v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
+v3d_nir_lower_image_store_v42(nir_builder *b, nir_intrinsic_instr *instr)
{
enum pipe_format format = nir_intrinsic_format(instr);
assert(format != PIPE_FORMAT_NONE);
@@ -118,9 +296,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
*/
formatted = color;
} else {
- static const unsigned bits_8[4] = {8, 8, 8, 8};
- static const unsigned bits_16[4] = {16, 16, 16, 16};
- static const unsigned bits_1010102[4] = {10, 10, 10, 2};
const unsigned *bits;
switch (r_chan->size) {
@@ -170,6 +345,50 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
return true;
}
+
+static bool
+v3d_nir_lower_image_store_v71(nir_builder *b, nir_intrinsic_instr *instr)
+{
+ enum pipe_format format = nir_intrinsic_format(instr);
+ assert(format != PIPE_FORMAT_NONE);
+ const struct util_format_description *desc =
+ util_format_description(format);
+ const struct util_format_channel_description *r_chan = &desc->channel[0];
+ unsigned num_components = util_format_get_nr_components(format);
+ b->cursor = nir_before_instr(&instr->instr);
+
+ nir_def *color =
+ nir_trim_vector(b, instr->src[3].ssa, num_components);
+ nir_def *formatted = NULL;
+ if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+ formatted = nir_format_pack_r9g9b9e5(b, color);
+ } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+ formatted = pack_11f11f10f(b, color);
+ } else if (format == PIPE_FORMAT_R10G10B10A2_UINT) {
+ formatted = pack_r10g10b10a2_uint(b, color);
+ } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM) {
+ formatted = pack_r10g10b10a2_unorm(b, color);
+ } else if (r_chan->size == 32) {
+ /* For 32-bit formats, we just have to move the vector
+ * across (possibly reducing the number of channels).
+ */
+ formatted = color;
+ } else if (r_chan->type == UTIL_FORMAT_TYPE_FLOAT) {
+ assert(r_chan->size == 16);
+ formatted = nir_format_float_to_half(b, color);
+ formatted = pack_bits(b, formatted, bits_16, num_components,
+ false);
+ } else {
+ assert(r_chan->size == 8 || r_chan->size == 16);
+ formatted = pack_xbit(b, color, num_components, r_chan);
+ }
+
+ nir_src_rewrite(&instr->src[3], formatted);
+ instr->num_components = formatted->num_components;
+
+ return true;
+}
+
static bool
v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
{
@@ -207,11 +426,17 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
nir_intrinsic_instr *intr,
void *_state)
{
+ struct v3d_compile *c = (struct v3d_compile *) _state;
+
switch (intr->intrinsic) {
case nir_intrinsic_image_load:
return v3d_nir_lower_image_load(b, intr);
case nir_intrinsic_image_store:
- return v3d_nir_lower_image_store(b, intr);
+ if (c->devinfo->ver >= 71)
+ return v3d_nir_lower_image_store_v71(b, intr);
+ else
+ return v3d_nir_lower_image_store_v42(b, intr);
+ break;
default:
return false;
}
@@ -220,10 +445,10 @@ v3d_nir_lower_image_load_store_cb(nir_builder *b,
}
bool
-v3d_nir_lower_image_load_store(nir_shader *s)
+v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c)
{
return nir_shader_intrinsics_pass(s,
v3d_nir_lower_image_load_store_cb,
nir_metadata_block_index |
- nir_metadata_dominance, NULL);
+ nir_metadata_dominance, c);
}
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 8c536b8fbcc..acb13a6cbf9 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -1599,7 +1599,7 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS(_, c->s, v3d_nir_lower_io, c);
NIR_PASS(_, c->s, v3d_nir_lower_txf_ms);
- NIR_PASS(_, c->s, v3d_nir_lower_image_load_store);
+ NIR_PASS(_, c->s, v3d_nir_lower_image_load_store, c);
NIR_PASS(_, c->s, nir_opt_idiv_const, 8);
nir_lower_idiv_options idiv_options = {
--
2.39.2

View File

@ -1,42 +0,0 @@
From b58e1d7fd1c315e6ada0ad9ec4961b65c88f0c2a Mon Sep 17 00:00:00 2001
From: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Date: Mon, 4 Oct 2021 14:30:30 +0200
Subject: [PATCH 137/142] dri: Limit the max_num_back to 2 on
COMPLETE_MODE_FLIP present mode
This is limiting the number of back buffers that mesa can allocate, so
this avoids triple buffering, although that is desirable in some cases.
To get this to upstream, we could convert it to a DRI option
and enable it only in the case of using mutter.
It seems to be feasible to limit this to some kind of configuration, as
we have access to the size of the back-buffer allocated. For example,
only limit for 4k-dual screen setup.
With this Raspberry OS start-up CMA usage is 210Mb with 4k-dual screen
setup instead of 276Mb.
The correct approach would be to check if we can make Mutter to wait
for buffer swaps before starting a new frame.
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7033
---
src/loader/loader_dri3_helper.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 32135770e9d..2534c817dcc 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -275,7 +275,7 @@ dri3_update_max_num_back(struct loader_dri3_drawable *draw)
if (draw->swap_interval == 0)
draw->max_num_back = 4;
else
- draw->max_num_back = 3;
+ draw->max_num_back = 2;
assert(draw->max_num_back <= LOADER_DRI3_MAX_BACK);
break;
--
2.39.2

View File

@ -1,361 +0,0 @@
From d0f2a99045fa9835fea822ada58a344e2fdc1b13 Mon Sep 17 00:00:00 2001
From: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Date: Thu, 21 Oct 2021 22:04:57 +0200
Subject: [PATCH 138/142] v3d: Ignore SCANOUT usage flags when not needed under
X
These downstream patches force the usage of tiled formats
when possible. They have been tested for the Raspberry Pi OS
desktop environment using Mutter+Xserver.
It includes the following 3 patches:
- v3d: Add driconf options to rewrite SCANOUT usages
- v3d: Check if are under X session
- v3d: enable options to ignore SCANOUT flag on resource creation
v3d: Add driconf options to rewrite SCANOUT usages
We create a new environment variable V3D_IGNORE_SCANOUT_USAGES
that will affect v3d_resource_create_with_modifiers so
SCANOUT usages can be ignored. It can be enabled under X11
with a compositor so applications are forced to use tiled render
buffers instead of the default behaviour that uses SCANOUT and
consumes the limited CMA memory in the RPi4.
The two new driconf options modulate the effect on two applications
Xorg and mutter.
"v3d_maintain_ignorable_scanout": is enabled in mutter and could be used
in other compositors; the objective is that, even when the environment has
enabled V3D_IGNORE_SCANOUT_USAGES, they aren't ignored in the compositor.
"v3d_is_xserver_process": is used to handle a particular case
to avoid checking if an Xserver connection is available using XCB
as in some cases the call stalls the Xserver on boot.
Following patches will use this configuration options to ignore or not
the SCANOUT usage on v3d_resource_allocation with modifiers.
Upstreaming this patch need to review the effects of:
ad50b47a14e9 ("gbm: assume USE_SCANOUT in create_with_modifiers")
v2: driconf for v3d_is_xserver_process is needed under XWayland
to avoid XCB connections in the XWayland process.
v3d: Check if are under X session
If we are using Wayland + XWayland, this is considered *not* being under
X session.
v3d: enable options to ignore SCANOUT flag on resource creation
This is a downstream patch for enabling the usage of more tiled
buffers in Raspberry OS under an environment using mutter and Xorg.
This patch enables the following behaviour in order to reduce the
number of CMA usage and use tiled layouts because we ignore
the possible SCANOUT usage of the resource.
This patch makes mutter to not ignore SCANOUT flags because as
compositor it should allocate linear render buffers suitable for display.
Then if the Xserver has enabled the dmabuf_capable option, the
buffers backing the windows pixmaps will allocate using modifiers,
in the patched Xserver downstream making pixmaps exportable will use
gbm_gbm_bo_create_with_modifiers2 that does not add the SCANOUT flag
for exporting pixmaps. With the Mutter compositor we didn't find a
situation where these pixmaps needed to be SCANOUT. This is not certain,
but it allows us to not use CMA for every window opened, and having them
in tiled format saves all linear->tiled conversion for sampling.
Finally, to take advantage of using tiled render buffers for applications
we can enable V3D_IGNORE_SCANOUT_USAGES in the environment so all render
targets use the tiled UIF format without CMA memory instead of a linear one.
As the compositor mutter will composite the final surface for display we
aren't going to use the SCANOUT flag. This only applies if we are under
an X11 session.
v2: v3d: ignore V3D_IGNORE_SCANOUT if only LINEAR modifier available
This is a fixup for the behaviour of ignoring SCANOUT flags
so we don't allocate CMA memory on V3D for render targets under
X11 as UIF isn't included and only LINEAR is a valid modifier
when Xserver is using msdri3. So we cannot ignore the SCANOUT flag.
As the Xserver in this situation is limiting the available modifiers
to linear, we can identify this case just not ignoring the SCANOUT
flag when we can only allocate linear resources.
---
src/gallium/drivers/v3d/driinfo_v3d.h | 2 +
src/gallium/drivers/v3d/meson.build | 17 +++++---
src/gallium/drivers/v3d/v3d_resource.c | 31 ++++++++++++--
src/gallium/drivers/v3d/v3d_screen.c | 59 ++++++++++++++++++++++++++
src/gallium/drivers/v3d/v3d_screen.h | 6 +++
src/util/00-mesa-defaults.conf | 3 ++
src/util/driconf.h | 8 ++++
7 files changed, 117 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/v3d/driinfo_v3d.h b/src/gallium/drivers/v3d/driinfo_v3d.h
index 147ad0b49bd..8f989e8aa57 100644
--- a/src/gallium/drivers/v3d/driinfo_v3d.h
+++ b/src/gallium/drivers/v3d/driinfo_v3d.h
@@ -2,4 +2,6 @@
DRI_CONF_SECTION_MISCELLANEOUS
DRI_CONF_V3D_NONMSAA_TEXTURE_SIZE_LIMIT(false)
+ DRI_CONF_V3D_MAINTAIN_IGNORABLE_SCANOUT(false)
+ DRI_CONF_V3D_IS_XSERVER_PROCESS(false)
DRI_CONF_SECTION_END
diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build
index 289473d2ca1..e47682db1aa 100644
--- a/src/gallium/drivers/v3d/meson.build
+++ b/src/gallium/drivers/v3d/meson.build
@@ -61,6 +61,16 @@ endif
v3d_versions = ['33', '42', '71']
+v3d_deps = [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers]
+
+if with_platform_x11
+ v3d_deps += dep_xcb
+endif
+
+if with_platform_wayland
+ v3d_deps += dep_wayland_client
+endif
+
per_version_libs = []
foreach ver : v3d_versions
per_version_libs += static_library(
@@ -72,7 +82,7 @@ foreach ver : v3d_versions
],
c_args : [v3d_args, '-DV3D_VERSION=' + ver],
gnu_symbol_visibility : 'hidden',
- dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers],
+ dependencies : v3d_deps,
)
endforeach
@@ -95,10 +105,7 @@ libv3d = static_library(
c_args : [v3d_args],
cpp_args : [v3d_args],
gnu_symbol_visibility : 'hidden',
- dependencies : [
- dep_v3dv3, dep_libdrm, dep_valgrind,
- idep_nir_headers, idep_mesautil,
- ],
+ dependencies : v3d_deps + idep_mesautil,
link_with: [per_version_libs],
)
diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c
index a0a210ccad5..46de1b16ae0 100644
--- a/src/gallium/drivers/v3d/v3d_resource.c
+++ b/src/gallium/drivers/v3d/v3d_resource.c
@@ -439,7 +439,7 @@ v3d_resource_get_handle(struct pipe_screen *pscreen,
case WINSYS_HANDLE_TYPE_SHARED:
return v3d_bo_flink(bo, &whandle->handle);
case WINSYS_HANDLE_TYPE_KMS:
- if (screen->ro) {
+ if (screen->ro && rsc->scanout) {
if (renderonly_get_handle(rsc->scanout, whandle)) {
whandle->stride = rsc->slices[0].stride;
return true;
@@ -785,6 +785,27 @@ v3d_resource_setup(struct pipe_screen *pscreen,
return rsc;
}
+static bool
+v3d_resource_should_scanout(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmpl,
+ const uint64_t *modifiers,
+ int count)
+{
+ struct v3d_screen *screen = v3d_screen(pscreen);
+
+ if (tmpl->bind & PIPE_BIND_SCANOUT) {
+ if (screen->maintain_ignorable_scanout)
+ return true;
+ if (screen->has_x_session && screen->ignore_scanout_usages) {
+ if (drm_find_modifier(DRM_FORMAT_MOD_BROADCOM_UIF,
+ modifiers, count))
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
static struct pipe_resource *
v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl,
@@ -798,6 +819,8 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
struct pipe_resource *prsc = &rsc->base;
/* Use a tiled layout if we can, for better 3D performance. */
bool should_tile = true;
+ bool should_scanout = v3d_resource_should_scanout(pscreen, tmpl,
+ modifiers, count);
assert(tmpl->target != PIPE_BUFFER ||
(tmpl->format == PIPE_FORMAT_NONE ||
@@ -827,7 +850,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
/* If using the old-school SCANOUT flag, we don't know what the screen
* might support other than linear. Just force linear.
*/
- if (tmpl->bind & PIPE_BIND_SCANOUT)
+ if ((tmpl->bind & PIPE_BIND_SCANOUT) && should_scanout)
should_tile = false;
/* No user-specified modifier; determine our own. */
@@ -849,7 +872,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
v3d_setup_slices(rsc, 0, tmpl->bind & PIPE_BIND_SHARED);
- if (screen->ro && (tmpl->bind & PIPE_BIND_SCANOUT)) {
+ if (screen->ro && should_scanout) {
struct winsys_handle handle;
struct pipe_resource scanout_tmpl = {
.target = prsc->target,
@@ -979,7 +1002,7 @@ v3d_resource_from_handle(struct pipe_screen *pscreen,
}
}
- if (screen->ro) {
+ if (screen->ro && !rsc->tiled) {
/* Make sure that renderonly has a handle to our buffer in the
* display's fd, so that a later renderonly_get_handle()
* returns correct handles or GEM names.
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 2225edf85bd..1d4f619d710 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -47,6 +47,42 @@
#include "compiler/v3d_compiler.h"
#include "drm-uapi/drm_fourcc.h"
+#ifdef HAVE_WAYLAND_PLATFORM
+#include <wayland-client.h>
+#endif
+
+#ifdef HAVE_X11_PLATFORM
+#include <xcb/xcb.h>
+#endif
+
+/* Reports whether this process runs in a plain X session: a reachable
+ * Wayland display takes precedence and yields false.
+ */
+static bool
+check_x_session(void)
+{
+        bool xcb_connection = false;
+
+#ifdef HAVE_WAYLAND_PLATFORM
+        struct wl_display *display = wl_display_connect(NULL);
+
+        if (display) {
+                wl_display_disconnect(display);
+                return false;
+        }
+#endif
+
+#ifdef HAVE_X11_PLATFORM
+        /* Disconnect unconditionally: the handle is released whether or
+         * not the connection attempt succeeded.
+         */
+        xcb_connection_t *conn = xcb_connect(NULL, NULL);
+        xcb_connection = !xcb_connection_has_error(conn);
+        xcb_disconnect(conn);
+#endif
+        return xcb_connection;
+}
+
static const char *
v3d_screen_get_name(struct pipe_screen *pscreen)
{
@@ -945,6 +981,29 @@ v3d_screen_create(int fd, const struct pipe_screen_config *config,
v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH);
screen->has_perfmon = v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_PERFMON);
+ screen->ignore_scanout_usages = getenv("V3D_IGNORE_SCANOUT_USAGES");
+
+ const char *is_xserver_process =
+ "v3d_is_xserver_process";
+ screen->is_xserver_process =
+ driCheckOption(config->options,
+ is_xserver_process,
+ DRI_BOOL) &&
+ driQueryOptionb(config->options,
+ is_xserver_process);
+
+ const char *maintain_ignorable_scanout_name =
+ "v3d_maintain_ignorable_scanout";
+ screen->maintain_ignorable_scanout =
+ driCheckOption(config->options,
+ maintain_ignorable_scanout_name,
+ DRI_BOOL) &&
+ driQueryOptionb(config->options,
+ maintain_ignorable_scanout_name);
+
+ screen->has_x_session = !screen->is_xserver_process &&
+ check_x_session();
+
v3d_fence_init(screen);
v3d_process_debug_variable();
diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h
index 1da9b83c965..c0f22707075 100644
--- a/src/gallium/drivers/v3d/v3d_screen.h
+++ b/src/gallium/drivers/v3d/v3d_screen.h
@@ -83,6 +83,12 @@ struct v3d_screen {
bool has_cache_flush;
bool has_perfmon;
bool nonmsaa_texture_size_limit;
+ bool ignore_scanout_usages;
+ bool is_xserver_process;
+ bool maintain_ignorable_scanout;
+
+ /* Are we running in an X session? */
+ bool has_x_session;
struct v3d_simulator_file *sim_file;
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index 948c1ef78ba..2de7505521c 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -767,6 +768,7 @@ TODO: document the other workarounds.
<application name="mutter" executable="mutter">
<option name="adaptive_sync" value="false" />
<option name="v3d_nonmsaa_texture_size_limit" value="true" />
+ <option name="v3d_maintain_ignorable_scanout" value="true" />
</application>
<application name="muffin" executable="muffin">
<option name="adaptive_sync" value="false" />
@@ -818,6 +820,7 @@ TODO: document the other workarounds.
</application>
<application name="Xorg" executable="Xorg">
<option name="v3d_nonmsaa_texture_size_limit" value="true" />
+ <option name="v3d_is_xserver_process" value="true" />
</application>
<application name="gfxbench" executable="testfw_app">
diff --git a/src/util/driconf.h b/src/util/driconf.h
index 042ee27d9a3..56511f6615e 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -521,6 +521,14 @@
DRI_CONF_OPT_B(v3d_nonmsaa_texture_size_limit, def, \
"Report the non-MSAA-only texture size limit")
+#define DRI_CONF_V3D_IS_XSERVER_PROCESS(def) \
+ DRI_CONF_OPT_B(v3d_is_xserver_process, def, \
+ "Identifies if the application is the Xserver.")
+
+#define DRI_CONF_V3D_MAINTAIN_IGNORABLE_SCANOUT(def) \
+ DRI_CONF_OPT_B(v3d_maintain_ignorable_scanout, def, \
+ "Maintain SCANOUT usage on resource allocations when the environment allows ignoring SCANOUT usage.")
+
/**
* \brief virgl specific configuration options
*/
--
2.39.2

View File

@ -1,117 +0,0 @@
From fc1fe85f01a67ef6e5758f1022950ad79b1b305a Mon Sep 17 00:00:00 2001
From: Neil Roberts <nroberts@igalia.com>
Date: Mon, 5 Jul 2021 20:19:06 +0200
Subject: [PATCH 139/142] Add a hack to avoid the shadow tex update for
imported linear texs
This adds a hacky interface so that an application can override the
mechanism used to detect when to update the shadow texture which is used
when importing a linear texture. The application can enable this by
calling:
glTexParameteri(GL_TEXTURE_2D, GL_SYNC_CONDITION, 1);
And then whenever it determines that the shadow texture should be
updated it can call:
glTexParameteri(GL_TEXTURE_2D, GL_SYNC_STATUS, 1);
(cherry picked from commit 1269e2cfbfa876fdc85037b9435085174d76ad57)
---
src/gallium/drivers/v3d/v3d_resource.c | 5 ++++-
src/gallium/include/pipe/p_state.h | 4 ++++
src/mesa/main/mtypes.h | 3 +++
src/mesa/main/texparam.c | 18 ++++++++++++++++++
4 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c
index 46de1b16ae0..8e31acb0ff0 100644
--- a/src/gallium/drivers/v3d/v3d_resource.c
+++ b/src/gallium/drivers/v3d/v3d_resource.c
@@ -1048,7 +1048,9 @@ v3d_update_shadow_texture(struct pipe_context *pctx,
assert(view->texture != pview->texture);
- if (shadow->writes == orig->writes && orig->bo->private)
+ if (shadow->writes == orig->writes &&
+ orig->base.sync_status == 0 &&
+ (orig->bo->private || orig->base.sync_condition))
return;
perf_debug("Updating %dx%d@%d shadow for linear texture\n",
@@ -1091,6 +1093,7 @@ v3d_update_shadow_texture(struct pipe_context *pctx,
}
shadow->writes = orig->writes;
+ orig->base.sync_status = 0;
}
static struct pipe_surface *
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 549e4d21c05..abc58552544 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -610,6 +610,10 @@ struct pipe_resource
unsigned bind; /**< bitmask of PIPE_BIND_x */
unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */
+ /* Hack for avoiding sync on v3d */
+ unsigned sync_condition;
+ unsigned sync_status;
+
/**
* For planar images, ie. YUV EGLImage external, etc, pointer to the
* next plane.
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 77c38bf48d5..1eb2dac8018 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1058,6 +1058,9 @@ struct gl_texture_object
* the pipe_resource *pt above.
*/
bool needs_validation;
+
+ /* Hack for avoiding sync on v3d */
+ GLboolean SyncCondition;
};
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 001cc185722..139db3ce3e2 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -274,6 +274,13 @@ set_tex_parameteri(struct gl_context *ctx,
}
switch (pname) {
+ case GL_SYNC_CONDITION:
+ if (!!texObj->SyncCondition == !!params[0])
+ return GL_FALSE;
+ texObj->SyncCondition = !!params[0];
+ return GL_TRUE;
+ case GL_SYNC_STATUS:
+ return GL_TRUE;
case GL_TEXTURE_MIN_FILTER:
if (!_mesa_target_allows_setting_sampler_parameters(texObj->Target))
goto invalid_dsa;
@@ -931,6 +938,17 @@ _mesa_texture_parameter_invalidate(struct gl_context *ctx,
{
if (texparam_invalidates_sampler_views(pname))
st_texture_release_all_sampler_views(st_context(ctx), texObj);
+
+ switch (pname) {
+ case GL_SYNC_CONDITION:
+ texObj->pt->sync_condition = texObj->SyncCondition;
+ break;
+ case GL_SYNC_STATUS:
+ texObj->pt->sync_status = 1;
+ break;
+ default:
+ ; /* nothing */
+ }
}
void
--
2.39.2