mirror of
https://github.com/LibreELEC/LibreELEC.tv.git
synced 2025-07-29 13:46:49 +00:00
linux (Generic): Replace waitboosting revert with Interactive RPS Mode fix
This commit is contained in:
parent
6b607ab2ec
commit
2a3c34d22e
@ -0,0 +1,265 @@
|
|||||||
|
From 77a8e4fee1ad8cf7ec4a8899f76dfb9303b422f8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chris Wilson <chris@chris-wilson.co.uk>
|
||||||
|
Date: Wed, 4 Jul 2018 08:45:48 +0100
|
||||||
|
Subject: [PATCH] drm/i915: Interactive RPS mode
|
||||||
|
|
||||||
|
RPS provides a feedback loop where we use the load during the previous
|
||||||
|
evaluation interval to decide whether to up or down clock the GPU
|
||||||
|
frequency. Our responsiveness is split into 3 regimes, a high and low
|
||||||
|
plateau with the intent to keep the gpu clocked high to cover occasional
|
||||||
|
stalls under high load, and low despite occasional glitches under steady
|
||||||
|
low load, and inbetween. However, we run into situations like kodi where
|
||||||
|
we want to stay at low power (video decoding is done efficiently
|
||||||
|
inside the fixed function HW and doesn't need high clocks even for high
|
||||||
|
bitrate streams), but just occasionally the pipeline is more complex
|
||||||
|
than a video decode and we need a smidgen of extra GPU power to present
|
||||||
|
on time. In the high power regime, we sample at sub frame intervals with
|
||||||
|
a bias to upclocking, and conversely at low power we sample over a few
|
||||||
|
frames worth to provide what we consider to be the right levels of
|
||||||
|
responsiveness respectively. At low power, we more or less expect to be
|
||||||
|
kicked out to high power at the start of a busy sequence by waitboosting.
|
||||||
|
|
||||||
|
Prior to commit e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active
|
||||||
|
request") whenever we missed the frame or stalled, we would immediate go
|
||||||
|
full throttle and upclock the GPU to max. But in commit e9af4ea2b9e7, we
|
||||||
|
relaxed the waitboosting to only apply if the pipeline was deep to avoid
|
||||||
|
over-committing resources for a near miss. Sadly though, a near miss is
|
||||||
|
still a miss, and perceptible as jitter in the frame delivery.
|
||||||
|
|
||||||
|
To try and prevent the near miss before having to resort to boosting
|
||||||
|
after the fact, we use the pageflip queue as an indication that we are
|
||||||
|
in an "interactive" regime and so should sample the load more frequently
|
||||||
|
to provide power before the frame misses it vblank. This will make us
|
||||||
|
more favorable to providing a small power increase (one or two bins) as
|
||||||
|
required rather than going all the way to maximum and then having to
|
||||||
|
work back down again. (We still keep the waitboosting mechanism around
|
||||||
|
just in case a dramatic change in system load requires urgent uplocking,
|
||||||
|
faster than we can provide in a few evaluation intervals.)
|
||||||
|
|
||||||
|
References: https://bugs.freedesktop.org/show_bug.cgi?id=107111
|
||||||
|
References: e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request")
|
||||||
|
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
|
||||||
|
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
|
||||||
|
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
|
||||||
|
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
|
||||||
|
---
|
||||||
|
drivers/gpu/drm/i915/i915_debugfs.c | 1 +
|
||||||
|
drivers/gpu/drm/i915/i915_drv.h | 6 +-
|
||||||
|
drivers/gpu/drm/i915/intel_display.c | 20 +++++++
|
||||||
|
drivers/gpu/drm/i915/intel_drv.h | 2 +
|
||||||
|
drivers/gpu/drm/i915/intel_pm.c | 87 ++++++++++++++++++----------
|
||||||
|
5 files changed, 86 insertions(+), 30 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
|
||||||
|
index 544e5e7f011f..7d974cad1cdf 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_debugfs.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
|
||||||
|
@@ -2218,6 +2218,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
|
||||||
|
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
|
||||||
|
seq_printf(m, "Boosts outstanding? %d\n",
|
||||||
|
atomic_read(&rps->num_waiters));
|
||||||
|
+ seq_printf(m, "Power overrides? %d\n", READ_ONCE(rps->power_override));
|
||||||
|
seq_printf(m, "Frequency requested %d\n",
|
||||||
|
intel_gpu_freq(dev_priv, rps->cur_freq));
|
||||||
|
seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
|
||||||
|
index 09ab12458244..3f9ae1095b98 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_drv.h
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_drv.h
|
||||||
|
@@ -783,7 +783,9 @@ struct intel_rps {
|
||||||
|
u8 down_threshold; /* Current %busy required to downclock */
|
||||||
|
|
||||||
|
int last_adj;
|
||||||
|
- enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
|
||||||
|
+ enum { LOW_POWER, BETWEEN, HIGH_POWER, AUTO_POWER } power;
|
||||||
|
+ unsigned int power_override;
|
||||||
|
+ struct mutex power_lock;
|
||||||
|
|
||||||
|
bool enabled;
|
||||||
|
atomic_t num_waiters;
|
||||||
|
@@ -3429,6 +3431,8 @@ extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
|
||||||
|
extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
|
||||||
|
extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
|
||||||
|
extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
|
||||||
|
+extern void intel_rps_set_power(struct drm_i915_private *dev_priv,
|
||||||
|
+ int new_power);
|
||||||
|
extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
|
||||||
|
bool enable);
|
||||||
|
|
||||||
|
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
|
||||||
|
index 8f3199b06d1f..acedd9b2d54a 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/intel_display.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/intel_display.c
|
||||||
|
@@ -13104,6 +13104,19 @@ intel_prepare_plane_fb(struct drm_plane *plane,
|
||||||
|
add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * We declare pageflips to be interactive and so merit a small bias
|
||||||
|
+ * towards upclocking to deliver the frame on time. By only changing
|
||||||
|
+ * the RPS thresholds to sample more regularly and aim for higher
|
||||||
|
+ * clocks we can hopefully deliver low power workloads (like kodi)
|
||||||
|
+ * that are not quite steady state without resorting to forcing
|
||||||
|
+ * maximum clocks following a vblank miss (see do_rps_boost()).
|
||||||
|
+ */
|
||||||
|
+ if (!intel_state->rps_override) {
|
||||||
|
+ intel_rps_set_power(dev_priv, HIGH_POWER);
|
||||||
|
+ intel_state->rps_override = true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -13120,8 +13133,15 @@ void
|
||||||
|
intel_cleanup_plane_fb(struct drm_plane *plane,
|
||||||
|
struct drm_plane_state *old_state)
|
||||||
|
{
|
||||||
|
+ struct intel_atomic_state *intel_state =
|
||||||
|
+ to_intel_atomic_state(old_state->state);
|
||||||
|
struct drm_i915_private *dev_priv = to_i915(plane->dev);
|
||||||
|
|
||||||
|
+ if (intel_state->rps_override) {
|
||||||
|
+ intel_rps_set_power(dev_priv, AUTO_POWER);
|
||||||
|
+ intel_state->rps_override = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Should only be called after a successful intel_prepare_plane_fb()! */
|
||||||
|
mutex_lock(&dev_priv->drm.struct_mutex);
|
||||||
|
intel_plane_unpin_fb(to_intel_plane_state(old_state));
|
||||||
|
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
|
||||||
|
index 61e715ddd0d5..86685c43e1d2 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/intel_drv.h
|
||||||
|
+++ b/drivers/gpu/drm/i915/intel_drv.h
|
||||||
|
@@ -482,6 +482,8 @@ struct intel_atomic_state {
|
||||||
|
*/
|
||||||
|
bool skip_intermediate_wm;
|
||||||
|
|
||||||
|
+ bool rps_override;
|
||||||
|
+
|
||||||
|
/* Gen9+ only */
|
||||||
|
struct skl_ddb_values wm_results;
|
||||||
|
|
||||||
|
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
|
||||||
|
index 53aaaa3e6886..0245a90d0fe5 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/intel_pm.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/intel_pm.c
|
||||||
|
@@ -6264,41 +6264,14 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
|
||||||
|
return limits;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
|
||||||
|
+static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
|
||||||
|
{
|
||||||
|
struct intel_rps *rps = &dev_priv->gt_pm.rps;
|
||||||
|
- int new_power;
|
||||||
|
u32 threshold_up = 0, threshold_down = 0; /* in % */
|
||||||
|
u32 ei_up = 0, ei_down = 0;
|
||||||
|
|
||||||
|
- new_power = rps->power;
|
||||||
|
- switch (rps->power) {
|
||||||
|
- case LOW_POWER:
|
||||||
|
- if (val > rps->efficient_freq + 1 &&
|
||||||
|
- val > rps->cur_freq)
|
||||||
|
- new_power = BETWEEN;
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case BETWEEN:
|
||||||
|
- if (val <= rps->efficient_freq &&
|
||||||
|
- val < rps->cur_freq)
|
||||||
|
- new_power = LOW_POWER;
|
||||||
|
- else if (val >= rps->rp0_freq &&
|
||||||
|
- val > rps->cur_freq)
|
||||||
|
- new_power = HIGH_POWER;
|
||||||
|
- break;
|
||||||
|
+ lockdep_assert_held(&rps->power_lock);
|
||||||
|
|
||||||
|
- case HIGH_POWER:
|
||||||
|
- if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
|
||||||
|
- val < rps->cur_freq)
|
||||||
|
- new_power = BETWEEN;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- /* Max/min bins are special */
|
||||||
|
- if (val <= rps->min_freq_softlimit)
|
||||||
|
- new_power = LOW_POWER;
|
||||||
|
- if (val >= rps->max_freq_softlimit)
|
||||||
|
- new_power = HIGH_POWER;
|
||||||
|
if (new_power == rps->power)
|
||||||
|
return;
|
||||||
|
|
||||||
|
@@ -6365,9 +6338,64 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
|
||||||
|
rps->power = new_power;
|
||||||
|
rps->up_threshold = threshold_up;
|
||||||
|
rps->down_threshold = threshold_down;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
|
||||||
|
+{
|
||||||
|
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
|
||||||
|
+ int new_power;
|
||||||
|
+
|
||||||
|
+ new_power = rps->power;
|
||||||
|
+ switch (rps->power) {
|
||||||
|
+ case AUTO_POWER:
|
||||||
|
+ case LOW_POWER:
|
||||||
|
+ if (val > rps->efficient_freq + 1 &&
|
||||||
|
+ val > rps->cur_freq)
|
||||||
|
+ new_power = BETWEEN;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case BETWEEN:
|
||||||
|
+ if (val <= rps->efficient_freq &&
|
||||||
|
+ val < rps->cur_freq)
|
||||||
|
+ new_power = LOW_POWER;
|
||||||
|
+ else if (val >= rps->rp0_freq &&
|
||||||
|
+ val > rps->cur_freq)
|
||||||
|
+ new_power = HIGH_POWER;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case HIGH_POWER:
|
||||||
|
+ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
|
||||||
|
+ val < rps->cur_freq)
|
||||||
|
+ new_power = BETWEEN;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ /* Max/min bins are special */
|
||||||
|
+ if (val <= rps->min_freq_softlimit)
|
||||||
|
+ new_power = LOW_POWER;
|
||||||
|
+ if (val >= rps->max_freq_softlimit)
|
||||||
|
+ new_power = HIGH_POWER;
|
||||||
|
+
|
||||||
|
+ mutex_lock(&rps->power_lock);
|
||||||
|
+ if (!rps->power_override)
|
||||||
|
+ rps_set_power(dev_priv, new_power);
|
||||||
|
+ mutex_unlock(&rps->power_lock);
|
||||||
|
rps->last_adj = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+void intel_rps_set_power(struct drm_i915_private *dev_priv, int power)
|
||||||
|
+{
|
||||||
|
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
|
||||||
|
+
|
||||||
|
+ mutex_lock(&rps->power_lock);
|
||||||
|
+ if (power != AUTO_POWER) {
|
||||||
|
+ rps->power_override++;
|
||||||
|
+ rps_set_power(dev_priv, power);
|
||||||
|
+ } else {
|
||||||
|
+ rps->power_override--;
|
||||||
|
+ }
|
||||||
|
+ mutex_unlock(&rps->power_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
|
||||||
|
{
|
||||||
|
struct intel_rps *rps = &dev_priv->gt_pm.rps;
|
||||||
|
@@ -9604,6 +9632,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
|
||||||
|
void intel_pm_setup(struct drm_i915_private *dev_priv)
|
||||||
|
{
|
||||||
|
mutex_init(&dev_priv->pcu_lock);
|
||||||
|
+ mutex_init(&dev_priv->gt_pm.rps.power_lock);
|
||||||
|
|
||||||
|
atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.18.0
|
||||||
|
|
@ -1,36 +0,0 @@
|
|||||||
From 836bcce2ecdd9d6ea0650acc9da4f74239a8463b Mon Sep 17 00:00:00 2001
|
|
||||||
From: MilhouseVH <milhouseVH.github@nmacleod.com>
|
|
||||||
Date: Tue, 3 Jul 2018 06:17:27 +0100
|
|
||||||
Subject: [PATCH] drm/i915: partial revert of
|
|
||||||
e9af4ea2b9e7e5d3caa6354be14de06b678ed0fa
|
|
||||||
|
|
||||||
The original change leaves the GPU more or less permanently in low power mode
|
|
||||||
with the GPU running at idle frequency and never boosting which may result in
|
|
||||||
stutter in the Kodi GUI. Tested on Skylake NUC6i5SYH.
|
|
||||||
---
|
|
||||||
drivers/gpu/drm/i915/i915_request.h | 9 +--------
|
|
||||||
1 file changed, 1 insertion(+), 8 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
|
|
||||||
index eddbd424..3f2895a 100644
|
|
||||||
--- a/drivers/gpu/drm/i915/i915_request.h
|
|
||||||
+++ b/drivers/gpu/drm/i915/i915_request.h
|
|
||||||
@@ -298,14 +298,7 @@ static inline bool i915_request_completed(const struct i915_request *rq)
|
|
||||||
|
|
||||||
static inline bool i915_request_started(const struct i915_request *rq)
|
|
||||||
{
|
|
||||||
- u32 seqno;
|
|
||||||
-
|
|
||||||
- seqno = i915_request_global_seqno(rq);
|
|
||||||
- if (!seqno)
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- return i915_seqno_passed(intel_engine_get_seqno(rq->engine),
|
|
||||||
- seqno - 1);
|
|
||||||
+ return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
|
|
||||||
--
|
|
||||||
2.14.1
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user