diff --git a/packages/linux/patches/default/linux-999-drm_i915-interactive-rps-mode.patch b/packages/linux/patches/default/linux-999-drm_i915-interactive-rps-mode.patch new file mode 100644 index 0000000000..3dff8206d8 --- /dev/null +++ b/packages/linux/patches/default/linux-999-drm_i915-interactive-rps-mode.patch @@ -0,0 +1,265 @@ +From 77a8e4fee1ad8cf7ec4a8899f76dfb9303b422f8 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Wed, 4 Jul 2018 08:45:48 +0100 +Subject: [PATCH] drm/i915: Interactive RPS mode + +RPS provides a feedback loop where we use the load during the previous +evaluation interval to decide whether to up or down clock the GPU +frequency. Our responsiveness is split into 3 regimes, a high and low +plateau with the intent to keep the gpu clocked high to cover occasional +stalls under high load, and low despite occasional glitches under steady +low load, and in between. However, we run into situations like kodi where +we want to stay at low power (video decoding is done efficiently +inside the fixed function HW and doesn't need high clocks even for high +bitrate streams), but just occasionally the pipeline is more complex +than a video decode and we need a smidgen of extra GPU power to present +on time. In the high power regime, we sample at sub frame intervals with +a bias to upclocking, and conversely at low power we sample over a few +frames worth to provide what we consider to be the right levels of +responsiveness respectively. At low power, we more or less expect to be +kicked out to high power at the start of a busy sequence by waitboosting. + +Prior to commit e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active +request") whenever we missed the frame or stalled, we would immediately go +full throttle and upclock the GPU to max. But in commit e9af4ea2b9e7, we +relaxed the waitboosting to only apply if the pipeline was deep to avoid +over-committing resources for a near miss. 
Sadly though, a near miss is +still a miss, and perceptible as jitter in the frame delivery. + +To try and prevent the near miss before having to resort to boosting +after the fact, we use the pageflip queue as an indication that we are +in an "interactive" regime and so should sample the load more frequently +to provide power before the frame misses its vblank. This will make us +more favorable to providing a small power increase (one or two bins) as +required rather than going all the way to maximum and then having to +work back down again. (We still keep the waitboosting mechanism around +just in case a dramatic change in system load requires urgent upclocking, +faster than we can provide in a few evaluation intervals.) + +References: https://bugs.freedesktop.org/show_bug.cgi?id=107111 +References: e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request") +Signed-off-by: Chris Wilson +Cc: Joonas Lahtinen +Cc: Tvrtko Ursulin +Cc: Radoslaw Szwichtenberg +--- + drivers/gpu/drm/i915/i915_debugfs.c | 1 + + drivers/gpu/drm/i915/i915_drv.h | 6 +- + drivers/gpu/drm/i915/intel_display.c | 20 +++++++ + drivers/gpu/drm/i915/intel_drv.h | 2 + + drivers/gpu/drm/i915/intel_pm.c | 87 ++++++++++++++++++---------- + 5 files changed, 86 insertions(+), 30 deletions(-) + +diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c +index 544e5e7f011f..7d974cad1cdf 100644 +--- a/drivers/gpu/drm/i915/i915_debugfs.c ++++ b/drivers/gpu/drm/i915/i915_debugfs.c +@@ -2218,6 +2218,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) + seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); + seq_printf(m, "Boosts outstanding? %d\n", + atomic_read(&rps->num_waiters)); ++ seq_printf(m, "Power overrides? 
%d\n", READ_ONCE(rps->power_override)); + seq_printf(m, "Frequency requested %d\n", + intel_gpu_freq(dev_priv, rps->cur_freq)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index 09ab12458244..3f9ae1095b98 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -783,7 +783,9 @@ struct intel_rps { + u8 down_threshold; /* Current %busy required to downclock */ + + int last_adj; +- enum { LOW_POWER, BETWEEN, HIGH_POWER } power; ++ enum { LOW_POWER, BETWEEN, HIGH_POWER, AUTO_POWER } power; ++ unsigned int power_override; ++ struct mutex power_lock; + + bool enabled; + atomic_t num_waiters; +@@ -3429,6 +3431,8 @@ extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); + extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); + extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); + extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); ++extern void intel_rps_set_power(struct drm_i915_private *dev_priv, ++ int new_power); + extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, + bool enable); + +diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c +index 8f3199b06d1f..acedd9b2d54a 100644 +--- a/drivers/gpu/drm/i915/intel_display.c ++++ b/drivers/gpu/drm/i915/intel_display.c +@@ -13104,6 +13104,19 @@ intel_prepare_plane_fb(struct drm_plane *plane, + add_rps_boost_after_vblank(new_state->crtc, new_state->fence); + } + ++ /* ++ * We declare pageflips to be interactive and so merit a small bias ++ * towards upclocking to deliver the frame on time. 
By only changing ++ * the RPS thresholds to sample more regularly and aim for higher ++ * clocks we can hopefully deliver low power workloads (like kodi) ++ * that are not quite steady state without resorting to forcing ++ * maximum clocks following a vblank miss (see do_rps_boost()). ++ */ ++ if (!intel_state->rps_override) { ++ intel_rps_set_power(dev_priv, HIGH_POWER); ++ intel_state->rps_override = true; ++ } ++ + return 0; + } + +@@ -13120,8 +13133,15 @@ void + intel_cleanup_plane_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) + { ++ struct intel_atomic_state *intel_state = ++ to_intel_atomic_state(old_state->state); + struct drm_i915_private *dev_priv = to_i915(plane->dev); + ++ if (intel_state->rps_override) { ++ intel_rps_set_power(dev_priv, AUTO_POWER); ++ intel_state->rps_override = false; ++ } ++ + /* Should only be called after a successful intel_prepare_plane_fb()! */ + mutex_lock(&dev_priv->drm.struct_mutex); + intel_plane_unpin_fb(to_intel_plane_state(old_state)); +diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h +index 61e715ddd0d5..86685c43e1d2 100644 +--- a/drivers/gpu/drm/i915/intel_drv.h ++++ b/drivers/gpu/drm/i915/intel_drv.h +@@ -482,6 +482,8 @@ struct intel_atomic_state { + */ + bool skip_intermediate_wm; + ++ bool rps_override; ++ + /* Gen9+ only */ + struct skl_ddb_values wm_results; + +diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c +index 53aaaa3e6886..0245a90d0fe5 100644 +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -6264,41 +6264,14 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) + return limits; + } + +-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) ++static void rps_set_power(struct drm_i915_private *dev_priv, int new_power) + { + struct intel_rps *rps = &dev_priv->gt_pm.rps; +- int new_power; + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 
0, ei_down = 0; + +- new_power = rps->power; +- switch (rps->power) { +- case LOW_POWER: +- if (val > rps->efficient_freq + 1 && +- val > rps->cur_freq) +- new_power = BETWEEN; +- break; +- +- case BETWEEN: +- if (val <= rps->efficient_freq && +- val < rps->cur_freq) +- new_power = LOW_POWER; +- else if (val >= rps->rp0_freq && +- val > rps->cur_freq) +- new_power = HIGH_POWER; +- break; ++ lockdep_assert_held(&rps->power_lock); + +- case HIGH_POWER: +- if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && +- val < rps->cur_freq) +- new_power = BETWEEN; +- break; +- } +- /* Max/min bins are special */ +- if (val <= rps->min_freq_softlimit) +- new_power = LOW_POWER; +- if (val >= rps->max_freq_softlimit) +- new_power = HIGH_POWER; + if (new_power == rps->power) + return; + +@@ -6365,9 +6338,64 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) + rps->power = new_power; + rps->up_threshold = threshold_up; + rps->down_threshold = threshold_down; ++} ++ ++static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) ++{ ++ struct intel_rps *rps = &dev_priv->gt_pm.rps; ++ int new_power; ++ ++ new_power = rps->power; ++ switch (rps->power) { ++ case AUTO_POWER: ++ case LOW_POWER: ++ if (val > rps->efficient_freq + 1 && ++ val > rps->cur_freq) ++ new_power = BETWEEN; ++ break; ++ ++ case BETWEEN: ++ if (val <= rps->efficient_freq && ++ val < rps->cur_freq) ++ new_power = LOW_POWER; ++ else if (val >= rps->rp0_freq && ++ val > rps->cur_freq) ++ new_power = HIGH_POWER; ++ break; ++ ++ case HIGH_POWER: ++ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && ++ val < rps->cur_freq) ++ new_power = BETWEEN; ++ break; ++ } ++ /* Max/min bins are special */ ++ if (val <= rps->min_freq_softlimit) ++ new_power = LOW_POWER; ++ if (val >= rps->max_freq_softlimit) ++ new_power = HIGH_POWER; ++ ++ mutex_lock(&rps->power_lock); ++ if (!rps->power_override) ++ rps_set_power(dev_priv, new_power); ++ mutex_unlock(&rps->power_lock); + rps->last_adj = 
0; + } + ++void intel_rps_set_power(struct drm_i915_private *dev_priv, int power) ++{ ++ struct intel_rps *rps = &dev_priv->gt_pm.rps; ++ ++ mutex_lock(&rps->power_lock); ++ if (power != AUTO_POWER) { ++ rps->power_override++; ++ rps_set_power(dev_priv, power); ++ } else { ++ rps->power_override--; ++ } ++ mutex_unlock(&rps->power_lock); ++} ++ + static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) + { + struct intel_rps *rps = &dev_priv->gt_pm.rps; +@@ -9604,6 +9632,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) + void intel_pm_setup(struct drm_i915_private *dev_priv) + { + mutex_init(&dev_priv->pcu_lock); ++ mutex_init(&dev_priv->gt_pm.rps.power_lock); + + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); + +-- +2.18.0 + diff --git a/packages/linux/patches/default/linux-999-drm_i915-partial-revert-of-waitboosting-fix.patch b/packages/linux/patches/default/linux-999-drm_i915-partial-revert-of-waitboosting-fix.patch deleted file mode 100644 index 78b6bdbe53..0000000000 --- a/packages/linux/patches/default/linux-999-drm_i915-partial-revert-of-waitboosting-fix.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 836bcce2ecdd9d6ea0650acc9da4f74239a8463b Mon Sep 17 00:00:00 2001 -From: MilhouseVH -Date: Tue, 3 Jul 2018 06:17:27 +0100 -Subject: [PATCH] drm/i915: partial revert of - e9af4ea2b9e7e5d3caa6354be14de06b678ed0fa - -The original change leaves the GPU more or less permanently in low power mode -with the GPU running at idle frequency and never boosting which may result in -stutter in the Kodi GUI. Tested on Skylake NUC6i5SYH. 
---- - drivers/gpu/drm/i915/i915_request.h | 9 +-------- - 1 file changed, 1 insertion(+), 8 deletions(-) - -diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h -index eddbd424..3f2895a 100644 ---- a/drivers/gpu/drm/i915/i915_request.h -+++ b/drivers/gpu/drm/i915/i915_request.h -@@ -298,14 +298,7 @@ static inline bool i915_request_completed(const struct i915_request *rq) - - static inline bool i915_request_started(const struct i915_request *rq) - { -- u32 seqno; -- -- seqno = i915_request_global_seqno(rq); -- if (!seqno) -- return false; -- -- return i915_seqno_passed(intel_engine_get_seqno(rq->engine), -- seqno - 1); -+ return false; - } - - static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) --- -2.14.1 -