From 797f55286997115aef2b6a0ac1d2f31038714409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 25 May 2026 12:39:17 -0300 Subject: [PATCH 1/5] pmdomain: bcm: bcm2835-power: Raise ASB poll timeout to 100us MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 18605b1b936b ("pmdomain: bcm: bcm2835-power: Increase ASB control timeout") raised the ASB handshake polling budget from 1us to 5us. Surveying the pmdomain subsystem, 5us is still the smallest polling budget by a wide margin - comparable handshakes in other drivers use: - 100us : starfive jh71xx-pmu, apple pmgr-pwrstate - 1ms : renesas rcar-sysc, rmobile-sysc (power-on) - 10ms : renesas rcar-gen4-sysc, sunxi sun55i-pck600 - 1s : mediatek mtk-pm-domains, mtk-scpsys Raise the bcm2835 timeout to 100us, matching analogous drivers. 100us is still negligible relative to a power-domain transition and gives the V3D master ASB substantially more headroom to drain under heavy workloads, where 5us has been observed to be insufficient in practice. 
Cc: stable@vger.kernel.org Fixes: b826d2c0b0ec ("pmdomain: bcm: bcm2835-power: Increase ASB control timeout") Signed-off-by: Maíra Canal --- drivers/pmdomain/bcm/bcm2835-power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c index 6c0ae6b6202771..b008b47a8ce495 100644 --- a/drivers/pmdomain/bcm/bcm2835-power.c +++ b/drivers/pmdomain/bcm/bcm2835-power.c @@ -175,7 +175,7 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable writel(PM_PASSWORD | val, base + reg); if (readl_poll_timeout_atomic(base + reg, val, - !!(val & ASB_ACK) != enable, 0, 5)) + !!(val & ASB_ACK) != enable, 0, 100)) return -ETIMEDOUT; return 0; From b3b6213d4310738220b955d2b022306f9b918180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 25 May 2026 16:53:06 -0300 Subject: [PATCH 2/5] [BACKPORT] drm/v3d: Introduce Runtime Power Management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the downstream version match the upstream commit 458f2a712ab4 ("drm/v3d: Introduce Runtime Power Management"). 
Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_power.c | 38 ++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_power.c b/drivers/gpu/drm/v3d/v3d_power.c index f3d30ef5de4ea6..769e90032b042a 100644 --- a/drivers/gpu/drm/v3d/v3d_power.c +++ b/drivers/gpu/drm/v3d/v3d_power.c @@ -2,53 +2,61 @@ /* Copyright (C) 2026 Raspberry Pi */ #include -#include #include #include "v3d_drv.h" #include "v3d_regs.h" -static void +static int v3d_resume_sms(struct v3d_dev *v3d) { if (v3d->ver < V3D_GEN_71) - return; + return 0; V3D_SMS_WRITE(V3D_SMS_TEE_CS, V3D_SMS_CLEAR_POWER_OFF); if (wait_for((V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_TEE_CS), V3D_SMS_STATE) == V3D_SMS_IDLE), 100)) { drm_err(&v3d->drm, "Failed to power up SMS\n"); + return -ETIMEDOUT; } v3d_reset_sms(v3d); + + return 0; } -static void +static int v3d_suspend_sms(struct v3d_dev *v3d) { if (v3d->ver < V3D_GEN_71) - return; + return 0; V3D_SMS_WRITE(V3D_SMS_TEE_CS, V3D_SMS_POWER_OFF); if (wait_for((V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_TEE_CS), V3D_SMS_STATE) == V3D_SMS_POWER_OFF_STATE), 100)) { drm_err(&v3d->drm, "Failed to power off SMS\n"); + return -ETIMEDOUT; } + + return 0; } int v3d_power_suspend(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct v3d_dev *v3d = to_v3d_dev(drm); + int ret; v3d_irq_disable(v3d); - v3d_suspend_sms(v3d); - if (v3d->reset) - reset_control_assert(v3d->reset); + ret = v3d_suspend_sms(v3d); + if (ret) { + v3d_irq_enable(v3d); + return ret; + } clk_disable_unprepare(v3d->clk); @@ -65,19 +73,15 @@ int v3d_power_resume(struct device *dev) if (ret) return ret; - if (v3d->reset) { - ret = reset_control_deassert(v3d->reset); - if (ret) - goto clk_disable; + ret = v3d_resume_sms(v3d); + if (ret) { + clk_disable_unprepare(v3d->clk); + return ret; } - v3d_resume_sms(v3d); + v3d_init_hw_state(v3d); v3d_mmu_set_page_table(v3d); v3d_irq_enable(v3d); return 0; - -clk_disable: - 
clk_disable_unprepare(v3d->clk); - return ret; } From eae7ca51bd99acb893fca6a82ba1dc70bcf8bfd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 25 May 2026 16:15:53 -0300 Subject: [PATCH 3/5] drm/v3d: Flush MMU TLB and cache during runtime resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v3d_mmu_set_page_table() ends by calling v3d_mmu_flush_all() to flush the MMU cache and clear the TLB after reprogramming V3D_MMU_PT_PA_BASE. v3d_mmu_flush_all() is gated by pm_runtime_get_if_active(), which returns 0 unless runtime_status == RPM_ACTIVE. v3d_mmu_set_page_table() is called from two paths that *know* V3D is reachable, and for which the pm_runtime_get_if_active() gate is either wrong or unnecessary: 1. v3d_power_resume(): the runtime resume callback itself, where runtime_status is RPM_RESUMING. 2. v3d_reset(): called from the DRM scheduler timeout handler with the hung job's pm_runtime reference held, so runtime_status is RPM_ACTIVE. The gate passes here, but taking an extra reference for the duration of the flush is unnecessary. In the first case pm_runtime_get_if_active() returns 0, the flush is silently skipped, and V3D resumes executing with whatever MMUC/TLB state happened to survive the last reset. On BCM2711, this leaves stale translations live across runtime PM cycles, manifesting as random GPU hangs. Split the actual flush sequence into a helper that does the writes unconditionally, and have v3d_mmu_set_page_table() call it directly. 
Fixes: 17af1d14deaf ("drm/v3d: Introduce Runtime Power Management") Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_mmu.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 01f0b7c7b864dc..b2c12a664d8aaa 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -35,13 +35,14 @@ static bool v3d_mmu_is_aligned(u32 page, u32 page_address, size_t alignment) IS_ALIGNED(page_address, alignment >> V3D_MMU_PAGE_SHIFT); } -int v3d_mmu_flush_all(struct v3d_dev *v3d) +/* + * Issue the MMUC flush and TLB clear unconditionally. The caller must + * already know that V3D is reachable. In particular, this is used from + * the runtime resume callback. + */ +static int v3d_mmu_flush_all_locked(struct v3d_dev *v3d) { - int ret = 0; - - /* Flush the PTs only if we're already awake */ - if (!pm_runtime_get_if_active(v3d->drm.dev)) - return 0; + int ret; V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_FLUSH | V3D_MMUC_CONTROL_ENABLE); @@ -50,7 +51,7 @@ int v3d_mmu_flush_all(struct v3d_dev *v3d) V3D_MMUC_CONTROL_FLUSHING), 100); if (ret) { dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n"); - goto pm_put; + return ret; } V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | @@ -61,7 +62,19 @@ int v3d_mmu_flush_all(struct v3d_dev *v3d) if (ret) dev_err(v3d->drm.dev, "MMU TLB clear wait idle failed\n"); -pm_put: + return ret; +} + +int v3d_mmu_flush_all(struct v3d_dev *v3d) +{ + int ret; + + /* Flush the PTs only if we're already awake */ + if (!pm_runtime_get_if_active(v3d->drm.dev)) + return 0; + + ret = v3d_mmu_flush_all_locked(v3d); + v3d_pm_runtime_put(v3d); return ret; } @@ -83,7 +96,7 @@ int v3d_mmu_set_page_table(struct v3d_dev *v3d) V3D_MMU_ILLEGAL_ADDR_ENABLE); V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE); - return v3d_mmu_flush_all(v3d); + return v3d_mmu_flush_all_locked(v3d); } void v3d_mmu_insert_ptes(struct v3d_bo *bo) From 
b7bf1611ceee4e03effbc6133f9bc724e6639a08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Tue, 26 May 2026 20:30:58 -0300 Subject: [PATCH 4/5] drm/v3d: Wait for pending L2T flush before cleaning caches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v3d_clean_caches() starts the cache-clean sequence by writing V3D_L2TCACTL_TMUWCF to V3D_CTL_L2TCACTL and then polling for that bit to clear. It does not, however, check for an L2T flush (L2TFLS) that may still be in flight from a previous operation. On pre-V3D 7.1 hardware, kicking off the TMU write-combiner flush while an L2T flush is still pending can clobber bits in L2TCACTL and cause cache inconsistencies. Poll for L2TFLS to clear before writing L2TCACTL on V3D < 7.1, ensuring any pending flush has completed before a new clean is issued. Cc: stable@vger.kernel.org Fixes: d223f98f0209 ("drm/v3d: Add support for compute shader dispatch.") Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 82fda76a2daf1d..3d771ff45feb21 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -206,6 +206,14 @@ v3d_clean_caches(struct v3d_dev *v3d) trace_v3d_cache_clean_begin(dev); + /* GFXH-1897: Ensure pending flushes complete before writing L2TCACTL */ + if (v3d->ver < V3D_GEN_71) { + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T clean\n"); + } + } + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & V3D_L2TCACTL_TMUWCF), 100)) { From 716bd384d28b6eabbc037d9d8c2ac18c6b96bff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Tue, 26 May 2026 20:31:23 -0300 Subject: [PATCH 5/5] drm/v3d: Clean caches before runtime suspend MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On runtime suspend, clean the V3D caches before suspending so all dirty lines are written back to memory before the power domain is shut down. Fixes several system hangs reported in [1][2][3]. Closes: https://github.com/raspberrypi/linux/issues/7381 [1] Closes: https://github.com/raspberrypi/linux/issues/7396 [2] Closes: https://github.com/raspberrypi/linux/issues/7397 [3] Fixes: 17af1d14deaf ("drm/v3d: Introduce Runtime Power Management") Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_power.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_power.c b/drivers/gpu/drm/v3d/v3d_power.c index 769e90032b042a..865fb9b7b365cc 100644 --- a/drivers/gpu/drm/v3d/v3d_power.c +++ b/drivers/gpu/drm/v3d/v3d_power.c @@ -52,6 +52,9 @@ int v3d_power_suspend(struct device *dev) v3d_irq_disable(v3d); + /* Always clean V3D caches on shutdown. */ + v3d_clean_caches(v3d); + ret = v3d_suspend_sms(v3d); if (ret) { v3d_irq_enable(v3d);