public inbox for linux-arm-kernel@lists.infradead.org 
 help / color / mirror / Atom feed
* [PATCH] thermal/drivers/exynos: fix clock ordering race and shared IRQ handling
@ 2026-06-03  1:30 Rosen Penev
  2026-06-03  8:29 ` Alexey Klimov
  0 siblings, 1 reply; 2+ messages in thread
From: Rosen Penev @ 2026-06-03  1:30 UTC (permalink / raw)
  To: linux-pm
  Cc: Bartlomiej Zolnierkiewicz, Krzysztof Kozlowski, Rafael J. Wysocki,
	Daniel Lezcano, Zhang Rui, Lukasz Luba, Peter Griffin,
	Alim Akhtar, open list:SAMSUNG THERMAL DRIVER,
	moderated list:ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES,
	open list

Fix two pre-existing issues in exynos_tmu_probe/remove:

1. Clock ordering race: The driver manually unprepares clocks in
   exynos_tmu_remove() and the probe error path, but the IRQ handler and
   thermal zone are devm-managed and remain active until after the manual
   cleanup.  If the shared IRQ fires or the thermal zone is polled in that
   window, clk_enable() is called on an unprepared clock, which is illegal.
   Replace devm_clk_get() + manual clk_prepare() with devm_clk_get_prepared(),
   and devm_clk_get() + manual clk_prepare_enable() with
   devm_clk_get_enabled(), so clock unprepare is tied to the devm lifetime
   and happens after the IRQ and thermal zone are released.  Remove the
   now-redundant manual cleanup from the error path and remove function.

2. Shared IRQ handling: The driver requests a shared IRQ (IRQF_SHARED) with
   NULL as the hardirq handler, causing the kernel to wake the threaded
   handler for every interrupt on the shared line.  The threaded handler
   unconditionally returned IRQ_HANDLED without verifying that the TMU
   actually generated the interrupt, which could cause other shared-IRQ
   devices to be starved.  Change tmu_clear_irqs to return the interrupt
   status register value, and return IRQ_NONE from the handler if no TMU
   interrupt was pending.

Assisted-by: opencode:big-pickle
Signed-off-by: Rosen Penev <rosenp@gmail•com>
---
 drivers/thermal/samsung/exynos_tmu.c | 81 +++++++++-------------------
 1 file changed, 24 insertions(+), 57 deletions(-)

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 47a99b3c5395..5dc22006d7f8 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -196,7 +196,7 @@ struct exynos_tmu_data {
 	void (*tmu_control)(struct platform_device *pdev, bool on);
 	int (*tmu_read)(struct exynos_tmu_data *data);
 	void (*tmu_set_emulation)(struct exynos_tmu_data *data, int temp);
-	void (*tmu_clear_irqs)(struct exynos_tmu_data *data);
+	u32 (*tmu_clear_irqs)(struct exynos_tmu_data *data);
 };
 
 /*
@@ -760,24 +760,28 @@ static irqreturn_t exynos_tmu_threaded_irq(int irq, void *id)
 {
 	struct exynos_tmu_data *data = id;
 
-	thermal_zone_device_update(data->tzd, THERMAL_EVENT_UNSPECIFIED);
-
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
 
+	if (!data->tmu_clear_irqs(data)) {
+		clk_disable(data->clk);
+		mutex_unlock(&data->lock);
+		return IRQ_NONE;
+	}
+
 	/* TODO: take action based on particular interrupt */
-	data->tmu_clear_irqs(data);
 
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
 
+	thermal_zone_device_update(data->tzd, THERMAL_EVENT_UNSPECIFIED);
+
 	return IRQ_HANDLED;
 }
 
-static void exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data)
+static u32 exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data)
 {
-	unsigned int val_irq;
-	u32 tmu_intstat, tmu_intclear;
+	u32 val_irq, tmu_intstat, tmu_intclear;
 
 	if (data->soc == SOC_ARCH_EXYNOS5260) {
 		tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT;
@@ -803,6 +807,8 @@ static void exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data)
 	 * support FALL IRQs at all).
 	 */
 	writel(val_irq, data->base + tmu_intclear);
+
+	return val_irq;
 }
 
 static const struct of_device_id exynos_tmu_match[] = {
@@ -1036,43 +1042,22 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	data->clk = devm_clk_get(dev, "tmu_apbif");
+	data->clk = devm_clk_get_prepared(dev, "tmu_apbif");
 	if (IS_ERR(data->clk))
 		return dev_err_probe(dev, PTR_ERR(data->clk), "Failed to get clock\n");
 
-	data->clk_sec = devm_clk_get(dev, "tmu_triminfo_apbif");
-	if (IS_ERR(data->clk_sec)) {
+	data->clk_sec = devm_clk_get_prepared(dev, "tmu_triminfo_apbif");
+	if (IS_ERR(data->clk_sec))
 		if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO)
 			return dev_err_probe(dev, PTR_ERR(data->clk_sec),
 					     "Failed to get triminfo clock\n");
-	} else {
-		ret = clk_prepare(data->clk_sec);
-		if (ret) {
-			dev_err(dev, "Failed to get clock\n");
-			return ret;
-		}
-	}
-
-	ret = clk_prepare(data->clk);
-	if (ret) {
-		dev_err(dev, "Failed to get clock\n");
-		goto err_clk_sec;
-	}
 
 	switch (data->soc) {
 	case SOC_ARCH_EXYNOS5433:
 	case SOC_ARCH_EXYNOS7:
-		data->sclk = devm_clk_get(dev, "tmu_sclk");
-		if (IS_ERR(data->sclk)) {
-			ret = dev_err_probe(dev, PTR_ERR(data->sclk), "Failed to get sclk\n");
-			goto err_clk;
-		} else {
-			ret = clk_prepare_enable(data->sclk);
-			if (ret) {
-				dev_err(dev, "Failed to enable sclk\n");
-				goto err_clk;
-			}
-		}
+		data->sclk = devm_clk_get_enabled(dev, "tmu_sclk");
+		if (IS_ERR(data->sclk))
+			return dev_err_probe(dev, PTR_ERR(data->sclk), "Failed to get sclk\n");
 		break;
 	default:
 		break;
@@ -1081,20 +1066,18 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 	ret = exynos_tmu_initialize(pdev);
 	if (ret) {
 		dev_err(dev, "Failed to initialize TMU\n");
-		goto err_sclk;
+		return ret;
 	}
 
 	data->tzd = devm_thermal_of_zone_register(dev, 0, data,
 						  &exynos_sensor_ops);
-	if (IS_ERR(data->tzd)) {
-		ret = dev_err_probe(dev, PTR_ERR(data->tzd), "Failed to register sensor\n");
-		goto err_sclk;
-	}
+	if (IS_ERR(data->tzd))
+		return dev_err_probe(dev, PTR_ERR(data->tzd), "Failed to register sensor\n");
 
 	ret = exynos_thermal_zone_configure(pdev);
 	if (ret) {
 		dev_err(dev, "Failed to configure the thermal zone\n");
-		goto err_sclk;
+		return ret;
 	}
 
 	ret = devm_request_threaded_irq(dev, data->irq, NULL,
@@ -1104,32 +1087,16 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 					dev_name(dev), data);
 	if (ret) {
 		dev_err(dev, "Failed to request irq: %d\n", data->irq);
-		goto err_sclk;
+		return ret;
 	}
 
 	exynos_tmu_control(pdev, true);
 	return 0;
-
-err_sclk:
-	clk_disable_unprepare(data->sclk);
-err_clk:
-	clk_unprepare(data->clk);
-err_clk_sec:
-	if (!IS_ERR(data->clk_sec))
-		clk_unprepare(data->clk_sec);
-	return ret;
 }
 
 static void exynos_tmu_remove(struct platform_device *pdev)
 {
-	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
-
 	exynos_tmu_control(pdev, false);
-
-	clk_disable_unprepare(data->sclk);
-	clk_unprepare(data->clk);
-	if (!IS_ERR(data->clk_sec))
-		clk_unprepare(data->clk_sec);
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
2.54.0



^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] thermal/drivers/exynos: fix clock ordering race and shared IRQ handling
  2026-06-03  1:30 [PATCH] thermal/drivers/exynos: fix clock ordering race and shared IRQ handling Rosen Penev
@ 2026-06-03  8:29 ` Alexey Klimov
  0 siblings, 0 replies; 2+ messages in thread
From: Alexey Klimov @ 2026-06-03  8:29 UTC (permalink / raw)
  To: Rosen Penev, linux-pm
  Cc: Bartlomiej Zolnierkiewicz, Krzysztof Kozlowski, Rafael J. Wysocki,
	Daniel Lezcano, Zhang Rui, Lukasz Luba, Peter Griffin,
	Alim Akhtar, open list:SAMSUNG THERMAL DRIVER,
	moderated list:ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES,
	open list

On Wed Jun 3, 2026 at 2:30 AM BST, Rosen Penev wrote:
> Fix two pre-existing issues in exynos_tmu_probe/remove:
>
> 1. Clock ordering race: The driver manually unprepares clocks in
>    exynos_tmu_remove() and the probe error path, but the IRQ handler and
>    thermal zone are devm-managed and remain active until after the manual
>    cleanup.  If the shared IRQ fires or the thermal zone is polled in that
>    window, clk_enable() is called on an unprepared clock, which is illegal.
>    Replace devm_clk_get() + manual clk_prepare() with devm_clk_get_prepared(),
>    and devm_clk_get() + manual clk_prepare_enable() with
>    devm_clk_get_enabled(), so clock unprepare is tied to the devm lifetime
>    and happens after the IRQ and thermal zone are released.  Remove the
>    now-redundant manual cleanup from the error path and remove function.
>
> 2. Shared IRQ handling: The driver requests a shared IRQ (IRQF_SHARED) with
>    NULL as the hardirq handler, causing the kernel to wake the threaded
>    handler for every interrupt on the shared line.  The threaded handler
>    unconditionally returned IRQ_HANDLED without verifying that the TMU
>    actually generated the interrupt, which could cause other shared-IRQ
>    devices to be starved.  Change tmu_clear_irqs to return the interrupt
>    status register value, and return IRQ_NONE from the handler if no TMU
>    interrupt was pending.
>
> Assisted-by: opencode:big-pickle
> Signed-off-by: Rosen Penev <rosenp@gmail•com>
> ---
>  drivers/thermal/samsung/exynos_tmu.c | 81 +++++++++-------------------
>  1 file changed, 24 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
> index 47a99b3c5395..5dc22006d7f8 100644
> --- a/drivers/thermal/samsung/exynos_tmu.c
> +++ b/drivers/thermal/samsung/exynos_tmu.c
> @@ -196,7 +196,7 @@ struct exynos_tmu_data {
>  	void (*tmu_control)(struct platform_device *pdev, bool on);
>  	int (*tmu_read)(struct exynos_tmu_data *data);
>  	void (*tmu_set_emulation)(struct exynos_tmu_data *data, int temp);
> -	void (*tmu_clear_irqs)(struct exynos_tmu_data *data);
> +	u32 (*tmu_clear_irqs)(struct exynos_tmu_data *data);
>  };
>  
>  /*
> @@ -760,24 +760,28 @@ static irqreturn_t exynos_tmu_threaded_irq(int irq, void *id)
>  {
>  	struct exynos_tmu_data *data = id;
>  
> -	thermal_zone_device_update(data->tzd, THERMAL_EVENT_UNSPECIFIED);
> -
>  	mutex_lock(&data->lock);
>  	clk_enable(data->clk);
>  
> +	if (!data->tmu_clear_irqs(data)) {
> +		clk_disable(data->clk);
> +		mutex_unlock(&data->lock);
> +		return IRQ_NONE;
> +	}
> +
>  	/* TODO: take action based on particular interrupt */
> -	data->tmu_clear_irqs(data);

After that change, the TODO comment feels misplaced.
At a quick glance, it seems it should be moved as well, to just
before the if (!data->tmu_clear_irqs(data)) check.

>  
> +	thermal_zone_device_update(data->tzd, THERMAL_EVENT_UNSPECIFIED);
> +
>  	return IRQ_HANDLED;

Best regards,
Alexey



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-03  8:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-03  1:30 [PATCH] thermal/drivers/exynos: fix clock ordering race and shared IRQ handling Rosen Penev
2026-06-03  8:29 ` Alexey Klimov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox