* [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 15:52 ` Jonathan Cameron
2026-06-01 22:46 ` Borislav Petkov
2026-05-29 9:50 ` [PATCH v5 02/10] ACPI: APEI: GHES: move CPER read helpers Ahmed Tiba
` (9 subsequent siblings)
10 siblings, 2 replies; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Carve the CPER helper macros out of ghes.c and place them in a private
header so they can be shared with upcoming helper files. This is a
mechanical include change with no functional differences.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/ghes.c | 94 +++++++++----------------------------------
include/acpi/ghes_cper.h | 102 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 120 insertions(+), 76 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3236a3ce79d6..4f67f46410c4 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -49,6 +49,7 @@
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
+#include <acpi/ghes_cper.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -57,40 +58,6 @@
#include "apei-internal.h"
-#define GHES_PFX "GHES: "
-
-#define GHES_ESTATUS_MAX_SIZE 65536
-#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
-
-#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
-
-/* This is just an estimation for memory pool allocation */
-#define GHES_ESTATUS_CACHE_AVG_SIZE 512
-
-#define GHES_ESTATUS_CACHES_SIZE 4
-
-#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
-/* Prevent too many caches are allocated because of RCU */
-#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
-
-#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
- (sizeof(struct ghes_estatus_cache) + (estatus_len))
-#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
- ((struct acpi_hest_generic_status *) \
- ((struct ghes_estatus_cache *)(estatus_cache) + 1))
-
-#define GHES_ESTATUS_NODE_LEN(estatus_len) \
- (sizeof(struct ghes_estatus_node) + (estatus_len))
-#define GHES_ESTATUS_FROM_NODE(estatus_node) \
- ((struct acpi_hest_generic_status *) \
- ((struct ghes_estatus_node *)(estatus_node) + 1))
-
-#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
- (sizeof(struct ghes_vendor_record_entry) + (gdata_len))
-#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
- ((struct acpi_hest_generic_data *) \
- ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
-
/*
* NMI-like notifications vary by architecture, before the compiler can prune
* unused static functions it needs a value for these enums.
@@ -102,25 +69,6 @@
static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
-static inline bool is_hest_type_generic_v2(struct ghes *ghes)
-{
- return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
-}
-
-/*
- * A platform may describe one error source for the handling of synchronous
- * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
- * or External Interrupt). On x86, the HEST notifications are always
- * asynchronous, so only SEA on ARM is delivered as a synchronous
- * notification.
- */
-static inline bool is_hest_sync_notify(struct ghes *ghes)
-{
- u8 notify_type = ghes->generic->notify.type;
-
- return notify_type == ACPI_HEST_NOTIFY_SEA;
-}
-
/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
@@ -165,12 +113,6 @@ static DEFINE_MUTEX(ghes_devs_mutex);
*/
static DEFINE_SPINLOCK(ghes_notify_lock_irq);
-struct ghes_vendor_record_entry {
- struct work_struct work;
- int error_severity;
- char vendor_record[];
-};
-
static struct gen_pool *ghes_estatus_pool;
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
@@ -266,7 +208,7 @@ static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
apei_write(val, &gv2->read_ack_register);
}
-static struct ghes *ghes_new(struct acpi_hest_generic *generic)
+struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
unsigned int error_block_length;
@@ -313,7 +255,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
return ERR_PTR(rc);
}
-static void ghes_fini(struct ghes *ghes)
+void ghes_fini(struct ghes *ghes)
{
kfree(ghes->estatus);
apei_unmap_generic_address(&ghes->generic->error_status_address);
@@ -363,8 +305,8 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
}
/* Check the top-level record header has an appropriate size. */
-static int __ghes_check_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus)
+int __ghes_check_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus)
{
u32 len = cper_estatus_len(estatus);
u32 max_len = min(ghes->generic->error_block_length,
@@ -389,9 +331,9 @@ static int __ghes_check_estatus(struct ghes *ghes,
}
/* Read the CPER block, returning its address, and header in estatus. */
-static int __ghes_peek_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus,
- u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+int __ghes_peek_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
{
struct acpi_hest_generic *g = ghes->generic;
int rc;
@@ -400,7 +342,7 @@ static int __ghes_peek_estatus(struct ghes *ghes,
if (rc) {
*buf_paddr = 0;
pr_warn_ratelimited(FW_WARN GHES_PFX
-"Failed to read error status block address for hardware error source: %d.\n",
+ "Failed to read error status block address for hardware error source: %d.\n",
g->header.source_id);
return -EIO;
}
@@ -417,9 +359,9 @@ static int __ghes_peek_estatus(struct ghes *ghes,
return 0;
}
-static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
- u64 buf_paddr, enum fixed_addresses fixmap_idx,
- size_t buf_len)
+int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx,
+ size_t buf_len)
{
ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
if (cper_estatus_check(estatus)) {
@@ -431,9 +373,9 @@ static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
return 0;
}
-static int ghes_read_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus,
- u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+int ghes_read_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
{
int rc;
@@ -449,9 +391,9 @@ static int ghes_read_estatus(struct ghes *ghes,
cper_estatus_len(estatus));
}
-static void ghes_clear_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus,
- u64 buf_paddr, enum fixed_addresses fixmap_idx)
+void ghes_clear_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx)
{
estatus->block_status = 0;
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
new file mode 100644
index 000000000000..6b7632cfaf66
--- /dev/null
+++ b/include/acpi/ghes_cper.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * GHES declarations used by both the ACPI APEI GHES driver
+ * and the firmware-first CPER provider.
+ *
+ * These declarations lets GHES and other firmware-first error sources use
+ * the same helper so the non-ACPI path follows the same
+ * behavior as GHES instead of carrying a separate copy.
+ *
+ * Derived from the ACPI APEI GHES driver.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel•com>
+ */
+
+#ifndef ACPI_APEI_GHES_CPER_H
+#define ACPI_APEI_GHES_CPER_H
+
+#include <linux/workqueue.h>
+
+#include <acpi/ghes.h>
+
+#define GHES_PFX "GHES: "
+
+#define GHES_ESTATUS_MAX_SIZE 65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE 512
+
+#define GHES_ESTATUS_CACHES_SIZE 4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_node *)(estatus_node) + 1))
+
+#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
+ (sizeof(struct ghes_vendor_record_entry) + (gdata_len))
+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
+ ((struct acpi_hest_generic_data *) \
+ ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
+
+static inline bool is_hest_type_generic_v2(struct ghes *ghes)
+{
+ return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
+}
+
+/*
+ * A platform may describe one error source for the handling of synchronous
+ * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
+ * or External Interrupt). On x86, the HEST notifications are always
+ * asynchronous, so only SEA on ARM is delivered as a synchronous
+ * notification.
+ */
+static inline bool is_hest_sync_notify(struct ghes *ghes)
+{
+ u8 notify_type = ghes->generic->notify.type;
+
+ return notify_type == ACPI_HEST_NOTIFY_SEA;
+}
+
+struct ghes_vendor_record_entry {
+ struct work_struct work;
+ int error_severity;
+ char vendor_record[];
+};
+
+#ifdef CONFIG_ACPI_APEI
+struct ghes *ghes_new(struct acpi_hest_generic *generic);
+void ghes_fini(struct ghes *ghes);
+
+int ghes_read_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx);
+void ghes_clear_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx);
+int __ghes_peek_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx);
+int __ghes_check_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus);
+int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx,
+ size_t buf_len);
+#endif
+
+#endif /* ACPI_APEI_GHES_CPER_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header
2026-05-29 9:50 ` [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header Ahmed Tiba
@ 2026-05-29 15:52 ` Jonathan Cameron
2026-06-01 22:46 ` Borislav Petkov
1 sibling, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 15:52 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:41 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Carve the CPER helper macros out of ghes.c and place them in a private
> header so they can be shared with upcoming helper files. This is a
> mechanical include change with no functional differences.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
I'm not sure there is a 'right' way to handle the use of ACPI deifned stuff
outside of ACPI, but this seems reasonable to me.
Reviewed-by: Jonathan Cameron <jic23@kernel•org>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header
2026-05-29 9:50 ` [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header Ahmed Tiba
2026-05-29 15:52 ` Jonathan Cameron
@ 2026-06-01 22:46 ` Borislav Petkov
1 sibling, 0 replies; 23+ messages in thread
From: Borislav Petkov @ 2026-06-01 22:46 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt, linux-arm-kernel, Michael.Zhao2,
linux-doc, linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree,
linux-acpi, linux-edac, acpica-devel
On Fri, May 29, 2026 at 10:50:41AM +0100, Ahmed Tiba wrote:
> Carve the CPER helper macros out of ghes.c and place them in a private
> header so they can be shared with upcoming helper files. This is a
> mechanical include change with no functional differences.
What Sashiko said.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 02/10] ACPI: APEI: GHES: move CPER read helpers
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
2026-05-29 9:50 ` [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 15:51 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 03/10] ACPI: APEI: GHES: move GHESv2 ack and alloc helpers Ahmed Tiba
` (8 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Relocate the CPER buffer mapping, peek, and clear helpers from ghes.c into
ghes_cper.c so they can be shared with other firmware-first providers.
This commit only shuffles code; behavior stays the same.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/Makefile | 2 +-
drivers/acpi/apei/ghes.c | 166 -----------------------------------
drivers/acpi/apei/ghes_cper.c | 195 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 196 insertions(+), 167 deletions(-)
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
index 66588d6be56f..f57f3b009d8e 100644
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ACPI_APEI) += apei.o
-obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
+obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o ghes_cper.o
# clang versions prior to 18 may blow out the stack with KASAN
ifeq ($(CONFIG_COMPILE_TEST)_$(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_y_)
KASAN_SANITIZE_ghes.o := n
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 4f67f46410c4..3f35580e8efd 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -118,26 +118,6 @@ static struct gen_pool *ghes_estatus_pool;
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
-static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
-{
- phys_addr_t paddr;
- pgprot_t prot;
-
- paddr = PFN_PHYS(pfn);
- prot = arch_apei_get_mem_attribute(paddr);
- __set_fixmap(fixmap_idx, paddr, prot);
-
- return (void __iomem *) __fix_to_virt(fixmap_idx);
-}
-
-static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
-{
- int _idx = virt_to_fix((unsigned long)vaddr);
-
- WARN_ON_ONCE(fixmap_idx != _idx);
- clear_fixmap(fixmap_idx);
-}
-
int ghes_estatus_pool_init(unsigned int num_ghes)
{
unsigned long addr, len;
@@ -193,21 +173,6 @@ static void unmap_gen_v2(struct ghes *ghes)
apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}
-static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
-{
- int rc;
- u64 val = 0;
-
- rc = apei_read(&val, &gv2->read_ack_register);
- if (rc)
- return;
-
- val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
- val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;
-
- apei_write(val, &gv2->read_ack_register);
-}
-
struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
@@ -280,137 +245,6 @@ static inline int ghes_severity(int severity)
}
}
-static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
- int from_phys,
- enum fixed_addresses fixmap_idx)
-{
- void __iomem *vaddr;
- u64 offset;
- u32 trunk;
-
- while (len > 0) {
- offset = paddr - (paddr & PAGE_MASK);
- vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
- trunk = PAGE_SIZE - offset;
- trunk = min(trunk, len);
- if (from_phys)
- memcpy_fromio(buffer, vaddr + offset, trunk);
- else
- memcpy_toio(vaddr + offset, buffer, trunk);
- len -= trunk;
- paddr += trunk;
- buffer += trunk;
- ghes_unmap(vaddr, fixmap_idx);
- }
-}
-
-/* Check the top-level record header has an appropriate size. */
-int __ghes_check_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus)
-{
- u32 len = cper_estatus_len(estatus);
- u32 max_len = min(ghes->generic->error_block_length,
- ghes->estatus_length);
-
- if (len < sizeof(*estatus)) {
- pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
- return -EIO;
- }
-
- if (!len || len > max_len) {
- pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
- return -EIO;
- }
-
- if (cper_estatus_check_header(estatus)) {
- pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
- return -EIO;
- }
-
- return 0;
-}
-
-/* Read the CPER block, returning its address, and header in estatus. */
-int __ghes_peek_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus,
- u64 *buf_paddr, enum fixed_addresses fixmap_idx)
-{
- struct acpi_hest_generic *g = ghes->generic;
- int rc;
-
- rc = apei_read(buf_paddr, &g->error_status_address);
- if (rc) {
- *buf_paddr = 0;
- pr_warn_ratelimited(FW_WARN GHES_PFX
- "Failed to read error status block address for hardware error source: %d.\n",
- g->header.source_id);
- return -EIO;
- }
- if (!*buf_paddr)
- return -ENOENT;
-
- ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
- fixmap_idx);
- if (!estatus->block_status) {
- *buf_paddr = 0;
- return -ENOENT;
- }
-
- return 0;
-}
-
-int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
- u64 buf_paddr, enum fixed_addresses fixmap_idx,
- size_t buf_len)
-{
- ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
- if (cper_estatus_check(estatus)) {
- pr_warn_ratelimited(FW_WARN GHES_PFX
- "Failed to read error status block!\n");
- return -EIO;
- }
-
- return 0;
-}
-
-int ghes_read_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus,
- u64 *buf_paddr, enum fixed_addresses fixmap_idx)
-{
- int rc;
-
- rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
- if (rc)
- return rc;
-
- rc = __ghes_check_estatus(ghes, estatus);
- if (rc)
- return rc;
-
- return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
- cper_estatus_len(estatus));
-}
-
-void ghes_clear_estatus(struct ghes *ghes,
- struct acpi_hest_generic_status *estatus,
- u64 buf_paddr, enum fixed_addresses fixmap_idx)
-{
- estatus->block_status = 0;
-
- if (!buf_paddr)
- return;
-
- ghes_copy_tofrom_phys(estatus, buf_paddr,
- sizeof(estatus->block_status), 0,
- fixmap_idx);
-
- /*
- * GHESv2 type HEST entries introduce support for error acknowledgment,
- * so only acknowledge the error if this support is present.
- */
- if (is_hest_type_generic_v2(ghes))
- ghes_ack_error(ghes->generic_v2);
-}
/**
* struct ghes_task_work - for synchronous RAS event
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
new file mode 100644
index 000000000000..7bb72fe57838
--- /dev/null
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared GHES helpers for firmware-first CPER error handling.
+ *
+ * This file holds the GHES helper code that is shared by the in-tree GHES
+ * driver and by other firmware-first error sources that reuse the same CPER
+ * handling flow.
+ *
+ * Derived from the ACPI APEI GHES driver.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel•com>
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+
+#include <acpi/apei.h>
+#include <acpi/ghes_cper.h>
+
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+
+#include "apei-internal.h"
+
+static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
+{
+ phys_addr_t paddr;
+ pgprot_t prot;
+
+ paddr = PFN_PHYS(pfn);
+ prot = arch_apei_get_mem_attribute(paddr);
+ __set_fixmap(fixmap_idx, paddr, prot);
+
+ return (void __iomem *) __fix_to_virt(fixmap_idx);
+}
+
+static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
+{
+ int _idx = virt_to_fix((unsigned long)vaddr);
+
+ WARN_ON_ONCE(fixmap_idx != _idx);
+ clear_fixmap(fixmap_idx);
+}
+
+static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
+{
+ int rc;
+ u64 val = 0;
+
+ rc = apei_read(&val, &gv2->read_ack_register);
+ if (rc)
+ return;
+
+ val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
+ val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;
+
+ apei_write(val, &gv2->read_ack_register);
+}
+
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+ int from_phys,
+ enum fixed_addresses fixmap_idx)
+{
+ void __iomem *vaddr;
+ u64 offset;
+ u32 trunk;
+
+ while (len > 0) {
+ offset = paddr - (paddr & PAGE_MASK);
+ vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
+ trunk = PAGE_SIZE - offset;
+ trunk = min(trunk, len);
+ if (from_phys)
+ memcpy_fromio(buffer, vaddr + offset, trunk);
+ else
+ memcpy_toio(vaddr + offset, buffer, trunk);
+ len -= trunk;
+ paddr += trunk;
+ buffer += trunk;
+ ghes_unmap(vaddr, fixmap_idx);
+ }
+}
+
+/* Check the top-level record header has an appropriate size. */
+int __ghes_check_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus)
+{
+ u32 len = cper_estatus_len(estatus);
+ u32 max_len = min(ghes->generic->error_block_length,
+ ghes->estatus_length);
+
+ if (len < sizeof(*estatus)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
+ return -EIO;
+ }
+
+ if (!len || len > max_len) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
+ return -EIO;
+ }
+
+ if (cper_estatus_check_header(estatus)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* Read the CPER block, returning its address, and header in estatus. */
+int __ghes_peek_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ int rc;
+
+ rc = apei_read(buf_paddr, &g->error_status_address);
+ if (rc) {
+ *buf_paddr = 0;
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Failed to read error status block address for hardware error source: %d.\n",
+ g->header.source_id);
+ return -EIO;
+ }
+ if (!*buf_paddr)
+ return -ENOENT;
+
+ ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
+ fixmap_idx);
+ if (!estatus->block_status) {
+ *buf_paddr = 0;
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx,
+ size_t buf_len)
+{
+ ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
+ if (cper_estatus_check(estatus)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Failed to read error status block!\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int ghes_read_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ int rc;
+
+ rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+ if (rc)
+ return rc;
+
+ rc = __ghes_check_estatus(ghes, estatus);
+ if (rc)
+ return rc;
+
+ return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
+ cper_estatus_len(estatus));
+}
+
+void ghes_clear_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ estatus->block_status = 0;
+
+ if (!buf_paddr)
+ return;
+
+ ghes_copy_tofrom_phys(estatus, buf_paddr,
+ sizeof(estatus->block_status), 0,
+ fixmap_idx);
+
+ /*
+ * GHESv2 type HEST entries introduce support for error acknowledgment,
+ * so only acknowledge the error if this support is present.
+ */
+ if (is_hest_type_generic_v2(ghes))
+ ghes_ack_error(ghes->generic_v2);
+}
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 02/10] ACPI: APEI: GHES: move CPER read helpers
2026-05-29 9:50 ` [PATCH v5 02/10] ACPI: APEI: GHES: move CPER read helpers Ahmed Tiba
@ 2026-05-29 15:51 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 15:51 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:42 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Relocate the CPER buffer mapping, peek, and clear helpers from ghes.c into
> ghes_cper.c so they can be shared with other firmware-first providers.
> This commit only shuffles code; behavior stays the same.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
A couple of things I noticed whilst quickly scan through what was moved inline.
Move itself looks fine to me.
Reviewed-by: Jonathan Cameron <jic23@kernel•org>
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> new file mode 100644
> index 000000000000..7bb72fe57838
> --- /dev/null
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -0,0 +1,195 @@
> +
> +/* Check the top-level record header has an appropriate size. */
> +int __ghes_check_estatus(struct ghes *ghes,
> + struct acpi_hest_generic_status *estatus)
> +{
> + u32 len = cper_estatus_len(estatus);
> + u32 max_len = min(ghes->generic->error_block_length,
> + ghes->estatus_length);
> +
> + if (len < sizeof(*estatus)) {
> + pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
> + return -EIO;
> + }
> +
> + if (!len || len > max_len) {
Obviously this is just a move, but that if !len is silly given that will have failed
the previous check as 0 is definitely less than sizeof(*estatus)
Maybe add a trivial patch to tidy this up and any similar oddities.
> + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
> + return -EIO;
> + }
> +
> + if (cper_estatus_check_header(estatus)) {
> + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
> + return -EIO;
> + }
> +
> + return 0;
> +}
> +
> +/* Read the CPER block, returning its address, and header in estatus. */
> +int __ghes_peek_estatus(struct ghes *ghes,
> + struct acpi_hest_generic_status *estatus,
> + u64 *buf_paddr, enum fixed_addresses fixmap_idx)
> +{
> + struct acpi_hest_generic *g = ghes->generic;
> + int rc;
> +
> + rc = apei_read(buf_paddr, &g->error_status_address);
> + if (rc) {
> + *buf_paddr = 0;
> + pr_warn_ratelimited(FW_WARN GHES_PFX
> + "Failed to read error status block address for hardware error source: %d.\n",
> + g->header.source_id);
Whilst you are here I don't think anyone would mind if you fix the alignment.
Looks like it's one space short.
> + return -EIO;
> + }
> + if (!*buf_paddr)
> + return -ENOENT;
> +
> + ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
> + fixmap_idx);
> + if (!estatus->block_status) {
> + *buf_paddr = 0;
> + return -ENOENT;
> + }
> +
> + return 0;
> +}
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 03/10] ACPI: APEI: GHES: move GHESv2 ack and alloc helpers
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
2026-05-29 9:50 ` [PATCH v5 01/10] ACPI: APEI: GHES: share macros via a private header Ahmed Tiba
2026-05-29 9:50 ` [PATCH v5 02/10] ACPI: APEI: GHES: move CPER read helpers Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 15:54 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 04/10] ACPI: APEI: GHES: move estatus cache helpers Ahmed Tiba
` (7 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Move the GHESv2 acknowledgment and error-source allocation helpers from
ghes.c into ghes_cper.c. This is a mechanical refactor that keeps the
logic unchanged while making the helpers reusable.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/ghes.c | 65 -------------------------------------------
drivers/acpi/apei/ghes_cper.c | 65 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 65 insertions(+), 65 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3f35580e8efd..91638ae7e05e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -163,71 +163,6 @@ void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
}
EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
-static int map_gen_v2(struct ghes *ghes)
-{
- return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
-}
-
-static void unmap_gen_v2(struct ghes *ghes)
-{
- apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
-}
-
-struct ghes *ghes_new(struct acpi_hest_generic *generic)
-{
- struct ghes *ghes;
- unsigned int error_block_length;
- int rc;
-
- ghes = kzalloc_obj(*ghes);
- if (!ghes)
- return ERR_PTR(-ENOMEM);
-
- ghes->generic = generic;
- if (is_hest_type_generic_v2(ghes)) {
- rc = map_gen_v2(ghes);
- if (rc)
- goto err_free;
- }
-
- rc = apei_map_generic_address(&generic->error_status_address);
- if (rc)
- goto err_unmap_read_ack_addr;
- error_block_length = generic->error_block_length;
- if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
- pr_warn(FW_WARN GHES_PFX
- "Error status block length is too long: %u for "
- "generic hardware error source: %d.\n",
- error_block_length, generic->header.source_id);
- error_block_length = GHES_ESTATUS_MAX_SIZE;
- }
- ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
- ghes->estatus_length = error_block_length;
- if (!ghes->estatus) {
- rc = -ENOMEM;
- goto err_unmap_status_addr;
- }
-
- return ghes;
-
-err_unmap_status_addr:
- apei_unmap_generic_address(&generic->error_status_address);
-err_unmap_read_ack_addr:
- if (is_hest_type_generic_v2(ghes))
- unmap_gen_v2(ghes);
-err_free:
- kfree(ghes);
- return ERR_PTR(rc);
-}
-
-void ghes_fini(struct ghes *ghes)
-{
- kfree(ghes->estatus);
- apei_unmap_generic_address(&ghes->generic->error_status_address);
- if (is_hest_type_generic_v2(ghes))
- unmap_gen_v2(ghes);
-}
-
static inline int ghes_severity(int severity)
{
switch (severity) {
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index 7bb72fe57838..8080e0f76dac 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -62,6 +62,71 @@ static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
apei_write(val, &gv2->read_ack_register);
}
+static int map_gen_v2(struct ghes *ghes)
+{
+ return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+static void unmap_gen_v2(struct ghes *ghes)
+{
+ apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+struct ghes *ghes_new(struct acpi_hest_generic *generic)
+{
+ struct ghes *ghes;
+ unsigned int error_block_length;
+ int rc;
+
+ ghes = kzalloc_obj(*ghes);
+ if (!ghes)
+ return ERR_PTR(-ENOMEM);
+
+ ghes->generic = generic;
+ if (is_hest_type_generic_v2(ghes)) {
+ rc = map_gen_v2(ghes);
+ if (rc)
+ goto err_free;
+ }
+
+ rc = apei_map_generic_address(&generic->error_status_address);
+ if (rc)
+ goto err_unmap_read_ack_addr;
+ error_block_length = generic->error_block_length;
+ if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
+ pr_warn(FW_WARN GHES_PFX
+ "Error status block length is too long: %u for "
+ "generic hardware error source: %d.\n",
+ error_block_length, generic->header.source_id);
+ error_block_length = GHES_ESTATUS_MAX_SIZE;
+ }
+ ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
+ ghes->estatus_length = error_block_length;
+ if (!ghes->estatus) {
+ rc = -ENOMEM;
+ goto err_unmap_status_addr;
+ }
+
+ return ghes;
+
+err_unmap_status_addr:
+ apei_unmap_generic_address(&generic->error_status_address);
+err_unmap_read_ack_addr:
+ if (is_hest_type_generic_v2(ghes))
+ unmap_gen_v2(ghes);
+err_free:
+ kfree(ghes);
+ return ERR_PTR(rc);
+}
+
+void ghes_fini(struct ghes *ghes)
+{
+ kfree(ghes->estatus);
+ apei_unmap_generic_address(&ghes->generic->error_status_address);
+ if (is_hest_type_generic_v2(ghes))
+ unmap_gen_v2(ghes);
+}
+
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
int from_phys,
enum fixed_addresses fixmap_idx)
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 03/10] ACPI: APEI: GHES: move GHESv2 ack and alloc helpers
2026-05-29 9:50 ` [PATCH v5 03/10] ACPI: APEI: GHES: move GHESv2 ack and alloc helpers Ahmed Tiba
@ 2026-05-29 15:54 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 15:54 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:43 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Move the GHESv2 acknowledgment and error-source allocation helpers from
> ghes.c into ghes_cper.c. This is a mechanical refactor that keeps the
> logic unchanged while making the helpers reusable.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
Reviewed-by: Jonathan Cameron <jic23@kernel•org>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 04/10] ACPI: APEI: GHES: move estatus cache helpers
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (2 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 03/10] ACPI: APEI: GHES: move GHESv2 ack and alloc helpers Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 16:03 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 05/10] ACPI: APEI: GHES: move vendor record helpers Ahmed Tiba
` (6 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Relocate the estatus cache allocation and lookup helpers from ghes.c into
ghes_cper.c. This code move keeps the logic intact while making the cache
implementation available to forthcoming users.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/ghes.c | 138 +----------------------------------------
drivers/acpi/apei/ghes_cper.c | 140 ++++++++++++++++++++++++++++++++++++++++++
include/acpi/ghes_cper.h | 6 ++
3 files changed, 147 insertions(+), 137 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 91638ae7e05e..adab7404310e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -113,10 +113,7 @@ static DEFINE_MUTEX(ghes_devs_mutex);
*/
static DEFINE_SPINLOCK(ghes_notify_lock_irq);
-static struct gen_pool *ghes_estatus_pool;
-
-static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
-static atomic_t ghes_estatus_cache_alloced;
+struct gen_pool *ghes_estatus_pool;
int ghes_estatus_pool_init(unsigned int num_ghes)
{
@@ -733,139 +730,6 @@ static int ghes_print_estatus(const char *pfx,
return 0;
}
-/*
- * GHES error status reporting throttle, to report more kinds of
- * errors, instead of just most frequently occurred errors.
- */
-static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
-{
- u32 len;
- int i, cached = 0;
- unsigned long long now;
- struct ghes_estatus_cache *cache;
- struct acpi_hest_generic_status *cache_estatus;
-
- len = cper_estatus_len(estatus);
- rcu_read_lock();
- for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
- cache = rcu_dereference(ghes_estatus_caches[i]);
- if (cache == NULL)
- continue;
- if (len != cache->estatus_len)
- continue;
- cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
- if (memcmp(estatus, cache_estatus, len))
- continue;
- atomic_inc(&cache->count);
- now = sched_clock();
- if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
- cached = 1;
- break;
- }
- rcu_read_unlock();
- return cached;
-}
-
-static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
- struct acpi_hest_generic *generic,
- struct acpi_hest_generic_status *estatus)
-{
- int alloced;
- u32 len, cache_len;
- struct ghes_estatus_cache *cache;
- struct acpi_hest_generic_status *cache_estatus;
-
- alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
- if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
- atomic_dec(&ghes_estatus_cache_alloced);
- return NULL;
- }
- len = cper_estatus_len(estatus);
- cache_len = GHES_ESTATUS_CACHE_LEN(len);
- cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
- if (!cache) {
- atomic_dec(&ghes_estatus_cache_alloced);
- return NULL;
- }
- cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
- memcpy(cache_estatus, estatus, len);
- cache->estatus_len = len;
- atomic_set(&cache->count, 0);
- cache->generic = generic;
- cache->time_in = sched_clock();
- return cache;
-}
-
-static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
-{
- struct ghes_estatus_cache *cache;
- u32 len;
-
- cache = container_of(head, struct ghes_estatus_cache, rcu);
- len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
- len = GHES_ESTATUS_CACHE_LEN(len);
- gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
- atomic_dec(&ghes_estatus_cache_alloced);
-}
-
-static void
-ghes_estatus_cache_add(struct acpi_hest_generic *generic,
- struct acpi_hest_generic_status *estatus)
-{
- unsigned long long now, duration, period, max_period = 0;
- struct ghes_estatus_cache *cache, *new_cache;
- struct ghes_estatus_cache __rcu *victim;
- int i, slot = -1, count;
-
- new_cache = ghes_estatus_cache_alloc(generic, estatus);
- if (!new_cache)
- return;
-
- rcu_read_lock();
- now = sched_clock();
- for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
- cache = rcu_dereference(ghes_estatus_caches[i]);
- if (cache == NULL) {
- slot = i;
- break;
- }
- duration = now - cache->time_in;
- if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
- slot = i;
- break;
- }
- count = atomic_read(&cache->count);
- period = duration;
- do_div(period, (count + 1));
- if (period > max_period) {
- max_period = period;
- slot = i;
- }
- }
- rcu_read_unlock();
-
- if (slot != -1) {
- /*
- * Use release semantics to ensure that ghes_estatus_cached()
- * running on another CPU will see the updated cache fields if
- * it can see the new value of the pointer.
- */
- victim = xchg_release(&ghes_estatus_caches[slot],
- RCU_INITIALIZER(new_cache));
-
- /*
- * At this point, victim may point to a cached item different
- * from the one based on which we selected the slot. Instead of
- * going to the loop again to pick another slot, let's just
- * drop the other item anyway: this may cause a false cache
- * miss later on, but that won't cause any problems.
- */
- if (victim)
- call_rcu(&unrcu_pointer(victim)->rcu,
- ghes_estatus_cache_rcu_free);
- }
-}
-
static void __ghes_panic(struct ghes *ghes,
struct acpi_hest_generic_status *estatus,
u64 buf_paddr, enum fixed_addresses fixmap_idx)
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index 8080e0f76dac..0a117f478afb 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -13,10 +13,14 @@
*/
#include <linux/err.h>
+#include <linux/genalloc.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/math64.h>
#include <linux/mm.h>
#include <linux/ratelimit.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/clock.h>
#include <linux/slab.h>
#include <acpi/apei.h>
@@ -27,6 +31,9 @@
#include "apei-internal.h"
+static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
{
phys_addr_t paddr;
@@ -258,3 +265,136 @@ void ghes_clear_estatus(struct ghes *ghes,
if (is_hest_type_generic_v2(ghes))
ghes_ack_error(ghes->generic_v2);
}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+ u32 len;
+ int i, cached = 0;
+ unsigned long long now;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ len = cper_estatus_len(estatus);
+ rcu_read_lock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL)
+ continue;
+ if (len != cache->estatus_len)
+ continue;
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ if (memcmp(estatus, cache_estatus, len))
+ continue;
+ atomic_inc(&cache->count);
+ now = sched_clock();
+ if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+ cached = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int alloced;
+ u32 len, cache_len;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+ if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ len = cper_estatus_len(estatus);
+ cache_len = GHES_ESTATUS_CACHE_LEN(len);
+ cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+ if (cache == NULL) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ memcpy(cache_estatus, estatus, len);
+ cache->estatus_len = len;
+ atomic_set(&cache->count, 0);
+ cache->generic = generic;
+ cache->time_in = sched_clock();
+ return cache;
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+ struct ghes_estatus_cache *cache;
+ u32 len;
+
+ cache = container_of(head, struct ghes_estatus_cache, rcu);
+ len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+ len = GHES_ESTATUS_CACHE_LEN(len);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+ atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+void
+ghes_estatus_cache_add(struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ unsigned long long now, duration, period, max_period = 0;
+ struct ghes_estatus_cache *cache, *new_cache;
+ struct ghes_estatus_cache __rcu *victim;
+ int i, slot = -1, count;
+
+ new_cache = ghes_estatus_cache_alloc(generic, estatus);
+ if (!new_cache)
+ return;
+
+ rcu_read_lock();
+ now = sched_clock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL) {
+ slot = i;
+ break;
+ }
+ duration = now - cache->time_in;
+ if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+ slot = i;
+ break;
+ }
+ count = atomic_read(&cache->count);
+ period = duration;
+ do_div(period, (count + 1));
+ if (period > max_period) {
+ max_period = period;
+ slot = i;
+ }
+ }
+ rcu_read_unlock();
+
+ if (slot != -1) {
+ /*
+ * Use release semantics to ensure that ghes_estatus_cached()
+ * running on another CPU will see the updated cache fields if
+ * it can see the new value of the pointer.
+ */
+ victim = xchg_release(&ghes_estatus_caches[slot],
+ RCU_INITIALIZER(new_cache));
+
+ /*
+ * At this point, victim may point to a cached item different
+ * from the one based on which we selected the slot. Instead of
+ * going to the loop again to pick another slot, let's just
+ * drop the other item anyway: this may cause a false cache
+ * miss later on, but that won't cause any problems.
+ */
+ if (victim)
+ call_rcu(&unrcu_pointer(victim)->rcu,
+ ghes_estatus_cache_rcu_free);
+ }
+}
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
index 6b7632cfaf66..1b5dbeca9bb6 100644
--- a/include/acpi/ghes_cper.h
+++ b/include/acpi/ghes_cper.h
@@ -16,6 +16,7 @@
#ifndef ACPI_APEI_GHES_CPER_H
#define ACPI_APEI_GHES_CPER_H
+#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <acpi/ghes.h>
@@ -54,6 +55,8 @@
((struct acpi_hest_generic_data *) \
((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
+extern struct gen_pool *ghes_estatus_pool;
+
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
@@ -98,5 +101,8 @@ int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
u64 buf_paddr, enum fixed_addresses fixmap_idx,
size_t buf_len);
#endif
+int ghes_estatus_cached(struct acpi_hest_generic_status *estatus);
+void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus);
#endif /* ACPI_APEI_GHES_CPER_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 04/10] ACPI: APEI: GHES: move estatus cache helpers
2026-05-29 9:50 ` [PATCH v5 04/10] ACPI: APEI: GHES: move estatus cache helpers Ahmed Tiba
@ 2026-05-29 16:03 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:03 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:44 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Relocate the estatus cache allocation and lookup helpers from ghes.c into
> ghes_cper.c. This code move keeps the logic intact while making the cache
> implementation available to forthcoming users.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
A couple of minor things inline.
With the two I've called out tidied up
Reviewed-by: Jonathan Cameron <jic23@kernel•org>
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> index 8080e0f76dac..0a117f478afb 100644
> --- a/drivers/acpi/apei/ghes_cper.c
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -13,10 +13,14 @@
> */
>
> #include <linux/err.h>
> +#include <linux/genalloc.h>
> #include <linux/io.h>
> #include <linux/kernel.h>
> +#include <linux/math64.h>
> #include <linux/mm.h>
> #include <linux/ratelimit.h>
> +#include <linux/rcupdate.h>
> +#include <linux/sched/clock.h>
> #include <linux/slab.h>
> +static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
> +{
> + struct ghes_estatus_cache *cache;
> + u32 len;
> +
> + cache = container_of(head, struct ghes_estatus_cache, rcu);
> + len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
> + len = GHES_ESTATUS_CACHE_LEN(len);
> + gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
> + atomic_dec(&ghes_estatus_cache_alloced);
> +}
> +
> +void
> +ghes_estatus_cache_add(struct acpi_hest_generic *generic,
> + struct acpi_hest_generic_status *estatus)
void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
struct acpi_hest_generic_status *estatus)
is under 80 chars (and how you have it in the header!)
(RB assumes you fix this - or argue against perhaps because of a change in
a future patch)
> +{
> + unsigned long long now, duration, period, max_period = 0;
> + struct ghes_estatus_cache *cache, *new_cache;
> + struct ghes_estatus_cache __rcu *victim;
> + int i, slot = -1, count;
> +
> + new_cache = ghes_estatus_cache_alloc(generic, estatus);
> + if (!new_cache)
> + return;
> +
> + rcu_read_lock();
> + now = sched_clock();
> + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
> + cache = rcu_dereference(ghes_estatus_caches[i]);
> + if (cache == NULL) {
> + slot = i;
> + break;
> + }
> + duration = now - cache->time_in;
> + if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
> + slot = i;
> + break;
> + }
> + count = atomic_read(&cache->count);
> + period = duration;
> + do_div(period, (count + 1));
> + if (period > max_period) {
> + max_period = period;
> + slot = i;
> + }
> + }
> + rcu_read_unlock();
> +
> + if (slot != -1) {
If you even end up doing tidy up of this code, would be nicer to flip
the logic here and do an early return.
if (slot == -1)
return;
Then the rest is much less indented.
No need to do that in this series though as nothing 'wrong' with the
current code as such.
> + /*
> + * Use release semantics to ensure that ghes_estatus_cached()
> + * running on another CPU will see the updated cache fields if
> + * it can see the new value of the pointer.
> + */
> + victim = xchg_release(&ghes_estatus_caches[slot],
> + RCU_INITIALIZER(new_cache));
> +
> + /*
> + * At this point, victim may point to a cached item different
> + * from the one based on which we selected the slot. Instead of
> + * going to the loop again to pick another slot, let's just
> + * drop the other item anyway: this may cause a false cache
> + * miss later on, but that won't cause any problems.
> + */
> + if (victim)
> + call_rcu(&unrcu_pointer(victim)->rcu,
> + ghes_estatus_cache_rcu_free);
> + }
> +}
> diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
> index 6b7632cfaf66..1b5dbeca9bb6 100644
> --- a/include/acpi/ghes_cper.h
> +++ b/include/acpi/ghes_cper.h
> @@ -16,6 +16,7 @@
> #ifndef ACPI_APEI_GHES_CPER_H
> #define ACPI_APEI_GHES_CPER_H
>
> +#include <linux/atomic.h>
Why? Nothing in in the types used in what is added to the header needs
it - maybe I'm suffering Friday syndrome. Seems like it belongs in another
patch or in a c file rather than the header.
(RB assumes this fixed or argued against)
> #include <linux/workqueue.h>
>
> #include <acpi/ghes.h>
> @@ -54,6 +55,8 @@
> ((struct acpi_hest_generic_data *) \
> ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
>
> +extern struct gen_pool *ghes_estatus_pool;
> +
> static inline bool is_hest_type_generic_v2(struct ghes *ghes)
> {
> return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
> @@ -98,5 +101,8 @@ int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
> u64 buf_paddr, enum fixed_addresses fixmap_idx,
> size_t buf_len);
> #endif
> +int ghes_estatus_cached(struct acpi_hest_generic_status *estatus);
> +void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
> + struct acpi_hest_generic_status *estatus);
>
> #endif /* ACPI_APEI_GHES_CPER_H */
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 05/10] ACPI: APEI: GHES: move vendor record helpers
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (3 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 04/10] ACPI: APEI: GHES: move estatus cache helpers Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 16:10 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 06/10] ACPI: APEI: GHES: move CXL CPER helpers Ahmed Tiba
` (5 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Shift the vendor record workqueue helpers into ghes_cper.c so both GHES
and future DT-based providers can use the same implementation. The change
is mechanical and keeps the notifier behavior identical.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/ghes.c | 86 +++++++++----------------------------------
drivers/acpi/apei/ghes_cper.c | 55 +++++++++++++++++++++++++++
include/acpi/ghes_cper.h | 2 +
3 files changed, 75 insertions(+), 68 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index adab7404310e..81ac51632f21 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -383,74 +383,6 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
-static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
-
-int ghes_register_vendor_record_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
-}
-EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
-
-void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
-{
- blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
-}
-EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
-
-static void ghes_vendor_record_notifier_destroy(void *nb)
-{
- ghes_unregister_vendor_record_notifier(nb);
-}
-
-int devm_ghes_register_vendor_record_notifier(struct device *dev,
- struct notifier_block *nb)
-{
- int ret;
-
- ret = ghes_register_vendor_record_notifier(nb);
- if (ret)
- return ret;
-
- return devm_add_action_or_reset(dev, ghes_vendor_record_notifier_destroy, nb);
-}
-EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
-
-static void ghes_vendor_record_work_func(struct work_struct *work)
-{
- struct ghes_vendor_record_entry *entry;
- struct acpi_hest_generic_data *gdata;
- u32 len;
-
- entry = container_of(work, struct ghes_vendor_record_entry, work);
- gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
-
- blocking_notifier_call_chain(&vendor_record_notify_list,
- entry->error_severity, gdata);
-
- len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
- gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
-}
-
-static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
- int sev)
-{
- struct acpi_hest_generic_data *copied_gdata;
- struct ghes_vendor_record_entry *entry;
- u32 len;
-
- len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
- entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
- if (!entry)
- return;
-
- copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
- memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
- entry->error_severity = sev;
-
- INIT_WORK(&entry->work, ghes_vendor_record_work_func);
- schedule_work(&entry->work);
-}
-
/* Room for 8 entries */
#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
@@ -514,6 +446,24 @@ int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
+static void ghes_vendor_record_notifier_destroy(void *nb)
+{
+ ghes_unregister_vendor_record_notifier(nb);
+}
+
+int devm_ghes_register_vendor_record_notifier(struct device *dev,
+ struct notifier_block *nb)
+{
+ int ret;
+
+ ret = ghes_register_vendor_record_notifier(nb);
+ if (ret)
+ return ret;
+
+ return devm_add_action_or_reset(dev, ghes_vendor_record_notifier_destroy, nb);
+}
+EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
+
/* Room for 8 entries for each of the 4 event log queues */
#define CXL_CPER_FIFO_DEPTH 32
DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index 0a117f478afb..131980d36064 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -14,12 +14,17 @@
#include <linux/err.h>
#include <linux/genalloc.h>
+#include <linux/irq_work.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/math64.h>
#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/llist.h>
#include <linux/ratelimit.h>
#include <linux/rcupdate.h>
+#include <linux/rculist.h>
#include <linux/sched/clock.h>
#include <linux/slab.h>
@@ -266,6 +271,56 @@ void ghes_clear_estatus(struct ghes *ghes,
ghes_ack_error(ghes->generic_v2);
}
+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
+
+int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
+
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
+
+static void ghes_vendor_record_work_func(struct work_struct *work)
+{
+ struct ghes_vendor_record_entry *entry;
+ struct acpi_hest_generic_data *gdata;
+ u32 len;
+
+ entry = container_of(work, struct ghes_vendor_record_entry, work);
+ gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+
+ blocking_notifier_call_chain(&vendor_record_notify_list,
+ entry->error_severity, gdata);
+
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+ gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
+}
+
+void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
+ int sev)
+{
+ struct acpi_hest_generic_data *copied_gdata;
+ struct ghes_vendor_record_entry *entry;
+ u32 len;
+
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+ entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
+ if (!entry)
+ return;
+
+ copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+ memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
+ entry->error_severity = sev;
+
+ INIT_WORK(&entry->work, ghes_vendor_record_work_func);
+ schedule_work(&entry->work);
+}
+
/*
* GHES error status reporting throttle, to report more kinds of
* errors, instead of just most frequently occurred errors.
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
index 1b5dbeca9bb6..51725f25c516 100644
--- a/include/acpi/ghes_cper.h
+++ b/include/acpi/ghes_cper.h
@@ -104,5 +104,7 @@ int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
int ghes_estatus_cached(struct acpi_hest_generic_status *estatus);
void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
struct acpi_hest_generic_status *estatus);
+void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
+ int sev);
#endif /* ACPI_APEI_GHES_CPER_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 05/10] ACPI: APEI: GHES: move vendor record helpers
2026-05-29 9:50 ` [PATCH v5 05/10] ACPI: APEI: GHES: move vendor record helpers Ahmed Tiba
@ 2026-05-29 16:10 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:10 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:45 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Shift the vendor record workqueue helpers into ghes_cper.c so both GHES
> and future DT-based providers can use the same implementation. The change
> is mechanical and keeps the notifier behavior identical.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
A few questions / comments inline
J
> ---
> drivers/acpi/apei/ghes.c | 86 +++++++++----------------------------------
> drivers/acpi/apei/ghes_cper.c | 55 +++++++++++++++++++++++++++
> include/acpi/ghes_cper.h | 2 +
> 3 files changed, 75 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index adab7404310e..81ac51632f21 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
...
> -
> -static void ghes_vendor_record_notifier_destroy(void *nb)
> -{
> - ghes_unregister_vendor_record_notifier(nb);
> -}
> -
> -int devm_ghes_register_vendor_record_notifier(struct device *dev,
> - struct notifier_block *nb)
> -{
> - int ret;
> -
> - ret = ghes_register_vendor_record_notifier(nb);
> - if (ret)
> - return ret;
> -
> - return devm_add_action_or_reset(dev, ghes_vendor_record_notifier_destroy, nb);
> -}
> -EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
> #define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
> static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
> @@ -514,6 +446,24 @@ int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
>
> +static void ghes_vendor_record_notifier_destroy(void *nb)
> +{
> + ghes_unregister_vendor_record_notifier(nb);
> +}
> +
> +int devm_ghes_register_vendor_record_notifier(struct device *dev,
> + struct notifier_block *nb)
> +{
> + int ret;
> +
> + ret = ghes_register_vendor_record_notifier(nb);
> + if (ret)
> + return ret;
> +
> + return devm_add_action_or_reset(dev, ghes_vendor_record_notifier_destroy, nb);
> +}
> +EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
> +
Why did these two move inside the file? It is a bit odd to leave the devm calls in
a different place to what they are wrapping. I guess someone argued for that in an
earlier version? (hopefully not me ;)
If the move puts them in an ifdef block then I'd not bother - it's tiny code and
to me doing this is more confusing than just leaving them where they were.
> /* Room for 8 entries for each of the 4 event log queues */
> #define CXL_CPER_FIFO_DEPTH 32
> DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> index 0a117f478afb..131980d36064 100644
> --- a/drivers/acpi/apei/ghes_cper.c
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -14,12 +14,17 @@
>
> #include <linux/err.h>
> #include <linux/genalloc.h>
> +#include <linux/irq_work.h>
> #include <linux/io.h>
> #include <linux/kernel.h>
> +#include <linux/list.h>
> #include <linux/math64.h>
> #include <linux/mm.h>
> +#include <linux/notifier.h>
> +#include <linux/llist.h>
> #include <linux/ratelimit.h>
> #include <linux/rcupdate.h>
> +#include <linux/rculist.h>
I'm not seeing anything reason for most of these new includes.
Probably in the wrong patch
> #include <linux/sched/clock.h>
> #include <linux/slab.h>
>
> @@ -266,6 +271,56 @@ void ghes_clear_estatus(struct ghes *ghes,
> ghes_ack_error(ghes->generic_v2);
> }
>
> +static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
> +
> +int ghes_register_vendor_record_notifier(struct notifier_block *nb)
> +{
> + return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
> +
> +void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
> +{
> + blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
> +
> +static void ghes_vendor_record_work_func(struct work_struct *work)
> +{
> + struct ghes_vendor_record_entry *entry;
> + struct acpi_hest_generic_data *gdata;
> + u32 len;
> +
> + entry = container_of(work, struct ghes_vendor_record_entry, work);
> + gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
> +
> + blocking_notifier_call_chain(&vendor_record_notify_list,
> + entry->error_severity, gdata);
> +
> + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
> + gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
> +}
> +
> +void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
> + int sev)
> +{
> + struct acpi_hest_generic_data *copied_gdata;
> + struct ghes_vendor_record_entry *entry;
> + u32 len;
> +
> + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
> + entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
> + if (!entry)
> + return;
> +
> + copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
> + memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
> + entry->error_severity = sev;
> +
> + INIT_WORK(&entry->work, ghes_vendor_record_work_func);
> + schedule_work(&entry->work);
> +}
> +
> /*
> * GHES error status reporting throttle, to report more kinds of
> * errors, instead of just most frequently occurred errors.
> diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
> index 1b5dbeca9bb6..51725f25c516 100644
> --- a/include/acpi/ghes_cper.h
> +++ b/include/acpi/ghes_cper.h
> @@ -104,5 +104,7 @@ int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
> int ghes_estatus_cached(struct acpi_hest_generic_status *estatus);
> void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
> struct acpi_hest_generic_status *estatus);
> +void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
> + int sev);
>
> #endif /* ACPI_APEI_GHES_CPER_H */
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 06/10] ACPI: APEI: GHES: move CXL CPER helpers
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (4 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 05/10] ACPI: APEI: GHES: move vendor record helpers Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 16:16 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 07/10] ACPI: APEI: introduce GHES helper Ahmed Tiba
` (4 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Move the CXL CPER handling paths out of ghes.c and into ghes_cper.c so the
helpers can be reused. The code is moved as-is, with the public
prototypes updated so GHES keeps calling into the new translation unit.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/ghes.c | 132 -----------------------------------------
drivers/acpi/apei/ghes_cper.c | 134 ++++++++++++++++++++++++++++++++++++++++++
include/acpi/ghes_cper.h | 11 ++++
3 files changed, 145 insertions(+), 132 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81ac51632f21..85be2ebf4d3e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -383,69 +383,6 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
-/* Room for 8 entries */
-#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
-static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
- CXL_CPER_PROT_ERR_FIFO_DEPTH);
-
-/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
-static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
-struct work_struct *cxl_cper_prot_err_work;
-
-static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
- int severity)
-{
-#ifdef CONFIG_ACPI_APEI_PCIEAER
- struct cxl_cper_prot_err_work_data wd;
-
- if (cxl_cper_sec_prot_err_valid(prot_err))
- return;
-
- guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
-
- if (!cxl_cper_prot_err_work)
- return;
-
- if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity))
- return;
-
- if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
- pr_err_ratelimited("CXL CPER kfifo overflow\n");
- return;
- }
-
- schedule_work(cxl_cper_prot_err_work);
-#endif
-}
-
-int cxl_cper_register_prot_err_work(struct work_struct *work)
-{
- if (cxl_cper_prot_err_work)
- return -EINVAL;
-
- guard(spinlock)(&cxl_cper_prot_err_work_lock);
- cxl_cper_prot_err_work = work;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
-
-int cxl_cper_unregister_prot_err_work(struct work_struct *work)
-{
- if (cxl_cper_prot_err_work != work)
- return -EINVAL;
-
- guard(spinlock)(&cxl_cper_prot_err_work_lock);
- cxl_cper_prot_err_work = NULL;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
-
-int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
-{
- return kfifo_get(&cxl_cper_prot_err_fifo, wd);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
-
static void ghes_vendor_record_notifier_destroy(void *nb)
{
ghes_unregister_vendor_record_notifier(nb);
@@ -464,75 +401,6 @@ int devm_ghes_register_vendor_record_notifier(struct device *dev,
}
EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
-/* Room for 8 entries for each of the 4 event log queues */
-#define CXL_CPER_FIFO_DEPTH 32
-DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
-
-/* Synchronize schedule_work() with cxl_cper_work changes */
-static DEFINE_SPINLOCK(cxl_cper_work_lock);
-struct work_struct *cxl_cper_work;
-
-static void cxl_cper_post_event(enum cxl_event_type event_type,
- struct cxl_cper_event_rec *rec)
-{
- struct cxl_cper_work_data wd;
-
- if (rec->hdr.length <= sizeof(rec->hdr) ||
- rec->hdr.length > sizeof(*rec)) {
- pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
- rec->hdr.length);
- return;
- }
-
- if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
- pr_err(FW_WARN "CXL CPER invalid event\n");
- return;
- }
-
- guard(spinlock_irqsave)(&cxl_cper_work_lock);
-
- if (!cxl_cper_work)
- return;
-
- wd.event_type = event_type;
- memcpy(&wd.rec, rec, sizeof(wd.rec));
-
- if (!kfifo_put(&cxl_cper_fifo, wd)) {
- pr_err_ratelimited("CXL CPER kfifo overflow\n");
- return;
- }
-
- schedule_work(cxl_cper_work);
-}
-
-int cxl_cper_register_work(struct work_struct *work)
-{
- if (cxl_cper_work)
- return -EINVAL;
-
- guard(spinlock)(&cxl_cper_work_lock);
- cxl_cper_work = work;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL");
-
-int cxl_cper_unregister_work(struct work_struct *work)
-{
- if (cxl_cper_work != work)
- return -EINVAL;
-
- guard(spinlock)(&cxl_cper_work_lock);
- cxl_cper_work = NULL;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL");
-
-int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
-{
- return kfifo_get(&cxl_cper_fifo, wd);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
-
static void ghes_log_hwerr(int sev, guid_t *sec_type)
{
if (sev != CPER_SEV_RECOVERABLE)
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index 131980d36064..d7a666a163c3 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -12,10 +12,12 @@
* Author: Huang Ying <ying.huang@intel•com>
*/
+#include <linux/aer.h>
#include <linux/err.h>
#include <linux/genalloc.h>
#include <linux/irq_work.h>
#include <linux/io.h>
+#include <linux/kfifo.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/math64.h>
@@ -321,6 +323,138 @@ void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
+/* Room for 8 entries */
+#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
+static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
+ CXL_CPER_PROT_ERR_FIFO_DEPTH);
+
+/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
+static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
+struct work_struct *cxl_cper_prot_err_work;
+
+void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
+ int severity)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct cxl_cper_prot_err_work_data wd;
+
+ if (cxl_cper_sec_prot_err_valid(prot_err))
+ return;
+
+ guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
+
+ if (!cxl_cper_prot_err_work)
+ return;
+
+ if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity))
+ return;
+
+ if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
+ pr_err_ratelimited("CXL CPER kfifo overflow\n");
+ return;
+ }
+
+ schedule_work(cxl_cper_prot_err_work);
+#endif
+}
+
+int cxl_cper_register_prot_err_work(struct work_struct *work)
+{
+ if (cxl_cper_prot_err_work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_prot_err_work_lock);
+ cxl_cper_prot_err_work = work;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
+
+int cxl_cper_unregister_prot_err_work(struct work_struct *work)
+{
+ if (cxl_cper_prot_err_work != work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_prot_err_work_lock);
+ cxl_cper_prot_err_work = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
+
+int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
+{
+ return kfifo_get(&cxl_cper_prot_err_fifo, wd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
+
+/* Room for 8 entries for each of the 4 event log queues */
+#define CXL_CPER_FIFO_DEPTH 32
+static DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
+
+/* Synchronize schedule_work() with cxl_cper_work changes */
+static DEFINE_SPINLOCK(cxl_cper_work_lock);
+struct work_struct *cxl_cper_work;
+
+void cxl_cper_post_event(enum cxl_event_type event_type,
+ struct cxl_cper_event_rec *rec)
+{
+ struct cxl_cper_work_data wd;
+
+ if (rec->hdr.length <= sizeof(rec->hdr) ||
+ rec->hdr.length > sizeof(*rec)) {
+ pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
+ rec->hdr.length);
+ return;
+ }
+
+ if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
+ pr_err(FW_WARN "CXL CPER invalid event\n");
+ return;
+ }
+
+ guard(spinlock_irqsave)(&cxl_cper_work_lock);
+
+ if (!cxl_cper_work)
+ return;
+
+ wd.event_type = event_type;
+ memcpy(&wd.rec, rec, sizeof(wd.rec));
+
+ if (!kfifo_put(&cxl_cper_fifo, wd)) {
+ pr_err_ratelimited("CXL CPER kfifo overflow\n");
+ return;
+ }
+
+ schedule_work(cxl_cper_work);
+}
+
+int cxl_cper_register_work(struct work_struct *work)
+{
+ if (cxl_cper_work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_work_lock);
+ cxl_cper_work = work;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL");
+
+int cxl_cper_unregister_work(struct work_struct *work)
+{
+ if (cxl_cper_work != work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_work_lock);
+ cxl_cper_work = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL");
+
+int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
+{
+ return kfifo_get(&cxl_cper_fifo, wd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
+
/*
* GHES error status reporting throttle, to report more kinds of
* errors, instead of just most frequently occurred errors.
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
index 51725f25c516..dd49e9179b63 100644
--- a/include/acpi/ghes_cper.h
+++ b/include/acpi/ghes_cper.h
@@ -20,6 +20,7 @@
#include <linux/workqueue.h>
#include <acpi/ghes.h>
+#include <cxl/event.h>
#define GHES_PFX "GHES: "
@@ -106,5 +107,15 @@ void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
struct acpi_hest_generic_status *estatus);
void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
int sev);
+void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
+ int severity);
+int cxl_cper_register_prot_err_work(struct work_struct *work);
+int cxl_cper_unregister_prot_err_work(struct work_struct *work);
+int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd);
+void cxl_cper_post_event(enum cxl_event_type event_type,
+ struct cxl_cper_event_rec *rec);
+int cxl_cper_register_work(struct work_struct *work);
+int cxl_cper_unregister_work(struct work_struct *work);
+int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd);
#endif /* ACPI_APEI_GHES_CPER_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 06/10] ACPI: APEI: GHES: move CXL CPER helpers
2026-05-29 9:50 ` [PATCH v5 06/10] ACPI: APEI: GHES: move CXL CPER helpers Ahmed Tiba
@ 2026-05-29 16:16 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:16 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel, Terry Bowman
On Fri, 29 May 2026 10:50:46 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Move the CXL CPER handling paths out of ghes.c and into ghes_cper.c so the
> helpers can be reused. The code is moved as-is, with the public
> prototypes updated so GHES keeps calling into the new translation unit.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
This is going to clash with the series Terry is working on for CXL error handling.
I think this patch in particular:
https://lore.kernel.org/all/20260505173029.2718246-6-terry.bowman@amd.com/
That series is high priority to land from a CXL point of view. +CC Terry
Subject to that,
Reviewed-by: Jonathan Cameron <jic23@kernel•org>
Patch left so Terry can see it. No comments.
> ---
> drivers/acpi/apei/ghes.c | 132 -----------------------------------------
> drivers/acpi/apei/ghes_cper.c | 134 ++++++++++++++++++++++++++++++++++++++++++
> include/acpi/ghes_cper.h | 11 ++++
> 3 files changed, 145 insertions(+), 132 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 81ac51632f21..85be2ebf4d3e 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -383,69 +383,6 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> #endif
> }
>
> -/* Room for 8 entries */
> -#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
> -static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
> - CXL_CPER_PROT_ERR_FIFO_DEPTH);
> -
> -/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
> -static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
> -struct work_struct *cxl_cper_prot_err_work;
> -
> -static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
> - int severity)
> -{
> -#ifdef CONFIG_ACPI_APEI_PCIEAER
> - struct cxl_cper_prot_err_work_data wd;
> -
> - if (cxl_cper_sec_prot_err_valid(prot_err))
> - return;
> -
> - guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
> -
> - if (!cxl_cper_prot_err_work)
> - return;
> -
> - if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity))
> - return;
> -
> - if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
> - pr_err_ratelimited("CXL CPER kfifo overflow\n");
> - return;
> - }
> -
> - schedule_work(cxl_cper_prot_err_work);
> -#endif
> -}
> -
> -int cxl_cper_register_prot_err_work(struct work_struct *work)
> -{
> - if (cxl_cper_prot_err_work)
> - return -EINVAL;
> -
> - guard(spinlock)(&cxl_cper_prot_err_work_lock);
> - cxl_cper_prot_err_work = work;
> - return 0;
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
> -
> -int cxl_cper_unregister_prot_err_work(struct work_struct *work)
> -{
> - if (cxl_cper_prot_err_work != work)
> - return -EINVAL;
> -
> - guard(spinlock)(&cxl_cper_prot_err_work_lock);
> - cxl_cper_prot_err_work = NULL;
> - return 0;
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
> -
> -int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
> -{
> - return kfifo_get(&cxl_cper_prot_err_fifo, wd);
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
> -
> static void ghes_vendor_record_notifier_destroy(void *nb)
> {
> ghes_unregister_vendor_record_notifier(nb);
> @@ -464,75 +401,6 @@ int devm_ghes_register_vendor_record_notifier(struct device *dev,
> }
> EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
>
> -/* Room for 8 entries for each of the 4 event log queues */
> -#define CXL_CPER_FIFO_DEPTH 32
> -DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
> -
> -/* Synchronize schedule_work() with cxl_cper_work changes */
> -static DEFINE_SPINLOCK(cxl_cper_work_lock);
> -struct work_struct *cxl_cper_work;
> -
> -static void cxl_cper_post_event(enum cxl_event_type event_type,
> - struct cxl_cper_event_rec *rec)
> -{
> - struct cxl_cper_work_data wd;
> -
> - if (rec->hdr.length <= sizeof(rec->hdr) ||
> - rec->hdr.length > sizeof(*rec)) {
> - pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
> - rec->hdr.length);
> - return;
> - }
> -
> - if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
> - pr_err(FW_WARN "CXL CPER invalid event\n");
> - return;
> - }
> -
> - guard(spinlock_irqsave)(&cxl_cper_work_lock);
> -
> - if (!cxl_cper_work)
> - return;
> -
> - wd.event_type = event_type;
> - memcpy(&wd.rec, rec, sizeof(wd.rec));
> -
> - if (!kfifo_put(&cxl_cper_fifo, wd)) {
> - pr_err_ratelimited("CXL CPER kfifo overflow\n");
> - return;
> - }
> -
> - schedule_work(cxl_cper_work);
> -}
> -
> -int cxl_cper_register_work(struct work_struct *work)
> -{
> - if (cxl_cper_work)
> - return -EINVAL;
> -
> - guard(spinlock)(&cxl_cper_work_lock);
> - cxl_cper_work = work;
> - return 0;
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL");
> -
> -int cxl_cper_unregister_work(struct work_struct *work)
> -{
> - if (cxl_cper_work != work)
> - return -EINVAL;
> -
> - guard(spinlock)(&cxl_cper_work_lock);
> - cxl_cper_work = NULL;
> - return 0;
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL");
> -
> -int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
> -{
> - return kfifo_get(&cxl_cper_fifo, wd);
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
> -
> static void ghes_log_hwerr(int sev, guid_t *sec_type)
> {
> if (sev != CPER_SEV_RECOVERABLE)
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> index 131980d36064..d7a666a163c3 100644
> --- a/drivers/acpi/apei/ghes_cper.c
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -12,10 +12,12 @@
> * Author: Huang Ying <ying.huang@intel•com>
> */
>
> +#include <linux/aer.h>
> #include <linux/err.h>
> #include <linux/genalloc.h>
> #include <linux/irq_work.h>
> #include <linux/io.h>
> +#include <linux/kfifo.h>
> #include <linux/kernel.h>
> #include <linux/list.h>
> #include <linux/math64.h>
> @@ -321,6 +323,138 @@ void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
> schedule_work(&entry->work);
> }
>
> +/* Room for 8 entries */
> +#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
> +static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
> + CXL_CPER_PROT_ERR_FIFO_DEPTH);
> +
> +/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
> +static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
> +struct work_struct *cxl_cper_prot_err_work;
> +
> +void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
> + int severity)
> +{
> +#ifdef CONFIG_ACPI_APEI_PCIEAER
> + struct cxl_cper_prot_err_work_data wd;
> +
> + if (cxl_cper_sec_prot_err_valid(prot_err))
> + return;
> +
> + guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
> +
> + if (!cxl_cper_prot_err_work)
> + return;
> +
> + if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity))
> + return;
> +
> + if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
> + pr_err_ratelimited("CXL CPER kfifo overflow\n");
> + return;
> + }
> +
> + schedule_work(cxl_cper_prot_err_work);
> +#endif
> +}
> +
> +int cxl_cper_register_prot_err_work(struct work_struct *work)
> +{
> + if (cxl_cper_prot_err_work)
> + return -EINVAL;
> +
> + guard(spinlock)(&cxl_cper_prot_err_work_lock);
> + cxl_cper_prot_err_work = work;
> + return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
> +
> +int cxl_cper_unregister_prot_err_work(struct work_struct *work)
> +{
> + if (cxl_cper_prot_err_work != work)
> + return -EINVAL;
> +
> + guard(spinlock)(&cxl_cper_prot_err_work_lock);
> + cxl_cper_prot_err_work = NULL;
> + return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
> +
> +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
> +{
> + return kfifo_get(&cxl_cper_prot_err_fifo, wd);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
> +
> +/* Room for 8 entries for each of the 4 event log queues */
> +#define CXL_CPER_FIFO_DEPTH 32
> +static DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
> +
> +/* Synchronize schedule_work() with cxl_cper_work changes */
> +static DEFINE_SPINLOCK(cxl_cper_work_lock);
> +struct work_struct *cxl_cper_work;
> +
> +void cxl_cper_post_event(enum cxl_event_type event_type,
> + struct cxl_cper_event_rec *rec)
> +{
> + struct cxl_cper_work_data wd;
> +
> + if (rec->hdr.length <= sizeof(rec->hdr) ||
> + rec->hdr.length > sizeof(*rec)) {
> + pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
> + rec->hdr.length);
> + return;
> + }
> +
> + if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
> + pr_err(FW_WARN "CXL CPER invalid event\n");
> + return;
> + }
> +
> + guard(spinlock_irqsave)(&cxl_cper_work_lock);
> +
> + if (!cxl_cper_work)
> + return;
> +
> + wd.event_type = event_type;
> + memcpy(&wd.rec, rec, sizeof(wd.rec));
> +
> + if (!kfifo_put(&cxl_cper_fifo, wd)) {
> + pr_err_ratelimited("CXL CPER kfifo overflow\n");
> + return;
> + }
> +
> + schedule_work(cxl_cper_work);
> +}
> +
> +int cxl_cper_register_work(struct work_struct *work)
> +{
> + if (cxl_cper_work)
> + return -EINVAL;
> +
> + guard(spinlock)(&cxl_cper_work_lock);
> + cxl_cper_work = work;
> + return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL");
> +
> +int cxl_cper_unregister_work(struct work_struct *work)
> +{
> + if (cxl_cper_work != work)
> + return -EINVAL;
> +
> + guard(spinlock)(&cxl_cper_work_lock);
> + cxl_cper_work = NULL;
> + return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL");
> +
> +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
> +{
> + return kfifo_get(&cxl_cper_fifo, wd);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
> +
> /*
> * GHES error status reporting throttle, to report more kinds of
> * errors, instead of just most frequently occurred errors.
> diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
> index 51725f25c516..dd49e9179b63 100644
> --- a/include/acpi/ghes_cper.h
> +++ b/include/acpi/ghes_cper.h
> @@ -20,6 +20,7 @@
> #include <linux/workqueue.h>
>
> #include <acpi/ghes.h>
> +#include <cxl/event.h>
>
> #define GHES_PFX "GHES: "
>
> @@ -106,5 +107,15 @@ void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
> struct acpi_hest_generic_status *estatus);
> void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
> int sev);
> +void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
> + int severity);
> +int cxl_cper_register_prot_err_work(struct work_struct *work);
> +int cxl_cper_unregister_prot_err_work(struct work_struct *work);
> +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd);
> +void cxl_cper_post_event(enum cxl_event_type event_type,
> + struct cxl_cper_event_rec *rec);
> +int cxl_cper_register_work(struct work_struct *work);
> +int cxl_cper_unregister_work(struct work_struct *work);
> +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd);
>
> #endif /* ACPI_APEI_GHES_CPER_H */
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 07/10] ACPI: APEI: introduce GHES helper
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (5 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 06/10] ACPI: APEI: GHES: move CXL CPER helpers Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 16:21 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 08/10] ACPI: APEI: share GHES CPER helpers Ahmed Tiba
` (3 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Add a dedicated GHES_CPER_HELPERS Kconfig entry so the shared helper code
can be built even when ACPI_APEI_GHES is disabled. Update the build glue
and headers to depend on the new symbol.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/Makefile | 1 +
drivers/acpi/Kconfig | 4 ++++
drivers/acpi/apei/Kconfig | 1 +
drivers/acpi/apei/Makefile | 2 +-
include/acpi/ghes.h | 10 ++++++----
include/cxl/event.h | 2 +-
6 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index 0841ea851847..27a664cb45ea 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -31,6 +31,7 @@ obj-y += idle/
obj-y += char/ipmi/
obj-$(CONFIG_ACPI) += acpi/
+obj-$(CONFIG_GHES_CPER_HELPERS) += acpi/apei/ghes_cper.o
# PnP must come after ACPI since it will eventually need to check if acpi
# was used and do nothing if so
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index f165d14cf61a..13ef0e99f840 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -6,6 +6,10 @@
config ARCH_SUPPORTS_ACPI
bool
+config GHES_CPER_HELPERS
+ bool
+ select UEFI_CPER
+
menuconfig ACPI
bool "ACPI (Advanced Configuration and Power Interface) Support"
depends on ARCH_SUPPORTS_ACPI
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 428458c623f0..ddb62638eb02 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -21,6 +21,7 @@ config ACPI_APEI_GHES
bool "APEI Generic Hardware Error Source"
depends on ACPI_APEI
select ACPI_HED
+ select GHES_CPER_HELPERS
select IRQ_WORK
select GENERIC_ALLOCATOR
select ARM_SDE_INTERFACE if ARM64
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
index f57f3b009d8e..66588d6be56f 100644
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ACPI_APEI) += apei.o
-obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o ghes_cper.o
+obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
# clang versions prior to 18 may blow out the stack with KASAN
ifeq ($(CONFIG_COMPILE_TEST)_$(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_y_)
KASAN_SANITIZE_ghes.o := n
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 8d7e5caef3f1..2ffab36b6154 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -83,15 +83,17 @@ int devm_ghes_register_vendor_record_notifier(struct device *dev,
struct notifier_block *nb);
struct list_head *ghes_get_devices(void);
-
-void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
#else
static inline struct list_head *ghes_get_devices(void) { return NULL; }
-
-static inline void ghes_estatus_pool_region_free(unsigned long addr, u32 size) { return; }
#endif
+#ifdef CONFIG_GHES_CPER_HELPERS
int ghes_estatus_pool_init(unsigned int num_ghes);
+void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
+#else
+static inline int ghes_estatus_pool_init(unsigned int num_ghes) { return -ENODEV; }
+static inline void ghes_estatus_pool_region_free(unsigned long addr, u32 size) { }
+#endif
static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
{
diff --git a/include/cxl/event.h b/include/cxl/event.h
index ff97fea718d2..2ebd65b0d9d6 100644
--- a/include/cxl/event.h
+++ b/include/cxl/event.h
@@ -285,7 +285,7 @@ struct cxl_cper_prot_err_work_data {
int severity;
};
-#ifdef CONFIG_ACPI_APEI_GHES
+#ifdef CONFIG_GHES_CPER_HELPERS
int cxl_cper_register_work(struct work_struct *work);
int cxl_cper_unregister_work(struct work_struct *work);
int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd);
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 07/10] ACPI: APEI: introduce GHES helper
2026-05-29 9:50 ` [PATCH v5 07/10] ACPI: APEI: introduce GHES helper Ahmed Tiba
@ 2026-05-29 16:21 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:21 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:47 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Add a dedicated GHES_CPER_HELPERS Kconfig entry so the shared helper code
> can be built even when ACPI_APEI_GHES is disabled. Update the build glue
> and headers to depend on the new symbol.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
I guess it doesn't matter too much as in practice all exiting CXL systems
are ACPI based, but is this new symbol sufficient for the
CONFIG_CXL_RAS dependency?
Rest of this looks fine to me.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 08/10] ACPI: APEI: share GHES CPER helpers
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (6 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 07/10] ACPI: APEI: introduce GHES helper Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 16:32 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 09/10] dt-bindings: firmware: add arm,ras-cper Ahmed Tiba
` (2 subsequent siblings)
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Wire GHES up to the helper routines in ghes_cper.c and remove the local
copies from ghes.c. This keeps the control flow identical while letting
the helpers be shared with other firmware-first providers.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
drivers/acpi/apei/ghes.c | 416 +--------------------------------------
drivers/acpi/apei/ghes_cper.c | 438 +++++++++++++++++++++++++++++++++++++++++-
include/acpi/ghes_cper.h | 20 ++
3 files changed, 459 insertions(+), 415 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 85be2ebf4d3e..f85b97c4db4c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -67,8 +67,6 @@
#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses
#endif
-static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
-
/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
@@ -113,276 +111,6 @@ static DEFINE_MUTEX(ghes_devs_mutex);
*/
static DEFINE_SPINLOCK(ghes_notify_lock_irq);
-struct gen_pool *ghes_estatus_pool;
-
-int ghes_estatus_pool_init(unsigned int num_ghes)
-{
- unsigned long addr, len;
- int rc;
-
- ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
- if (!ghes_estatus_pool)
- return -ENOMEM;
-
- len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
- len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
-
- addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
- if (!addr)
- goto err_pool_alloc;
-
- rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
- if (rc)
- goto err_pool_add;
-
- return 0;
-
-err_pool_add:
- vfree((void *)addr);
-
-err_pool_alloc:
- gen_pool_destroy(ghes_estatus_pool);
-
- return -ENOMEM;
-}
-
-/**
- * ghes_estatus_pool_region_free - free previously allocated memory
- * from the ghes_estatus_pool.
- * @addr: address of memory to free.
- * @size: size of memory to free.
- *
- * Returns none.
- */
-void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
-{
- gen_pool_free(ghes_estatus_pool, addr, size);
-}
-EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
-
-static inline int ghes_severity(int severity)
-{
- switch (severity) {
- case CPER_SEV_INFORMATIONAL:
- return GHES_SEV_NO;
- case CPER_SEV_CORRECTED:
- return GHES_SEV_CORRECTED;
- case CPER_SEV_RECOVERABLE:
- return GHES_SEV_RECOVERABLE;
- case CPER_SEV_FATAL:
- return GHES_SEV_PANIC;
- default:
- /* Unknown, go panic */
- return GHES_SEV_PANIC;
- }
-}
-
-
-/**
- * struct ghes_task_work - for synchronous RAS event
- *
- * @twork: callback_head for task work
- * @pfn: page frame number of corrupted page
- * @flags: work control flags
- *
- * Structure to pass task work to be handled before
- * returning to user-space via task_work_add().
- */
-struct ghes_task_work {
- struct callback_head twork;
- u64 pfn;
- int flags;
-};
-
-static void memory_failure_cb(struct callback_head *twork)
-{
- struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork);
- int ret;
-
- ret = memory_failure(twcb->pfn, twcb->flags);
- gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb));
-
- if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP)
- return;
-
- pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
- twcb->pfn, current->comm, task_pid_nr(current));
- force_sig(SIGBUS);
-}
-
-static bool ghes_do_memory_failure(u64 physical_addr, int flags)
-{
- struct ghes_task_work *twcb;
- unsigned long pfn;
-
- if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
- return false;
-
- pfn = PHYS_PFN(physical_addr);
-
- if (flags == MF_ACTION_REQUIRED && current->mm) {
- twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb));
- if (!twcb)
- return false;
-
- twcb->pfn = pfn;
- twcb->flags = flags;
- init_task_work(&twcb->twork, memory_failure_cb);
- task_work_add(current, &twcb->twork, TWA_RESUME);
- return true;
- }
-
- memory_failure_queue(pfn, flags);
- return true;
-}
-
-static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
- int sev, bool sync)
-{
- int flags = -1;
- int sec_sev = ghes_severity(gdata->error_severity);
- struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
-
- if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
- return false;
-
- /* iff following two events can be handled properly by now */
- if (sec_sev == GHES_SEV_CORRECTED &&
- (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
- flags = MF_SOFT_OFFLINE;
- if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
- flags = sync ? MF_ACTION_REQUIRED : 0;
-
- if (flags != -1)
- return ghes_do_memory_failure(mem_err->physical_addr, flags);
-
- return false;
-}
-
-static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
- int sev, bool sync)
-{
- struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
- int flags = sync ? MF_ACTION_REQUIRED : 0;
- int length = gdata->error_data_length;
- char error_type[120];
- bool queued = false;
- int sec_sev, i;
- char *p;
-
- sec_sev = ghes_severity(gdata->error_severity);
- if (length >= sizeof(*err)) {
- log_arm_hw_error(err, sec_sev);
- } else {
- pr_warn(FW_BUG "arm error length: %d\n", length);
- pr_warn(FW_BUG "length is too small\n");
- pr_warn(FW_BUG "firmware-generated error record is incorrect\n");
- return false;
- }
-
- if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
- return false;
-
- p = (char *)(err + 1);
- length -= sizeof(err);
-
- for (i = 0; i < err->err_info_num; i++) {
- struct cper_arm_err_info *err_info;
- bool is_cache, has_pa;
-
- /* Ensure we have enough data for the error info header */
- if (length < sizeof(*err_info))
- break;
-
- err_info = (struct cper_arm_err_info *)p;
-
- /* Validate the claimed length before using it */
- length -= err_info->length;
- if (length < 0)
- break;
-
- is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
- has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
-
- /*
- * The field (err_info->error_info & BIT(26)) is fixed to set to
- * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
- * firmware won't mix corrected errors in an uncorrected section,
- * and don't filter out 'corrected' error here.
- */
- if (is_cache && has_pa) {
- queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
- p += err_info->length;
- continue;
- }
-
- cper_bits_to_str(error_type, sizeof(error_type),
- FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
- cper_proc_error_type_strs,
- ARRAY_SIZE(cper_proc_error_type_strs));
-
- pr_warn_ratelimited(FW_WARN GHES_PFX
- "Unhandled processor error type 0x%02x: %s%s\n",
- err_info->type, error_type,
- (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : "");
- p += err_info->length;
- }
-
- return queued;
-}
-
-/*
- * PCIe AER errors need to be sent to the AER driver for reporting and
- * recovery. The GHES severities map to the following AER severities and
- * require the following handling:
- *
- * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
- * These need to be reported by the AER driver but no recovery is
- * necessary.
- * GHES_SEV_RECOVERABLE -> AER_NONFATAL
- * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
- * These both need to be reported and recovered from by the AER driver.
- * GHES_SEV_PANIC does not make it to this handling since the kernel must
- * panic.
- */
-static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
-{
-#ifdef CONFIG_ACPI_APEI_PCIEAER
- struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
-
- if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
- pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
- unsigned int devfn;
- int aer_severity;
- u8 *aer_info;
-
- devfn = PCI_DEVFN(pcie_err->device_id.device,
- pcie_err->device_id.function);
- aer_severity = cper_severity_to_aer(gdata->error_severity);
-
- /*
- * If firmware reset the component to contain
- * the error, we must reinitialize it before
- * use, so treat it as a fatal AER error.
- */
- if (gdata->flags & CPER_SEC_RESET)
- aer_severity = AER_FATAL;
-
- aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
- sizeof(struct aer_capability_regs));
- if (!aer_info)
- return;
- memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));
-
- aer_recover_queue(pcie_err->device_id.segment,
- pcie_err->device_id.bus,
- devfn, aer_severity,
- (struct aer_capability_regs *)
- aer_info);
- }
-#endif
-}
-
static void ghes_vendor_record_notifier_destroy(void *nb)
{
ghes_unregister_vendor_record_notifier(nb);
@@ -401,151 +129,11 @@ int devm_ghes_register_vendor_record_notifier(struct device *dev,
}
EXPORT_SYMBOL_GPL(devm_ghes_register_vendor_record_notifier);
-static void ghes_log_hwerr(int sev, guid_t *sec_type)
-{
- if (sev != CPER_SEV_RECOVERABLE)
- return;
-
- if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) ||
- guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) ||
- guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
- hwerr_log_error_type(HWERR_RECOV_CPU);
- return;
- }
-
- if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) ||
- guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) ||
- guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) ||
- guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
- hwerr_log_error_type(HWERR_RECOV_CXL);
- return;
- }
-
- if (guid_equal(sec_type, &CPER_SEC_PCIE) ||
- guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) {
- hwerr_log_error_type(HWERR_RECOV_PCI);
- return;
- }
-
- if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
- hwerr_log_error_type(HWERR_RECOV_MEMORY);
- return;
- }
-
- hwerr_log_error_type(HWERR_RECOV_OTHERS);
-}
-
static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
- int sev, sec_sev;
- struct acpi_hest_generic_data *gdata;
- guid_t *sec_type;
- const guid_t *fru_id = &guid_null;
- char *fru_text = "";
- bool queued = false;
- bool sync = is_hest_sync_notify(ghes);
-
- sev = ghes_severity(estatus->error_severity);
- apei_estatus_for_each_section(estatus, gdata) {
- sec_type = (guid_t *)gdata->section_type;
- sec_sev = ghes_severity(gdata->error_severity);
- if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
- fru_id = (guid_t *)gdata->fru_id;
-
- if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
- fru_text = gdata->fru_text;
-
- ghes_log_hwerr(sev, sec_type);
- if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
- struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
-
- atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
-
- arch_apei_report_mem_error(sev, mem_err);
- queued = ghes_handle_memory_failure(gdata, sev, sync);
- } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
- ghes_handle_aer(gdata);
- } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
- queued = ghes_handle_arm_hw_error(gdata, sev, sync);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
- struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
-
- cxl_cper_post_prot_err(prot_err, gdata->error_severity);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
- struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
- struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
- struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
- } else {
- void *err = acpi_hest_get_payload(gdata);
-
- ghes_defer_non_standard_event(gdata, sev);
- log_non_standard_event(sec_type, fru_id, fru_text,
- sec_sev, err,
- gdata->error_data_length);
- }
- }
-
- /*
- * If no memory failure work is queued for abnormal synchronous
- * errors, do a force kill.
- */
- if (sync && !queued) {
- dev_err(ghes->dev,
- HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n",
- current->comm, task_pid_nr(current));
- force_sig(SIGBUS);
- }
-}
-
-static void __ghes_print_estatus(const char *pfx,
- const struct acpi_hest_generic *generic,
- const struct acpi_hest_generic_status *estatus)
-{
- static atomic_t seqno;
- unsigned int curr_seqno;
- char pfx_seq[64];
-
- if (pfx == NULL) {
- if (ghes_severity(estatus->error_severity) <=
- GHES_SEV_CORRECTED)
- pfx = KERN_WARNING;
- else
- pfx = KERN_ERR;
- }
- curr_seqno = atomic_inc_return(&seqno);
- snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
- printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
- pfx_seq, generic->header.source_id);
- cper_estatus_print(pfx_seq, estatus);
-}
-
-static int ghes_print_estatus(const char *pfx,
- const struct acpi_hest_generic *generic,
- const struct acpi_hest_generic_status *estatus)
-{
- /* Not more than 2 messages every 5 seconds */
- static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
- static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
- struct ratelimit_state *ratelimit;
-
- if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
- ratelimit = &ratelimit_corrected;
- else
- ratelimit = &ratelimit_uncorrected;
- if (__ratelimit(ratelimit)) {
- __ghes_print_estatus(pfx, generic, estatus);
- return 1;
- }
- return 0;
+ ghes_cper_handle_status(ghes->dev, ghes->generic,
+ estatus, is_hest_sync_notify(ghes));
}
static void __ghes_panic(struct ghes *ghes,
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index d7a666a163c3..0ff9d06eb78f 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -13,22 +13,32 @@
*/
#include <linux/aer.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
#include <linux/err.h>
#include <linux/genalloc.h>
-#include <linux/irq_work.h>
#include <linux/io.h>
+#include <linux/irq_work.h>
#include <linux/kfifo.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/uuid.h>
+#include <linux/sched/signal.h>
+#include <linux/task_work.h>
#include <linux/notifier.h>
#include <linux/llist.h>
+#include <linux/ras.h>
+#include <ras/ras_event.h>
#include <linux/ratelimit.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/sched/clock.h>
#include <linux/slab.h>
+#include <linux/vmcore_info.h>
+#include <linux/vmalloc.h>
#include <acpi/apei.h>
#include <acpi/ghes_cper.h>
@@ -38,9 +48,363 @@
#include "apei-internal.h"
+ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
+
+#ifndef CONFIG_ACPI_APEI
+void __weak arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) { }
+#endif
+
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
+struct gen_pool *ghes_estatus_pool;
+
+int ghes_estatus_pool_init(unsigned int num_ghes)
+{
+ unsigned long addr, len;
+ int rc;
+
+ ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+ if (!ghes_estatus_pool)
+ return -ENOMEM;
+
+ len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
+ len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+ addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
+ if (!addr)
+ goto err_pool_alloc;
+
+ rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
+ if (rc)
+ goto err_pool_add;
+
+ return 0;
+
+err_pool_add:
+ vfree((void *)addr);
+
+err_pool_alloc:
+ gen_pool_destroy(ghes_estatus_pool);
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(ghes_estatus_pool_init);
+
+/**
+ * ghes_estatus_pool_region_free - free previously allocated memory
+ * from the ghes_estatus_pool.
+ * @addr: address of memory to free.
+ * @size: size of memory to free.
+ *
+ * Returns none.
+ */
+void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
+{
+ gen_pool_free(ghes_estatus_pool, addr, size);
+}
+EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
+
+int ghes_severity(int severity)
+{
+ switch (severity) {
+ case CPER_SEV_INFORMATIONAL:
+ return GHES_SEV_NO;
+ case CPER_SEV_CORRECTED:
+ return GHES_SEV_CORRECTED;
+ case CPER_SEV_RECOVERABLE:
+ return GHES_SEV_RECOVERABLE;
+ case CPER_SEV_FATAL:
+ return GHES_SEV_PANIC;
+ default:
+ /* Unknown, go panic */
+ return GHES_SEV_PANIC;
+ }
+}
+
+/**
+ * struct ghes_task_work - for synchronous RAS event
+ *
+ * @twork: callback_head for task work
+ * @pfn: page frame number of corrupted page
+ * @flags: work control flags
+ *
+ * Structure to pass task work to be handled before
+ * returning to user-space via task_work_add().
+ */
+struct ghes_task_work {
+ struct callback_head twork;
+ u64 pfn;
+ int flags;
+};
+
+static void memory_failure_cb(struct callback_head *twork)
+{
+ struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork);
+ int ret;
+
+ ret = memory_failure(twcb->pfn, twcb->flags);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb));
+
+ if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP)
+ return;
+
+ pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+ twcb->pfn, current->comm, task_pid_nr(current));
+ force_sig(SIGBUS);
+}
+
+static bool ghes_do_memory_failure(u64 physical_addr, int flags)
+{
+ struct ghes_task_work *twcb;
+ unsigned long pfn;
+
+ if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+ return false;
+
+ pfn = PHYS_PFN(physical_addr);
+
+ if (flags == MF_ACTION_REQUIRED && current->mm) {
+ twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb));
+ if (!twcb)
+ return false;
+
+ twcb->pfn = pfn;
+ twcb->flags = flags;
+ init_task_work(&twcb->twork, memory_failure_cb);
+ task_work_add(current, &twcb->twork, TWA_RESUME);
+ return true;
+ }
+
+ memory_failure_queue(pfn, flags);
+ return true;
+}
+
+bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync)
+{
+ int flags = -1;
+ int sec_sev = ghes_severity(gdata->error_severity);
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return false;
+
+ /* iff following two events can be handled properly by now */
+ if (sec_sev == GHES_SEV_CORRECTED &&
+ (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+ flags = MF_SOFT_OFFLINE;
+ if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+ flags = sync ? MF_ACTION_REQUIRED : 0;
+
+ if (flags != -1)
+ return ghes_do_memory_failure(mem_err->physical_addr, flags);
+
+ return false;
+}
+
+bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync)
+{
+ struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+ int flags = sync ? MF_ACTION_REQUIRED : 0;
+ int length = gdata->error_data_length;
+ char error_type[120];
+ bool queued = false;
+ int sec_sev, i;
+ char *p;
+
+ sec_sev = ghes_severity(gdata->error_severity);
+ if (length >= sizeof(*err)) {
+ log_arm_hw_error(err, sec_sev);
+ } else {
+ pr_warn(FW_BUG "arm error length: %d\n", length);
+ pr_warn(FW_BUG "length is too small\n");
+ pr_warn(FW_BUG "firmware-generated error record is incorrect\n");
+ return false;
+ }
+
+ if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
+ return false;
+
+ p = (char *)(err + 1);
+ length -= sizeof(err);
+
+ for (i = 0; i < err->err_info_num; i++) {
+ struct cper_arm_err_info *err_info;
+ bool is_cache, has_pa;
+
+ /* Ensure we have enough data for the error info header */
+ if (length < sizeof(*err_info))
+ break;
+
+ err_info = (struct cper_arm_err_info *)p;
+
+ /* Validate the claimed length before using it */
+ length -= err_info->length;
+ if (length < 0)
+ break;
+
+ is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
+ has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
+
+ /*
+ * The field (err_info->error_info & BIT(26)) is fixed to set to
+ * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
+ * firmware won't mix corrected errors in an uncorrected section,
+ * and don't filter out 'corrected' error here.
+ */
+ if (is_cache && has_pa) {
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
+ p += err_info->length;
+ continue;
+ }
+
+ cper_bits_to_str(error_type, sizeof(error_type),
+ FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
+ cper_proc_error_type_strs,
+ ARRAY_SIZE(cper_proc_error_type_strs));
+
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Unhandled processor error type 0x%02x: %s%s\n",
+ err_info->type, error_type,
+ err_info->type & ~CPER_ARM_ERR_TYPE_MASK ?
+ " with reserved bit(s)" : "");
+ p += err_info->length;
+ }
+
+ return queued;
+}
+
+/*
+ * PCIe AER errors need to be sent to the AER driver for reporting and
+ * recovery. The GHES severities map to the following AER severities and
+ * require the following handling:
+ *
+ * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
+ * These need to be reported by the AER driver but no recovery is
+ * necessary.
+ * GHES_SEV_RECOVERABLE -> AER_NONFATAL
+ * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
+ * These both need to be reported and recovered from by the AER driver.
+ * GHES_SEV_PANIC does not make it to this handling since the kernel must
+ * panic.
+ */
+void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+ if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ unsigned int devfn;
+ int aer_severity;
+ u8 *aer_info;
+
+ devfn = PCI_DEVFN(pcie_err->device_id.device,
+ pcie_err->device_id.function);
+ aer_severity = cper_severity_to_aer(gdata->error_severity);
+
+ /*
+ * If firmware reset the component to contain
+ * the error, we must reinitialize it before
+ * use, so treat it as a fatal AER error.
+ */
+ if (gdata->flags & CPER_SEC_RESET)
+ aer_severity = AER_FATAL;
+
+ aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
+ sizeof(struct aer_capability_regs));
+ if (!aer_info)
+ return;
+ memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));
+
+ aer_recover_queue(pcie_err->device_id.segment,
+ pcie_err->device_id.bus,
+ devfn, aer_severity,
+ (struct aer_capability_regs *)
+ aer_info);
+ }
+#endif
+}
+
+void ghes_log_hwerr(int sev, guid_t *sec_type)
+{
+ if (sev != CPER_SEV_RECOVERABLE)
+ return;
+
+ if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) ||
+ guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) ||
+ guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
+ hwerr_log_error_type(HWERR_RECOV_CPU);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ hwerr_log_error_type(HWERR_RECOV_CXL);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_PCIE) ||
+ guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) {
+ hwerr_log_error_type(HWERR_RECOV_PCI);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+ hwerr_log_error_type(HWERR_RECOV_MEMORY);
+ return;
+ }
+
+ hwerr_log_error_type(HWERR_RECOV_OTHERS);
+}
+
+void __ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ static atomic_t seqno;
+ unsigned int curr_seqno;
+ char pfx_seq[64];
+
+ if (!pfx) {
+ if (ghes_severity(estatus->error_severity) <=
+ GHES_SEV_CORRECTED)
+ pfx = KERN_WARNING;
+ else
+ pfx = KERN_ERR;
+ }
+ curr_seqno = atomic_inc_return(&seqno);
+ snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
+ printk("%sHardware error from APEI Generic Hardware Error Source: %d\n",
+ pfx_seq, generic->header.source_id);
+ cper_estatus_print(pfx_seq, estatus);
+}
+
+int ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ /* Not more than 2 messages every 5 seconds */
+ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5 * HZ, 2);
+ static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5 * HZ, 2);
+ struct ratelimit_state *ratelimit;
+
+ if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+ ratelimit = &ratelimit_corrected;
+ else
+ ratelimit = &ratelimit_uncorrected;
+ if (__ratelimit(ratelimit)) {
+ __ghes_print_estatus(pfx, generic, estatus);
+ return 1;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_ACPI_APEI
static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
{
phys_addr_t paddr;
@@ -272,6 +636,7 @@ void ghes_clear_estatus(struct ghes *ghes,
if (is_hest_type_generic_v2(ghes))
ghes_ack_error(ghes->generic_v2);
}
+#endif /* CONFIG_ACPI_APEI */
static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
@@ -323,6 +688,77 @@ void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
+void ghes_cper_handle_status(struct device *dev,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus,
+ bool sync)
+{
+ int sev, sec_sev;
+ struct acpi_hest_generic_data *gdata;
+ guid_t *sec_type;
+ const guid_t *fru_id = &guid_null;
+ char *fru_text = "";
+ bool queued = false;
+
+ sev = ghes_severity(estatus->error_severity);
+ apei_estatus_for_each_section(estatus, gdata) {
+ sec_type = (guid_t *)gdata->section_type;
+ sec_sev = ghes_severity(gdata->error_severity);
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+ fru_id = (guid_t *)gdata->fru_id;
+
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+ fru_text = gdata->fru_text;
+
+ ghes_log_hwerr(sev, sec_type);
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+ atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
+
+ arch_apei_report_mem_error(sev, mem_err);
+ queued = ghes_handle_memory_failure(gdata, sev, sync);
+ } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+ ghes_handle_aer(gdata);
+ } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
+ queued = ghes_handle_arm_hw_error(gdata, sev, sync);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
+ struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_prot_err(prot_err, gdata->error_severity);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
+ struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
+ struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
+ } else {
+ void *err = acpi_hest_get_payload(gdata);
+
+ ghes_defer_non_standard_event(gdata, sev);
+ log_non_standard_event(sec_type, fru_id, fru_text,
+ sec_sev, err,
+ gdata->error_data_length);
+ }
+ }
+
+ /*
+ * If no memory failure work is queued for abnormal synchronous
+ * errors, do a force kill.
+ */
+ if (sync && !queued) {
+ dev_err(dev,
+ HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n",
+ current->comm, task_pid_nr(current));
+ force_sig(SIGBUS);
+ }
+}
/* Room for 8 entries */
#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
index dd49e9179b63..511b95b50911 100644
--- a/include/acpi/ghes_cper.h
+++ b/include/acpi/ghes_cper.h
@@ -17,6 +17,8 @@
#define ACPI_APEI_GHES_CPER_H
#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/notifier.h>
#include <linux/workqueue.h>
#include <acpi/ghes.h>
@@ -57,6 +59,7 @@
((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
extern struct gen_pool *ghes_estatus_pool;
+extern struct atomic_notifier_head ghes_report_chain;
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
@@ -107,6 +110,23 @@ void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
struct acpi_hest_generic_status *estatus);
void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
int sev);
+int ghes_severity(int severity);
+bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync);
+bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync);
+void ghes_handle_aer(struct acpi_hest_generic_data *gdata);
+void ghes_log_hwerr(int sev, guid_t *sec_type);
+void __ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus);
+int ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus);
+void ghes_cper_handle_status(struct device *dev,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus,
+ bool sync);
void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
int severity);
int cxl_cper_register_prot_err_work(struct work_struct *work);
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 08/10] ACPI: APEI: share GHES CPER helpers
2026-05-29 9:50 ` [PATCH v5 08/10] ACPI: APEI: share GHES CPER helpers Ahmed Tiba
@ 2026-05-29 16:32 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:32 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:48 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Wire GHES up to the helper routines in ghes_cper.c and remove the local
> copies from ghes.c. This keeps the control flow identical while letting
> the helpers be shared with other firmware-first providers.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
Mostly looks fine. The one bit that rather makes this exercise of breaking
out generic code look dodgy is the ifdefs in the generic file.
I'm haven't looked closely but that to me implies a coupling that should not be
here.
Jonathan
> ---
> drivers/acpi/apei/ghes.c | 416 +--------------------------------------
> drivers/acpi/apei/ghes_cper.c | 438 +++++++++++++++++++++++++++++++++++++++++-
> include/acpi/ghes_cper.h | 20 ++
> 3 files changed, 459 insertions(+), 415 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 85be2ebf4d3e..f85b97c4db4c 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
>
> static void __ghes_panic(struct ghes *ghes,
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> index d7a666a163c3..0ff9d06eb78f 100644
> --- a/drivers/acpi/apei/ghes_cper.c
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -13,22 +13,32 @@
>
> #include "apei-internal.h"
>
> +ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
> +
> +#ifndef CONFIG_ACPI_APEI
> +void __weak arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) { }
> +#endif
This is non obvious enough that the reasoning for a new weak function should be mentioned in
the patch description. Why not stub it in include/acpi/apei.h?
> +
> static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
> static atomic_t ghes_estatus_cache_alloced;
> +void __ghes_print_estatus(const char *pfx,
> + const struct acpi_hest_generic *generic,
> + const struct acpi_hest_generic_status *estatus)
> +{
> + static atomic_t seqno;
> + unsigned int curr_seqno;
> + char pfx_seq[64];
> +
> + if (!pfx) {
> + if (ghes_severity(estatus->error_severity) <=
> + GHES_SEV_CORRECTED)
> + pfx = KERN_WARNING;
> + else
> + pfx = KERN_ERR;
> + }
> + curr_seqno = atomic_inc_return(&seqno);
> + snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
> + printk("%sHardware error from APEI Generic Hardware Error Source: %d\n",
> + pfx_seq, generic->header.source_id);
> + cper_estatus_print(pfx_seq, estatus);
> +}
> +
> +int ghes_print_estatus(const char *pfx,
> + const struct acpi_hest_generic *generic,
> + const struct acpi_hest_generic_status *estatus)
> +{
> + /* Not more than 2 messages every 5 seconds */
> + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5 * HZ, 2);
> + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5 * HZ, 2);
> + struct ratelimit_state *ratelimit;
> +
> + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
> + ratelimit = &ratelimit_corrected;
> + else
> + ratelimit = &ratelimit_uncorrected;
> + if (__ratelimit(ratelimit)) {
> + __ghes_print_estatus(pfx, generic, estatus);
> + return 1;
> + }
> + return 0;
> +}
> +
> +#ifdef CONFIG_ACPI_APEI
So after the effort to break the the generic stuff we end up with non generic
bits in the broken out file? Is there no way to avoid this?
> static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
> {
> phys_addr_t paddr;
> @@ -272,6 +636,7 @@ void ghes_clear_estatus(struct ghes *ghes,
> if (is_hest_type_generic_v2(ghes))
> ghes_ack_error(ghes->generic_v2);
> }
> +#endif /* CONFIG_ACPI_APEI */
> +void ghes_cper_handle_status(struct device *dev,
> + const struct acpi_hest_generic *generic,
> + const struct acpi_hest_generic_status *estatus,
> + bool sync)
> +{
> + int sev, sec_sev;
> + struct acpi_hest_generic_data *gdata;
> + guid_t *sec_type;
> + const guid_t *fru_id = &guid_null;
> + char *fru_text = "";
> + bool queued = false;
> +
> + sev = ghes_severity(estatus->error_severity);
> + apei_estatus_for_each_section(estatus, gdata) {
> + sec_type = (guid_t *)gdata->section_type;
> + sec_sev = ghes_severity(gdata->error_severity);
> + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> + fru_id = (guid_t *)gdata->fru_id;
> +
> + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> + fru_text = gdata->fru_text;
> +
> + ghes_log_hwerr(sev, sec_type);
> + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
> + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
> +
> + atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
> +
> + arch_apei_report_mem_error(sev, mem_err);
> + queued = ghes_handle_memory_failure(gdata, sev, sync);
> + } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
> + ghes_handle_aer(gdata);
> + } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
> + queued = ghes_handle_arm_hw_error(gdata, sev, sync);
> + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
> + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
> +
> + cxl_cper_post_prot_err(prot_err, gdata->error_severity);
> + } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
> + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
> +
> + cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
> + } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
> + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
> +
> + cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
> + } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
> + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
> +
> + cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
> + } else {
> + void *err = acpi_hest_get_payload(gdata);
> +
> + ghes_defer_non_standard_event(gdata, sev);
> + log_non_standard_event(sec_type, fru_id, fru_text,
> + sec_sev, err,
> + gdata->error_data_length);
> + }
> + }
> +
> + /*
> + * If no memory failure work is queued for abnormal synchronous
> + * errors, do a force kill.
> + */
> + if (sync && !queued) {
> + dev_err(dev,
> + HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n",
> + current->comm, task_pid_nr(current));
> + force_sig(SIGBUS);
> + }
> +}
Blank line here
> /* Room for 8 entries */
> #define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
> static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
> diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
> index dd49e9179b63..511b95b50911 100644
> --- a/include/acpi/ghes_cper.h
> +++ b/include/acpi/ghes_cper.h
> @@ -17,6 +17,8 @@
> #define ACPI_APEI_GHES_CPER_H
>
> #include <linux/atomic.h>
> +#include <linux/device.h>
> +#include <linux/notifier.h>
> #include <linux/workqueue.h>
>
> #include <acpi/ghes.h>
> @@ -57,6 +59,7 @@
> ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
>
> extern struct gen_pool *ghes_estatus_pool;
> +extern struct atomic_notifier_head ghes_report_chain;
>
> static inline bool is_hest_type_generic_v2(struct ghes *ghes)
> {
> @@ -107,6 +110,23 @@ void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
> struct acpi_hest_generic_status *estatus);
> void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
> int sev);
> +int ghes_severity(int severity);
> +bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
> + int sev, bool sync);
> +bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
> + int sev, bool sync);
> +void ghes_handle_aer(struct acpi_hest_generic_data *gdata);
> +void ghes_log_hwerr(int sev, guid_t *sec_type);
> +void __ghes_print_estatus(const char *pfx,
> + const struct acpi_hest_generic *generic,
> + const struct acpi_hest_generic_status *estatus);
> +int ghes_print_estatus(const char *pfx,
> + const struct acpi_hest_generic *generic,
> + const struct acpi_hest_generic_status *estatus);
> +void ghes_cper_handle_status(struct device *dev,
> + const struct acpi_hest_generic *generic,
> + const struct acpi_hest_generic_status *estatus,
> + bool sync);
> void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
> int severity);
> int cxl_cper_register_prot_err_work(struct work_struct *work);
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 09/10] dt-bindings: firmware: add arm,ras-cper
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (7 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 08/10] ACPI: APEI: share GHES CPER helpers Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 16:44 ` Jonathan Cameron
2026-05-29 9:50 ` [PATCH v5 10/10] RAS: add firmware-first CPER provider Ahmed Tiba
2026-05-29 16:36 ` [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Jonathan Cameron
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Describe the DeviceTree node that exposes the Arm firmware-first
CPER provider and hook the file into MAINTAINERS so the
binding has an owner.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
.../devicetree/bindings/firmware/arm,ras-cper.yaml | 54 ++++++++++++++++++++++
MAINTAINERS | 5 ++
2 files changed, 59 insertions(+)
diff --git a/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml b/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
new file mode 100644
index 000000000000..3d4de096093f
--- /dev/null
+++ b/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/firmware/arm,ras-cper.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm RAS CPER provider
+
+maintainers:
+ - Ahmed Tiba <ahmed.tiba@arm•com>
+
+description:
+ Arm Reliability, Availability and Serviceability (RAS) firmware can expose
+ a firmware-first CPER error source directly via DeviceTree. Firmware
+ provides the CPER Generic Error Status block and notifies the OS through
+ an interrupt.
+
+properties:
+ compatible:
+ const: arm,ras-cper
+
+ memory-region:
+ minItems: 1
+ items:
+ - description:
+ CPER Generic Error Status block exposed by firmware.
+ - description:
+ Optional firmware-owned ack buffer used on platforms
+ where firmware needs an explicit "ack" handshake before overwriting
+ the CPER buffer. Firmware watches bit 0 and expects the OS to set it
+ once the current status block has been consumed.
+
+ interrupts:
+ maxItems: 1
+ description:
+ Interrupt used to signal that a new status record is ready.
+
+required:
+ - compatible
+ - memory-region
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ error-handler {
+ compatible = "arm,ras-cper";
+ memory-region = <&ras_cper_buffer>, <&ras_cper_ack>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+ };
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index 461a3eed6129..8a9714603a7d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22261,6 +22261,11 @@ M: Alexandre Bounine <alex.bou9@gmail•com>
S: Maintained
F: drivers/rapidio/
+RAS ERROR STATUS
+M: Ahmed Tiba <ahmed.tiba@arm•com>
+S: Maintained
+F: Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
+
RAS INFRASTRUCTURE
M: Tony Luck <tony.luck@intel•com>
M: Borislav Petkov <bp@alien8•de>
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 09/10] dt-bindings: firmware: add arm,ras-cper
2026-05-29 9:50 ` [PATCH v5 09/10] dt-bindings: firmware: add arm,ras-cper Ahmed Tiba
@ 2026-05-29 16:44 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:44 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:49 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Describe the DeviceTree node that exposes the Arm firmware-first
> CPER provider and hook the file into MAINTAINERS so the
> binding has an owner.
Odd wrap. Pick a length (72 / 75 typical for commit messages) and
stick to it.
Request for references inline + a question on whether we have
to allow for the ack register not existing. I'd rather we required
it if possible.
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
> ---
> .../devicetree/bindings/firmware/arm,ras-cper.yaml | 54 ++++++++++++++++++++++
> MAINTAINERS | 5 ++
> 2 files changed, 59 insertions(+)
>
> diff --git a/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml b/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
> new file mode 100644
> index 000000000000..3d4de096093f
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
> @@ -0,0 +1,54 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/firmware/arm,ras-cper.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Arm RAS CPER provider
> +
> +maintainers:
> + - Ahmed Tiba <ahmed.tiba@arm•com>
> +
> +description:
> + Arm Reliability, Availability and Serviceability (RAS) firmware can expose
> + a firmware-first CPER error source directly via DeviceTree. Firmware
> + provides the CPER Generic Error Status block and notifies the OS through
> + an interrupt.
I'd like some spec references in here if possible.
> +
> +properties:
> + compatible:
> + const: arm,ras-cper
> +
> + memory-region:
> + minItems: 1
> + items:
> + - description:
> + CPER Generic Error Status block exposed by firmware.
> + - description:
> + Optional firmware-owned ack buffer used on platforms
> + where firmware needs an explicit "ack" handshake before overwriting
> + the CPER buffer. Firmware watches bit 0 and expects the OS to set it
> + once the current status block has been consumed.
Does the arm spec really make this optional? Can we constraint it to not be
just to make our lives easier? I've never been sure how you would actually
make a working platform without the ack support.
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 461a3eed6129..8a9714603a7d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22261,6 +22261,11 @@ M: Alexandre Bounine <alex.bou9@gmail•com>
> S: Maintained
> F: drivers/rapidio/
>
> +RAS ERROR STATUS
> +M: Ahmed Tiba <ahmed.tiba@arm•com>
> +S: Maintained
> +F: Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
> +
> RAS INFRASTRUCTURE
> M: Tony Luck <tony.luck@intel•com>
> M: Borislav Petkov <bp@alien8•de>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 10/10] RAS: add firmware-first CPER provider
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (8 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 09/10] dt-bindings: firmware: add arm,ras-cper Ahmed Tiba
@ 2026-05-29 9:50 ` Ahmed Tiba
2026-05-29 17:06 ` Jonathan Cameron
2026-05-29 16:36 ` [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Jonathan Cameron
10 siblings, 1 reply; 23+ messages in thread
From: Ahmed Tiba @ 2026-05-29 9:50 UTC (permalink / raw)
To: will, jic23, xueshuai, saket.dumbre, mchehab, dave, djbw, bp,
tony.luck, guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet,
ira.weiny, dave.jiang, krzk+dt, robh, catalin.marinas,
alison.schofield, conor+dt
Cc: Ahmed Tiba, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
Add a firmware-first CPER provider that reuses the shared
GHES helpers, wire it into the RAS Kconfig/Makefile and
document it in the admin guide.
Update MAINTAINERS now that the driver exists.
Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
---
Documentation/admin-guide/RAS/main.rst | 18 +++
MAINTAINERS | 1 +
drivers/acpi/apei/apei-internal.h | 10 +-
drivers/acpi/apei/ghes_cper.c | 2 +
drivers/ras/Kconfig | 11 ++
drivers/ras/Makefile | 1 +
drivers/ras/cper-esource.c | 257 +++++++++++++++++++++++++++++++++
include/acpi/ghes_cper.h | 10 ++
8 files changed, 301 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst
index 5a45db32c49b..84219d25a072 100644
--- a/Documentation/admin-guide/RAS/main.rst
+++ b/Documentation/admin-guide/RAS/main.rst
@@ -205,6 +205,24 @@ Architecture (MCA)\ [#f3]_.
.. [#f3] For more details about the Machine Check Architecture (MCA),
please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
+Firmware-first CPER providers
+-----------------------------
+
+Some systems expose Common Platform Error Record (CPER) data
+through platform firmware instead of ACPI HEST tables.
+Enable ``CONFIG_RAS_CPER_ESOURCE`` to build the ``drivers/ras/cper-esource.c``
+driver. The current in-tree firmware description uses the
+``Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml`` binding.
+The driver reuses the GHES CPER helper object in
+``drivers/acpi/apei/ghes_cper.c`` so the logging, notifier chains, and
+memory failure handling match the ACPI GHES behaviour even when
+ACPI is disabled.
+
+Once a platform describes a firmware-first provider, both ACPI GHES and the
+firmware-described driver reuse the same code paths. This keeps the
+behaviour consistent regardless of whether the error source is described
+by ACPI tables or another firmware description.
+
EDAC - Error Detection And Correction
*************************************
diff --git a/MAINTAINERS b/MAINTAINERS
index 8a9714603a7d..c14638cd97f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22265,6 +22265,7 @@ RAS ERROR STATUS
M: Ahmed Tiba <ahmed.tiba@arm•com>
S: Maintained
F: Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
+F: drivers/ras/cper-esource.c
RAS INFRASTRUCTURE
M: Tony Luck <tony.luck@intel•com>
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 77c10a7a7a9f..c16ac541f15b 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -8,6 +8,7 @@
#define APEI_INTERNAL_H
#include <linux/acpi.h>
+#include <acpi/ghes_cper.h>
struct apei_exec_context;
@@ -120,15 +121,6 @@ int apei_exec_collect_resources(struct apei_exec_context *ctx,
struct dentry;
struct dentry *apei_get_debugfs_dir(void);
-static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
-{
- if (estatus->raw_data_length)
- return estatus->raw_data_offset + \
- estatus->raw_data_length;
- else
- return sizeof(*estatus) + estatus->data_length;
-}
-
int apei_osc_setup(void);
int einj_get_available_error_type(u32 *type, int einj_action);
diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
index 0ff9d06eb78f..a7691aa5011c 100644
--- a/drivers/acpi/apei/ghes_cper.c
+++ b/drivers/acpi/apei/ghes_cper.c
@@ -46,7 +46,9 @@
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
+#ifdef CONFIG_ACPI_APEI
#include "apei-internal.h"
+#endif
ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index fc4f4bb94a4c..3c1c63b2fefc 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -34,6 +34,17 @@ if RAS
source "arch/x86/ras/Kconfig"
source "drivers/ras/amd/atl/Kconfig"
+config RAS_CPER_ESOURCE
+ bool "Firmware-first CPER error source block provider"
+ select GHES_CPER_HELPERS
+ help
+ Enable support for firmware-first Common Platform Error Record
+ (CPER) error source block providers. The current in-tree user is
+ described by the arm,ras-cper DeviceTree binding. The driver
+ reuses the existing GHES CPER helpers so the error processing
+ matches the ACPI code paths, but it can be built even when ACPI is
+ disabled.
+
config RAS_FMPM
tristate "FRU Memory Poison Manager"
default m
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 11f95d59d397..0de069557f31 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_RAS) += ras.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
obj-$(CONFIG_RAS_CEC) += cec.o
+obj-$(CONFIG_RAS_CPER_ESOURCE) += cper-esource.o
obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o
obj-y += amd/atl/
diff --git a/drivers/ras/cper-esource.c b/drivers/ras/cper-esource.c
new file mode 100644
index 000000000000..83f7a910e50a
--- /dev/null
+++ b/drivers/ras/cper-esource.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Firmware-first CPER error source provider.
+ *
+ * This driver shares the GHES CPER helpers so we keep the reporting and
+ * notifier behaviour identical to ACPI GHES.
+ *
+ * Copyright (C) 2026 ARM Ltd.
+ * Author: Ahmed Tiba <ahmed.tiba@arm•com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/cleanup.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/panic.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <acpi/ghes.h>
+#include <acpi/ghes_cper.h>
+
+static DEFINE_IDA(cper_esource_source_ids);
+
+struct cper_esource_ack {
+ void __iomem *addr;
+ u64 preserve;
+ u64 set;
+ u8 width;
+ bool present;
+};
+
+struct cper_esource {
+ struct device *dev;
+ void __iomem *status;
+ size_t status_len;
+
+ struct cper_esource_ack ack;
+
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+
+ bool sync;
+ int irq;
+
+ /* Serializes access while firmware and the OS share the status buffer. */
+ spinlock_t lock;
+};
+
+static void cper_esource_release_source_id(void *data)
+{
+ struct acpi_hest_generic *generic = data;
+
+ ida_free(&cper_esource_source_ids, generic->header.source_id);
+}
+
+static int cper_esource_init_pool(void)
+{
+ if (ghes_estatus_pool)
+ return 0;
+
+ return ghes_estatus_pool_init(1);
+}
+
+static int cper_esource_copy_status(struct cper_esource *ctx)
+{
+ memcpy_fromio(ctx->estatus, ctx->status, ctx->status_len);
+ return 0;
+}
+
+static void cper_esource_ack(struct cper_esource *ctx)
+{
+ u64 val;
+
+ if (!ctx->ack.present)
+ return;
+
+ if (ctx->ack.width == 64) {
+ val = readq(ctx->ack.addr);
+ val &= ctx->ack.preserve;
+ val |= ctx->ack.set;
+ writeq(val, ctx->ack.addr);
+ } else {
+ val = readl(ctx->ack.addr);
+ val &= (u32)ctx->ack.preserve;
+ val |= (u32)ctx->ack.set;
+ writel(val, ctx->ack.addr);
+ }
+}
+
+static void cper_esource_fatal(struct cper_esource *ctx)
+{
+ __ghes_print_estatus(KERN_EMERG, ctx->generic, ctx->estatus);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
+ panic("GHES: fatal firmware-first CPER record from %s\n",
+ dev_name(ctx->dev));
+}
+
+static void cper_esource_process(struct cper_esource *ctx)
+{
+ int sev;
+
+ guard(spinlock_irqsave)(&ctx->lock);
+
+ if (cper_esource_copy_status(ctx))
+ return;
+
+ sev = ghes_severity(ctx->estatus->error_severity);
+ if (sev >= GHES_SEV_PANIC)
+ cper_esource_fatal(ctx);
+
+ if (!ghes_estatus_cached(ctx->estatus) &&
+ ghes_print_estatus(NULL, ctx->generic, ctx->estatus))
+ ghes_estatus_cache_add(ctx->generic, ctx->estatus);
+
+ ghes_cper_handle_status(ctx->dev, ctx->generic, ctx->estatus, ctx->sync);
+ cper_esource_ack(ctx);
+}
+
+static irqreturn_t cper_esource_irq(int irq, void *data)
+{
+ struct cper_esource *ctx = data;
+
+ cper_esource_process(ctx);
+
+ return IRQ_HANDLED;
+}
+
+static int cper_esource_init_ack(struct platform_device *pdev,
+ struct cper_esource *ctx)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ size_t size;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return 0;
+
+ ctx->ack.addr = devm_platform_get_and_ioremap_resource(pdev, 1, &res);
+ if (IS_ERR(ctx->ack.addr))
+ return PTR_ERR(ctx->ack.addr);
+
+ size = resource_size(res);
+ switch (size) {
+ case 4:
+ ctx->ack.width = 32;
+ ctx->ack.preserve = ~0U;
+ break;
+ case 8:
+ ctx->ack.width = 64;
+ ctx->ack.preserve = ~0ULL;
+ break;
+ default:
+ return dev_err_probe(dev, -EINVAL,
+ "unsupported ack resource size %zu\n", size);
+ }
+
+ ctx->ack.set = BIT_ULL(0);
+ ctx->ack.present = true;
+ return 0;
+}
+
+static int cper_esource_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct cper_esource *ctx;
+ struct resource *res;
+ int source_id;
+ int rc;
+
+ ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ spin_lock_init(&ctx->lock);
+ ctx->dev = dev;
+ ctx->sync = device_property_read_bool(dev, "arm,sea-notify");
+
+ ctx->status = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(ctx->status))
+ return dev_err_probe(dev, PTR_ERR(ctx->status),
+ "failed to map status region\n");
+
+ ctx->status_len = resource_size(res);
+ if (!ctx->status_len)
+ return dev_err_probe(dev, -EINVAL, "status region has zero length\n");
+
+ rc = cper_esource_init_ack(pdev, ctx);
+ if (rc)
+ return rc;
+
+ rc = cper_esource_init_pool();
+ if (rc)
+ return rc;
+
+ ctx->estatus = devm_kzalloc(dev, ctx->status_len, GFP_KERNEL);
+ if (!ctx->estatus)
+ return -ENOMEM;
+
+ ctx->generic = devm_kzalloc(dev, sizeof(*ctx->generic), GFP_KERNEL);
+ if (!ctx->generic)
+ return -ENOMEM;
+
+ source_id = ida_alloc_min(&cper_esource_source_ids, 1, GFP_KERNEL);
+ if (source_id < 0)
+ return source_id;
+
+ ctx->generic->header.type = ACPI_HEST_TYPE_GENERIC_ERROR;
+ ctx->generic->header.source_id = source_id;
+
+ rc = devm_add_action_or_reset(dev, cper_esource_release_source_id,
+ ctx->generic);
+ if (rc)
+ return rc;
+
+ ctx->generic->notify.type = ctx->sync ?
+ ACPI_HEST_NOTIFY_SEA : ACPI_HEST_NOTIFY_EXTERNAL;
+ ctx->generic->error_block_length = ctx->status_len;
+
+ ctx->irq = platform_get_irq(pdev, 0);
+ if (ctx->irq < 0)
+ return ctx->irq;
+
+ rc = devm_request_threaded_irq(dev, ctx->irq, NULL, cper_esource_irq,
+ IRQF_ONESHOT,
+ dev_name(dev), ctx);
+ if (rc)
+ return dev_err_probe(dev, rc, "failed to request interrupt\n");
+
+ return 0;
+}
+
+static const struct of_device_id cper_esource_of_match[] = {
+ { .compatible = "arm,ras-cper" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, cper_esource_of_match);
+
+static struct platform_driver cper_esource_driver = {
+ .driver = {
+ .name = "cper-esource",
+ .of_match_table = cper_esource_of_match,
+ },
+ .probe = cper_esource_probe,
+};
+
+module_platform_driver(cper_esource_driver);
+
+MODULE_AUTHOR("Ahmed Tiba <ahmed.tiba@arm•com>");
+MODULE_DESCRIPTION("Firmware-first CPER provider");
+MODULE_LICENSE("GPL");
diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
index 511b95b50911..a78d4a773129 100644
--- a/include/acpi/ghes_cper.h
+++ b/include/acpi/ghes_cper.h
@@ -80,6 +80,14 @@ static inline bool is_hest_sync_notify(struct ghes *ghes)
return notify_type == ACPI_HEST_NOTIFY_SEA;
}
+static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
+{
+ if (estatus->raw_data_length)
+ return estatus->raw_data_offset + estatus->raw_data_length;
+ else
+ return sizeof(*estatus) + estatus->data_length;
+}
+
struct ghes_vendor_record_entry {
struct work_struct work;
int error_severity;
@@ -108,6 +116,8 @@ int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
int ghes_estatus_cached(struct acpi_hest_generic_status *estatus);
void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
struct acpi_hest_generic_status *estatus);
+int ghes_register_vendor_record_notifier(struct notifier_block *nb);
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
int sev);
int ghes_severity(int severity);
--
2.43.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v5 10/10] RAS: add firmware-first CPER provider
2026-05-29 9:50 ` [PATCH v5 10/10] RAS: add firmware-first CPER provider Ahmed Tiba
@ 2026-05-29 17:06 ` Jonathan Cameron
0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 17:06 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:50 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> Add a firmware-first CPER provider that reuses the shared
> GHES helpers, wire it into the RAS Kconfig/Makefile and
> document it in the admin guide.
>
> Update MAINTAINERS now that the driver exists.
Sashiko had some interesting comments on this one. Please take a look and
reply to the thread if any look plausible but aren't!
https://sashiko.dev/#/patchset/20260529-topics-ahmtib01-ras_ffh_arm_internal_review-v5-0-2e0500d42642@arm.com
The irq thread / force_sig() one is something I'd not have
considered. I obviously haven't tested it and so it might be wrong :)
A few things inline.
Jonathan
>
> Signed-off-by: Ahmed Tiba <ahmed.tiba@arm•com>
> ---
> Documentation/admin-guide/RAS/main.rst | 18 +++
> MAINTAINERS | 1 +
> drivers/acpi/apei/apei-internal.h | 10 +-
> drivers/acpi/apei/ghes_cper.c | 2 +
> drivers/ras/Kconfig | 11 ++
> drivers/ras/Makefile | 1 +
> drivers/ras/cper-esource.c | 257 +++++++++++++++++++++++++++++++++
> include/acpi/ghes_cper.h | 10 ++
> 8 files changed, 301 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst
> index 5a45db32c49b..84219d25a072 100644
> --- a/Documentation/admin-guide/RAS/main.rst
> +++ b/Documentation/admin-guide/RAS/main.rst
> @@ -205,6 +205,24 @@ Architecture (MCA)\ [#f3]_.
> .. [#f3] For more details about the Machine Check Architecture (MCA),
> please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
>
> +Firmware-first CPER providers
> +-----------------------------
> +
> +Some systems expose Common Platform Error Record (CPER) data
> +through platform firmware instead of ACPI HEST tables.
Given the data doesn't come through HEST but rather in places it points to and
that is via a different type of platform firmware it seems to me this
bit needs a rewrite.
> +Enable ``CONFIG_RAS_CPER_ESOURCE`` to build the ``drivers/ras/cper-esource.c``
> +driver. The current in-tree firmware description uses the
> +``Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml`` binding.
> +The driver reuses the GHES CPER helper object in
> +``drivers/acpi/apei/ghes_cper.c`` so the logging, notifier chains, and
> +memory failure handling match the ACPI GHES behaviour even when
> +ACPI is disabled.
Rewrap. Line lengths appear very random.
> +
> +Once a platform describes a firmware-first provider, both ACPI GHES and the
> +firmware-described driver reuse the same code paths. This keeps the
> +behaviour consistent regardless of whether the error source is described
> +by ACPI tables or another firmware description.
> +
> EDAC - Error Detection And Correction
> *************************************
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 8a9714603a7d..c14638cd97f6 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22265,6 +22265,7 @@ RAS ERROR STATUS
> M: Ahmed Tiba <ahmed.tiba@arm•com>
> S: Maintained
> F: Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
> +F: drivers/ras/cper-esource.c
>
> RAS INFRASTRUCTURE
> M: Tony Luck <tony.luck@intel•com>
> diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
> index 77c10a7a7a9f..c16ac541f15b 100644
> --- a/drivers/acpi/apei/apei-internal.h
> +++ b/drivers/acpi/apei/apei-internal.h
> @@ -8,6 +8,7 @@
> #define APEI_INTERNAL_H
>
> #include <linux/acpi.h>
> +#include <acpi/ghes_cper.h>
>
> struct apei_exec_context;
>
> @@ -120,15 +121,6 @@ int apei_exec_collect_resources(struct apei_exec_context *ctx,
> struct dentry;
> struct dentry *apei_get_debugfs_dir(void);
>
> -static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
As below. I'd not expect to see this moving in this patch given all the precursor
code movement patches.
> -{
> - if (estatus->raw_data_length)
> - return estatus->raw_data_offset + \
> - estatus->raw_data_length;
> - else
> - return sizeof(*estatus) + estatus->data_length;
> -}
> -
> int apei_osc_setup(void);
>
> int einj_get_available_error_type(u32 *type, int einj_action);
> diff --git a/drivers/acpi/apei/ghes_cper.c b/drivers/acpi/apei/ghes_cper.c
> index 0ff9d06eb78f..a7691aa5011c 100644
> --- a/drivers/acpi/apei/ghes_cper.c
> +++ b/drivers/acpi/apei/ghes_cper.c
> @@ -46,7 +46,9 @@
> #include <asm/fixmap.h>
> #include <asm/tlbflush.h>
>
> +#ifdef CONFIG_ACPI_APEI
> #include "apei-internal.h"
Why is this dance needed? Add a comment.
We'd normally expect a header to safe to include if it's not used with
any stubbing done in the header.
> +#endif
>
> ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
>
> diff --git a/drivers/ras/cper-esource.c b/drivers/ras/cper-esource.c
> new file mode 100644
> index 000000000000..83f7a910e50a
> --- /dev/null
> +++ b/drivers/ras/cper-esource.c
> +struct cper_esource {
> + struct device *dev;
> + void __iomem *status;
> + size_t status_len;
> +
> + struct cper_esource_ack ack;
> +
> + struct acpi_hest_generic *generic;
See below - maybe this can just be embedded here.
> + struct acpi_hest_generic_status *estatus;
> +
> + bool sync;
> + int irq;
> +
> + /* Serializes access while firmware and the OS share the status buffer. */
> + spinlock_t lock;
> +};
> +
> +static int cper_esource_copy_status(struct cper_esource *ctx)
> +{
> + memcpy_fromio(ctx->estatus, ctx->status, ctx->status_len);
> + return 0;
Seems an unnecessary helper. In particular if it returns 0 always
no need to do so or to check the return value.
> +}
>
> +static int cper_esource_probe(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct cper_esource *ctx;
> + struct resource *res;
> + int source_id;
> + int rc;
> +
> + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
> + if (!ctx)
> + return -ENOMEM;
> +
> + spin_lock_init(&ctx->lock);
> + ctx->dev = dev;
> + ctx->sync = device_property_read_bool(dev, "arm,sea-notify");
> +
> + ctx->status = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
> + if (IS_ERR(ctx->status))
> + return dev_err_probe(dev, PTR_ERR(ctx->status),
> + "failed to map status region\n");
> +
> + ctx->status_len = resource_size(res);
> + if (!ctx->status_len)
> + return dev_err_probe(dev, -EINVAL, "status region has zero length\n");
> +
> + rc = cper_esource_init_ack(pdev, ctx);
> + if (rc)
> + return rc;
> +
> + rc = cper_esource_init_pool();
> + if (rc)
> + return rc;
> +
> + ctx->estatus = devm_kzalloc(dev, ctx->status_len, GFP_KERNEL);
> + if (!ctx->estatus)
> + return -ENOMEM;
> +
> + ctx->generic = devm_kzalloc(dev, sizeof(*ctx->generic), GFP_KERNEL);
Lifetime of this is shorter than that of containing structure. Why not just
embed it and avoid need to allocate here?
> + if (!ctx->generic)
> + return -ENOMEM;
> +
> + source_id = ida_alloc_min(&cper_esource_source_ids, 1, GFP_KERNEL);
Maybe a comment on what is special about id 0
> + if (source_id < 0)
> + return source_id;
> +
> + ctx->generic->header.type = ACPI_HEST_TYPE_GENERIC_ERROR;
> + ctx->generic->header.source_id = source_id;
> +
> + rc = devm_add_action_or_reset(dev, cper_esource_release_source_id,
> + ctx->generic);
> + if (rc)
> + return rc;
> +
> + ctx->generic->notify.type = ctx->sync ?
> + ACPI_HEST_NOTIFY_SEA : ACPI_HEST_NOTIFY_EXTERNAL;
> + ctx->generic->error_block_length = ctx->status_len;
> +
> + ctx->irq = platform_get_irq(pdev, 0);
> + if (ctx->irq < 0)
> + return ctx->irq;
> +
> + rc = devm_request_threaded_irq(dev, ctx->irq, NULL, cper_esource_irq,
> + IRQF_ONESHOT,
> + dev_name(dev), ctx);
> + if (rc)
> + return dev_err_probe(dev, rc, "failed to request interrupt\n");
> +
> + return 0;
> +}
> +
> +static const struct of_device_id cper_esource_of_match[] = {
> + { .compatible = "arm,ras-cper" },
> + { /* sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(of, cper_esource_of_match);
> +
> +static struct platform_driver cper_esource_driver = {
> + .driver = {
> + .name = "cper-esource",
> + .of_match_table = cper_esource_of_match,
> + },
> + .probe = cper_esource_probe,
> +};
Common practice to not have a blank line here. Keeps the
structure and the macro visually closely coupled.
> +
> +module_platform_driver(cper_esource_driver);
> +
> +MODULE_AUTHOR("Ahmed Tiba <ahmed.tiba@arm•com>");
> +MODULE_DESCRIPTION("Firmware-first CPER provider");
> +MODULE_LICENSE("GPL");
> diff --git a/include/acpi/ghes_cper.h b/include/acpi/ghes_cper.h
> index 511b95b50911..a78d4a773129 100644
> --- a/include/acpi/ghes_cper.h
> +++ b/include/acpi/ghes_cper.h
> @@ -80,6 +80,14 @@ static inline bool is_hest_sync_notify(struct ghes *ghes)
> return notify_type == ACPI_HEST_NOTIFY_SEA;
> }
>
> +static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
I'm surprised to still see code movement in this patch. To me this should have
happened earlier in series.
> +{
> + if (estatus->raw_data_length)
> + return estatus->raw_data_offset + estatus->raw_data_length;
> + else
> + return sizeof(*estatus) + estatus->data_length;
> +}
> +
> struct ghes_vendor_record_entry {
> struct work_struct work;
> int error_severity;
> @@ -108,6 +116,8 @@ int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
> int ghes_estatus_cached(struct acpi_hest_generic_status *estatus);
> void ghes_estatus_cache_add(struct acpi_hest_generic *generic,
> struct acpi_hest_generic_status *estatus);
> +int ghes_register_vendor_record_notifier(struct notifier_block *nb);
> +void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
Why are we seeing these here now? Stray from earlier patch?
Which c file are they in at this point?
> void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
> int sev);
> int ghes_severity(int severity);
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider
2026-05-29 9:50 [PATCH v5 00/10] ACPI: APEI: share GHES CPER helpers and add DT FFH provider Ahmed Tiba
` (9 preceding siblings ...)
2026-05-29 9:50 ` [PATCH v5 10/10] RAS: add firmware-first CPER provider Ahmed Tiba
@ 2026-05-29 16:36 ` Jonathan Cameron
10 siblings, 0 replies; 23+ messages in thread
From: Jonathan Cameron @ 2026-05-29 16:36 UTC (permalink / raw)
To: Ahmed Tiba
Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
linux-edac, acpica-devel
On Fri, 29 May 2026 10:50:40 +0100
Ahmed Tiba <ahmed.tiba@arm•com> wrote:
> This is v5 of the GHES refactor series. Compared to v4, it only updates
> the DT binding to address the latest review comments.
https://sashiko.dev/#/patchset/20260529-topics-ahmtib01-ras_ffh_arm_internal_review-v5-0-2e0500d42642@arm.com
It is mostly not happy with existing code, but there is one point where it notes
a header that should be there (linux/cleanup.h)
Given you are working with this code at the moment, maybe take a look at some
of the reported issues whilst you are here!
Jonathan
^ permalink raw reply [flat|nested] 23+ messages in thread