* [PATCH v2 1/5] powerpc/rtas: Handle special return format for RTAS_FN_IBM_OPEN_ERRINJCT
2026-05-27 7:24 [PATCH v2 0/5] powerpc/pseries: Add full RTAS-based error injection support Narayana Murty N
@ 2026-05-27 7:24 ` Narayana Murty N
2026-06-07 11:19 ` Sourabh Jain
2026-05-27 7:24 ` [PATCH v2 2/5] powerpc/pseries: Add RTAS error injection buffer infrastructure Narayana Murty N
` (3 subsequent siblings)
4 siblings, 1 reply; 10+ messages in thread
From: Narayana Murty N @ 2026-05-27 7:24 UTC (permalink / raw)
To: mahesh, maddy, mpe, christophe.leroy, gregkh, oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
sourabhjain, haren, nnmlinux, thuth
RTAS_FN_IBM_OPEN_ERRINJCT returns results in a special format:
rets[0] = session token (output)
rets[1] = status code
rets[2..] = additional outputs (if any)
Unlike standard RTAS calls where:
rets[0] = status code
rets[1..] = outputs
This patch adds special handling for OPEN_ERRINJCT to:
1. Check correct status position (rets[1]) for __fetch_rtas_last_error()
2. Copy all rets[0..nret-1] to outputs[] (including token at rets[0])
3. Return status from rets[1] instead of rets[0]
Reference: OpenPOWER PAPR documentation
https://files.openpower.foundation/s/XFgfMaqLMD5Bcm8
Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
---
arch/powerpc/kernel/rtas.c | 47 ++++++++++++++++++++++++++++++++------
1 file changed, 40 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8d81c1e7a8db..a2dd94eed9d0 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1183,7 +1183,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
unsigned long flags;
struct rtas_args *args;
char *buff_copy = NULL;
- int ret;
+ int ret = 0;
if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
return -1;
@@ -1213,15 +1213,48 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
va_rtas_call_unlocked(args, token, nargs, nret, list);
va_end(list);
+ /*
+ * Special handling for RTAS_FN_IBM_OPEN_ERRINJCT:
+ * Per PAPR, ibm,open-errinjct has a unique return format:
+ * rets[0] = injection session token (output parameter)
+ * rets[1] = status code
+ *
+ * This differs from standard RTAS calls which return:
+ * rets[0] = status code
+ * rets[1..] = output parameters
+ *
+ * We must extract status from rets[1] (not rets[0]) to correctly
+ * detect errors and trigger __fetch_rtas_last_error() when status == -1.
+ */
/* A -1 return code indicates that the last command couldn't
- be completed due to a hardware error. */
- if (be32_to_cpu(args->rets[0]) == -1)
+ * be completed due to a hardware error.
+ */
+ if (token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) && nret > 1)
+ ret = be32_to_cpu(args->rets[1]);
+ else if (nret > 0)
+ ret = be32_to_cpu(args->rets[0]);
+
+ if (ret == -1)
buff_copy = __fetch_rtas_last_error(NULL);
- if (nret > 1 && outputs != NULL)
- for (i = 0; i < nret-1; ++i)
- outputs[i] = be32_to_cpu(args->rets[i + 1]);
- ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
+ /* Copy all return values to caller's outputs buffer if provided */
+ if (nret > 1 && outputs != NULL) {
+ if (token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT)) {
+ /* Special case: rets[0]=token, rets[1]=status, rets[2..]=outputs */
+ for (i = 0; i < nret; ++i)
+ outputs[i] = be32_to_cpu(args->rets[i]);
+ } else {
+ /* Normal case: rets[0]=status, rets[1..]=outputs */
+ for (i = 0; i < nret - 1; ++i)
+ outputs[i] = be32_to_cpu(args->rets[i + 1]);
+ }
+ } else {
+ /* Either no outputs to copy (nret <= 1) or caller
+ * didn't provide output buffer ensure ret contains
+ * the status code for standard RTAS calls.
+ */
+ ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
+ }
lockdep_unpin_lock(&rtas_lock, cookie);
raw_spin_unlock_irqrestore(&rtas_lock, flags);
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH v2 1/5] powerpc/rtas: Handle special return format for RTAS_FN_IBM_OPEN_ERRINJCT
2026-05-27 7:24 ` [PATCH v2 1/5] powerpc/rtas: Handle special return format for RTAS_FN_IBM_OPEN_ERRINJCT Narayana Murty N
@ 2026-06-07 11:19 ` Sourabh Jain
0 siblings, 0 replies; 10+ messages in thread
From: Sourabh Jain @ 2026-06-07 11:19 UTC (permalink / raw)
To: Narayana Murty N, mahesh, maddy, mpe, christophe.leroy, gregkh,
oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
haren, thuth
On 27/05/26 12:54, Narayana Murty N wrote:
> RTAS_FN_IBM_OPEN_ERRINJCT returns results in special format:
> rets[0] = session token (output)
> rets[1] = status code
> rets[2..] = additional outputs (if any)
>
> Unlike standard RTAS calls where:
> rets[0] = status code
> rets[1..] = outputs
>
> This patch adds special handling for OPEN_ERRINJCT to:
> 1. Check correct status position (rets[1]) for __fetch_rtas_last_error()
You can consider fixing the same for the RTAS syscall.
> 2. Copy all rets[0..nret-1] to outputs[] (including token at rets[0])
> 3. Return status from rets[1] instead of rets[0]
>
> Reference: OpenPOWER PAPR documentation
> https://files.openpower.foundation/s/XFgfMaqLMD5Bcm8
> Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
> ---
> arch/powerpc/kernel/rtas.c | 47 ++++++++++++++++++++++++++++++++------
> 1 file changed, 40 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index 8d81c1e7a8db..a2dd94eed9d0 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -1183,7 +1183,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
> unsigned long flags;
> struct rtas_args *args;
> char *buff_copy = NULL;
> - int ret;
> + int ret = 0;
>
> if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
> return -1;
> @@ -1213,15 +1213,48 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
> va_rtas_call_unlocked(args, token, nargs, nret, list);
> va_end(list);
>
> + /*
> + * Special handling for RTAS_FN_IBM_OPEN_ERRINJCT:
> + * Per PAPR, ibm,open-errinjct has a unique return format:
> + * rets[0] = injection session token (output parameter)
> + * rets[1] = status code
> + *
> + * This differs from standard RTAS calls which return:
> + * rets[0] = status code
> + * rets[1..] = output parameters
> + *
> + * We must extract status from rets[1] (not rets[0]) to correctly
> + * detect errors and trigger __fetch_rtas_last_error() when status == -1.
> + */
> /* A -1 return code indicates that the last command couldn't
> - be completed due to a hardware error. */
The above comment should be moved to next if block, if (ret == -1).
> - if (be32_to_cpu(args->rets[0]) == -1)
> + * be completed due to a hardware error.
> + */
> + if (token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) && nret > 1)
> + ret = be32_to_cpu(args->rets[1]);
> + else if (nret > 0)
> + ret = be32_to_cpu(args->rets[0]);
> +
> + if (ret == -1)
> buff_copy = __fetch_rtas_last_error(NULL);
>
> - if (nret > 1 && outputs != NULL)
> - for (i = 0; i < nret-1; ++i)
> - outputs[i] = be32_to_cpu(args->rets[i + 1]);
> - ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
> + /* Copy all return values to caller's outputs buffer if provided */
> + if (nret > 1 && outputs != NULL) {
> + if (token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT)) {
> + /* Special case: rets[0]=token, rets[1]=status, rets[2..]=outputs */
> + for (i = 0; i < nret; ++i)
> + outputs[i] = be32_to_cpu(args->rets[i]);
> + } else {
> + /* Normal case: rets[0]=status, rets[1..]=outputs */
> + for (i = 0; i < nret - 1; ++i)
> + outputs[i] = be32_to_cpu(args->rets[i + 1]);
I am surprised that status is never copied to the output buffer even
though it is part
of the output as per PAPR. But status is copied for ibm,open-errinjct,
which is okay.
> + }
> + } else {
> + /* Either no outputs to copy (nret <= 1) or caller
> + * didn't provide output buffer ensure ret contains
> + * the status code for standard RTAS calls.
> + */
> + ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
What is the need to find ret again? Isn't it already done above?
> + }
>
> lockdep_unpin_lock(&rtas_lock, cookie);
> raw_spin_unlock_irqrestore(&rtas_lock, flags);
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 2/5] powerpc/pseries: Add RTAS error injection buffer infrastructure
2026-05-27 7:24 [PATCH v2 0/5] powerpc/pseries: Add full RTAS-based error injection support Narayana Murty N
2026-05-27 7:24 ` [PATCH v2 1/5] powerpc/rtas: Handle special return format for RTAS_FN_IBM_OPEN_ERRINJCT Narayana Murty N
@ 2026-05-27 7:24 ` Narayana Murty N
2026-05-27 7:24 ` [PATCH v2 3/5] powerpc/pseries: Add RTAS error injection validation helpers Narayana Murty N
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Narayana Murty N @ 2026-05-27 7:24 UTC (permalink / raw)
To: mahesh, maddy, mpe, christophe.leroy, gregkh, oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
sourabhjain, haren, nnmlinux, thuth
Add the global infrastructure required by the injection engine:
- a 1KB aligned RTAS working buffer in rtas.c
- a spinlock to serialize buffer access
- UAPI definitions for error-injection tokens (added to eeh.h)
Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
---
arch/powerpc/include/asm/rtas.h | 21 +++++++++++++++++++++
arch/powerpc/include/uapi/asm/eeh.h | 18 ++++++++++++++++++
arch/powerpc/kernel/rtas.c | 12 ++++++++++++
3 files changed, 51 insertions(+)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index d046bbd5017d..82512f822c7a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -519,6 +519,27 @@ int rtas_get_error_log_max(void);
extern spinlock_t rtas_data_buf_lock;
extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
+/*
+ * RTAS Error Injection Buffer (PAPR-compliant)
+ * ============================================
+ *
+ * 1KB aligned, zero-initialized buffer for ibm,errinjct RTAS work area.
+ * Protected by rtas_errinjct_buf_lock for concurrent access safety.
+ *
+ * PAPR Requirement: ibm,errinjct requires a caller-allocated buffer passed
+ * via physical address. Buffer must accommodate largest error type layouts:
+ * - IOA bus error (64-bit): 8x32-bit words (32 bytes)
+ * - All other types: <=6x32-bit words (24 bytes)
+ *
+ * Usage:
+ * spin_lock() -> prepare_errinjct_buffer() -> rtas_call() -> spin_unlock()
+ *
+ * Alignment: SZ_1K ensures PAPR firmware requirements and cache-line safety.
+ */
+#define RTAS_ERRINJCT_BUF_SIZE 1024
+extern spinlock_t rtas_errinjct_buf_lock;
+extern char rtas_errinjct_buf[RTAS_ERRINJCT_BUF_SIZE];
+
/* RMO buffer reserved for user-space RTAS use */
extern unsigned long rtas_rmo_buf;
diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h
index 3b5c47ff3fc4..86645cab2827 100644
--- a/arch/powerpc/include/uapi/asm/eeh.h
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -41,4 +41,22 @@
#define EEH_ERR_FUNC_DMA_WR_TARGET 19
#define EEH_ERR_FUNC_MAX 19
+/* RTAS PCI Error Injection Token Types */
+#define RTAS_ERR_TYPE_FATAL 0x1
+#define RTAS_ERR_TYPE_RECOVERED_RANDOM_EVENT 0x2
+#define RTAS_ERR_TYPE_RECOVERED_SPECIAL_EVENT 0x3
+#define RTAS_ERR_TYPE_CORRUPTED_PAGE 0x4
+#define RTAS_ERR_TYPE_CORRUPTED_SLB 0x5
+#define RTAS_ERR_TYPE_TRANSLATOR_FAILURE 0x6
+#define RTAS_ERR_TYPE_IOA_BUS_ERROR 0x7
+#define RTAS_ERR_TYPE_PLATFORM_SPECIFIC 0x8
+#define RTAS_ERR_TYPE_CORRUPTED_DCACHE_START 0x9
+#define RTAS_ERR_TYPE_CORRUPTED_DCACHE_END 0xA
+#define RTAS_ERR_TYPE_CORRUPTED_ICACHE_START 0xB
+#define RTAS_ERR_TYPE_CORRUPTED_ICACHE_END 0xC
+#define RTAS_ERR_TYPE_CORRUPTED_TLB_START 0xD
+#define RTAS_ERR_TYPE_CORRUPTED_TLB_END 0xE
+#define RTAS_ERR_TYPE_IOA_BUS_ERROR_64 0xF
+#define RTAS_ERR_TYPE_UPSTREAM_IO_ERROR 0x10
+
#endif /* _ASM_POWERPC_EEH_H */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index a2dd94eed9d0..c110965ea1d9 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -769,6 +769,18 @@ EXPORT_SYMBOL_GPL(rtas_data_buf);
unsigned long rtas_rmo_buf;
+/*
+ * Work buffer for the ibm,errinjct RTAS service and the spinlock that
+ * serializes access to it. The size comes from RTAS_ERRINJCT_BUF_SIZE so
+ * the definition cannot drift from the extern declaration in asm/rtas.h.
+ * Both symbols are exported for the pseries EEH error injection code.
+ */
+DEFINE_SPINLOCK(rtas_errinjct_buf_lock);
+EXPORT_SYMBOL_GPL(rtas_errinjct_buf_lock);
+
+char rtas_errinjct_buf[RTAS_ERRINJCT_BUF_SIZE] __aligned(SZ_1K);
+EXPORT_SYMBOL_GPL(rtas_errinjct_buf);
+
/*
* If non-NULL, this gets called when the kernel terminates.
* This is done like this so rtas_flash can be a module.
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 3/5] powerpc/pseries: Add RTAS error injection validation helpers
2026-05-27 7:24 [PATCH v2 0/5] powerpc/pseries: Add full RTAS-based error injection support Narayana Murty N
2026-05-27 7:24 ` [PATCH v2 1/5] powerpc/rtas: Handle special return format for RTAS_FN_IBM_OPEN_ERRINJCT Narayana Murty N
2026-05-27 7:24 ` [PATCH v2 2/5] powerpc/pseries: Add RTAS error injection buffer infrastructure Narayana Murty N
@ 2026-05-27 7:24 ` Narayana Murty N
2026-06-07 12:17 ` Sourabh Jain
2026-05-27 7:24 ` [PATCH v2 4/5] powerpc/pseries: Implement RTAS error injection via pseries_eeh_err_inject Narayana Murty N
2026-05-27 7:24 ` [PATCH v2 5/5] powerpc/powernv: Map EEH error types to OPAL error injection types Narayana Murty N
4 siblings, 1 reply; 10+ messages in thread
From: Narayana Murty N @ 2026-05-27 7:24 UTC (permalink / raw)
To: mahesh, maddy, mpe, christophe.leroy, gregkh, oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
sourabhjain, haren, nnmlinux, thuth
Add comprehensive validation helpers for RTAS error injection parameters:
- validate_addr_mask_in_pe(): BAR range validation
- validate_err_type(): Token range check
- Type-specific validators (special-event, corrupted-page, ioa-bus-error)
Reported-by: kernel test robot <lkp@intel•com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512101130.EYUo0oZx-lkp@intel.com/
Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 261 +++++++++++++++++++
1 file changed, 261 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b12ef382fec7..d6f2e0d43b89 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -33,6 +33,10 @@
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
+/* Headers above already define a default pr_fmt, so override it explicitly. */
+#undef pr_fmt
+#define pr_fmt(fmt) "EEH: " fmt
+
/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
@@ -786,6 +790,263 @@ static int pseries_notify_resume(struct eeh_dev *edev)
}
#endif
+/**
+ * validate_addr_mask_in_pe - Validate that an addr+mask fall within PE's BARs
+ * @pe: EEH PE containing one or more PCI devices
+ * @addr: Address to validate
+ * @mask: Address mask to validate (0 means "no mask supplied")
+ *
+ * Walks every resource of every PCI device in @pe and checks that @addr lies
+ * inside at least one of them. A non-zero @mask must cover every bit set in
+ * @addr; a zero @mask is accepted without that check.
+ *
+ * Return: 0 if valid, RTAS_INVALID_PARAMETER on failure.
+ */
+static int validate_addr_mask_in_pe(struct eeh_pe *pe, unsigned long addr,
+				    unsigned long mask)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	int bar;
+	resource_size_t bar_start, bar_len;
+	bool valid = false;
+
+	/* nothing to validate */
+	if (addr == 0 && mask == 0)
+		return 0;
+
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		for (bar = 0; bar < PCI_NUM_RESOURCES; bar++) {
+			bar_start = pci_resource_start(pdev, bar);
+			bar_len = pci_resource_len(pdev, bar);
+
+			if (!bar_len)
+				continue;
+
+			if (addr >= bar_start && addr < (bar_start + bar_len)) {
+				/* a supplied mask must cover all bits of addr */
+				if (mask && (addr & mask) != addr) {
+					pr_err("Mask 0x%lx invalid for addr 0x%lx in BAR[%d] range 0x%llx-0x%llx\n",
+					       mask, addr, bar,
+					       (unsigned long long)bar_start,
+					       (unsigned long long)(bar_start + bar_len - 1));
+					return RTAS_INVALID_PARAMETER;
+				}
+
+				pr_debug("addr=0x%lx with mask=0x%lx validated in BAR[%d] of %s\n",
+					 addr, mask, bar, pci_name(pdev));
+				valid = true;
+			}
+		}
+	}
+
+	if (!valid) {
+		pr_err("addr=0x%lx not valid within any BAR of any device in PE\n",
+		       addr);
+		return RTAS_INVALID_PARAMETER;
+	}
+
+	return 0;
+}
+
+/**
+ * validate_err_type - Range check for an RTAS error injection type
+ * @type: RTAS error type to check
+ *
+ * A type is accepted when it lies in the closed interval
+ * [RTAS_ERR_TYPE_FATAL, RTAS_ERR_TYPE_UPSTREAM_IO_ERROR].
+ * No per-type argument checking is done here.
+ *
+ * Return: true if @type is inside that interval, false otherwise.
+ */
+static bool validate_err_type(int type)
+{
+	bool too_low = type < RTAS_ERR_TYPE_FATAL;
+	bool too_high = type > RTAS_ERR_TYPE_UPSTREAM_IO_ERROR;
+
+	return !(too_low || too_high);
+}
+
+/**
+ * validate_special_event - Check the arguments of a special-event injection
+ * @addr: Address parameter (must be zero)
+ * @mask: Mask parameter (must be zero)
+ *
+ * A special-event injection carries neither an address nor a mask, so the
+ * request is refused, and an error logged, as soon as one of them is set.
+ *
+ * Return: 0 if both are zero, RTAS_INVALID_PARAMETER otherwise.
+ */
+static int validate_special_event(unsigned long addr, unsigned long mask)
+{
+	/* Accept only the all-zero combination. */
+	if (!addr && !mask)
+		return 0;
+
+	pr_err("Special-event should not specify addr/mask\n");
+	return RTAS_INVALID_PARAMETER;
+}
+
+/**
+ * validate_corrupted_page - Check the arguments of a corrupted-page injection
+ * @pe: EEH PE (not referenced by this function)
+ * @addr: Physical page address (required, must be non-zero)
+ * @mask: Address mask (not used; a non-zero value only triggers a warning)
+ *
+ * A zero @addr is rejected; a non-zero @mask is only reported as ignored.
+ *
+ * Return: 0 if valid, RTAS_INVALID_PARAMETER otherwise.
+ */
+static int validate_corrupted_page(struct eeh_pe *pe __maybe_unused,
+				   unsigned long addr, unsigned long mask)
+{
+	int rc = 0;
+
+	if (addr == 0) {
+		pr_err("corrupted-page requires non-zero addr\n");
+		rc = RTAS_INVALID_PARAMETER;
+	} else if (mask != 0) {
+		pr_warn("corrupted-page ignoring mask=0x%lx\n", mask);
+	}
+
+	return rc;
+}
+
+/**
+ * validate_ioa_bus_error - Check the arguments of an IOA bus error injection
+ * @pe: EEH PE whose device resources @addr is checked against
+ * @addr: Address parameter (optional)
+ * @mask: Mask parameter (optional)
+ *
+ * Thin wrapper around validate_addr_mask_in_pe(): its result is returned
+ * unchanged, so an all-zero @addr/@mask pair is accepted.
+ *
+ * Return: 0 if valid or addr/mask absent, RTAS_INVALID_PARAMETER otherwise.
+ */
+static int validate_ioa_bus_error(struct eeh_pe *pe,
+				  unsigned long addr, unsigned long mask)
+{
+	int rc = validate_addr_mask_in_pe(pe, addr, mask);
+
+	return rc;
+}
+
+
+/**
+ * prepare_errinjct_buffer - Prepare RTAS error injection work buffer
+ * @pe: EEH PE for the target device(s)
+ * @type: RTAS error type
+ * @func: Error function selector (semantics vary by type)
+ * @addr: Address argument (type-dependent)
+ * @mask: Mask argument (type-dependent)
+ *
+ * Zeroes rtas_errinjct_buf, validates the arguments for @type and then stores
+ * them big-endian in that type's layout; on failure the buffer stays zeroed.
+ * NOTE(review): no lock is taken here although the buffer is global, so the
+ * caller presumably has to hold rtas_errinjct_buf_lock - please confirm.
+ *
+ * Return: 0 on success, or RTAS_INVALID_PARAMETER / -EINVAL on failure.
+ */
+static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
+				   unsigned long addr, unsigned long mask)
+{
+	__be64 *buf64;
+	__be32 *buf32;
+
+	memset(rtas_errinjct_buf, 0, RTAS_ERRINJCT_BUF_SIZE);
+	buf64 = (__be64 *)rtas_errinjct_buf;
+	buf32 = (__be32 *)rtas_errinjct_buf;
+
+	switch (type) {
+	case RTAS_ERR_TYPE_RECOVERED_SPECIAL_EVENT:
+		/* func must be 1 = non-persistent or 2 = persistent */
+		if (func < 1 || func > 2)
+			return RTAS_INVALID_PARAMETER;
+
+		if (validate_special_event(addr, mask))
+			return RTAS_INVALID_PARAMETER;
+
+		buf32[0] = cpu_to_be32(func);
+		break;
+
+	case RTAS_ERR_TYPE_CORRUPTED_PAGE:
+		/*
+		 * The helper rejects a zero addr and logs why, so no
+		 * separate silent pre-check is needed here.
+		 */
+		if (validate_corrupted_page(pe, addr, mask))
+			return RTAS_INVALID_PARAMETER;
+
+		buf32[0] = cpu_to_be32(upper_32_bits(addr));
+		buf32[1] = cpu_to_be32(lower_32_bits(addr));
+		break;
+
+	case RTAS_ERR_TYPE_IOA_BUS_ERROR:
+		/* 32-bit IOA bus error: addr/mask optional */
+		if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+			return RTAS_INVALID_PARAMETER;
+
+		/* An all-zero addr/mask pair passes the helper untouched. */
+		if (validate_ioa_bus_error(pe, addr, mask))
+			return RTAS_INVALID_PARAMETER;
+
+		/* Only the low 32 bits of addr/mask are stored in this layout. */
+		buf32[0] = cpu_to_be32(lower_32_bits(addr));
+		buf32[1] = cpu_to_be32(lower_32_bits(mask));
+		buf32[2] = cpu_to_be32(pe->addr);
+		buf32[3] = cpu_to_be32(BUID_HI(pe->phb->buid));
+		buf32[4] = cpu_to_be32(BUID_LO(pe->phb->buid));
+		buf32[5] = cpu_to_be32(func);
+		break;
+
+	case RTAS_ERR_TYPE_IOA_BUS_ERROR_64:
+		/* 64-bit IOA bus error: addr/mask optional */
+		if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+			return RTAS_INVALID_PARAMETER;
+
+		/* An all-zero addr/mask pair passes the helper untouched. */
+		if (validate_ioa_bus_error(pe, addr, mask))
+			return RTAS_INVALID_PARAMETER;
+
+		/* Two 64-bit words, then 32-bit words from buf32[4] onward. */
+		buf64[0] = cpu_to_be64(addr);
+		buf64[1] = cpu_to_be64(mask);
+		buf32[4] = cpu_to_be32(pe->addr);
+		buf32[5] = cpu_to_be32(BUID_HI(pe->phb->buid));
+		buf32[6] = cpu_to_be32(BUID_LO(pe->phb->buid));
+		buf32[7] = cpu_to_be32(func);
+		break;
+
+	case RTAS_ERR_TYPE_CORRUPTED_DCACHE_START:
+	case RTAS_ERR_TYPE_CORRUPTED_DCACHE_END:
+	case RTAS_ERR_TYPE_CORRUPTED_ICACHE_START:
+	case RTAS_ERR_TYPE_CORRUPTED_ICACHE_END:
+		/* addr/mask optional, no strict validation; low 32 bits only */
+		buf32[0] = cpu_to_be32(lower_32_bits(addr));
+		buf32[1] = cpu_to_be32(lower_32_bits(mask));
+		break;
+
+	case RTAS_ERR_TYPE_CORRUPTED_TLB_START:
+	case RTAS_ERR_TYPE_CORRUPTED_TLB_END:
+		/* only addr field relevant; low 32 bits only */
+		buf32[0] = cpu_to_be32(lower_32_bits(addr));
+		break;
+
+	default:
+		pr_err("Unsupported error type 0x%x\n", type);
+		return -EINVAL;
+	}
+
+	pr_debug("RTAS: errinjct buffer prepared: type=%d func=%d addr=0x%lx mask=0x%lx\n",
+		 type, func, addr, mask);
+
+	return 0;
+}
+
/**
* pseries_eeh_err_inject - Inject specified error to the indicated PE
* @pe: the indicated PE
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH v2 3/5] powerpc/pseries: Add RTAS error injection validation helpers
2026-05-27 7:24 ` [PATCH v2 3/5] powerpc/pseries: Add RTAS error injection validation helpers Narayana Murty N
@ 2026-06-07 12:17 ` Sourabh Jain
0 siblings, 0 replies; 10+ messages in thread
From: Sourabh Jain @ 2026-06-07 12:17 UTC (permalink / raw)
To: Narayana Murty N, mahesh, maddy, mpe, christophe.leroy, gregkh,
oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
haren, thuth
On 27/05/26 12:54, Narayana Murty N wrote:
> Add comprehensive validation helpers for RTAS error injection parameters:
> - validate_addr_mask_in_pe(): BAR range validation
> - validate_err_type(): Token range check
> - Type-specific validators (special-event, corrupted-page, ioa-bus-error)
>
> Reported-by: kernel test robot <lkp@intel•com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202512101130.EYUo0oZx-lkp@intel.com/
>
> Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
> ---
> arch/powerpc/platforms/pseries/eeh_pseries.c | 261 +++++++++++++++++++
> 1 file changed, 261 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index b12ef382fec7..d6f2e0d43b89 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -33,6 +33,10 @@
> #include <asm/ppc-pci.h>
> #include <asm/rtas.h>
>
> +#ifndef pr_fmt
> +#define pr_fmt(fmt) "EEH: " fmt
Why this is under ifndef?
> +#endif
> +
> /* RTAS tokens */
> static int ibm_set_eeh_option;
> static int ibm_set_slot_reset;
> @@ -786,6 +790,263 @@ static int pseries_notify_resume(struct eeh_dev *edev)
> }
> #endif
>
> +/**
> + * validate_addr_mask_in_pe - Validate that an addr+mask fall within PE's BARs
> + * @pe: EEH PE containing one or more PCI devices
> + * @addr: Address to validate
> + * @mask: Address mask to validate
> + *
> + * Checks that @addr is mapped into a BAR/MMIO region of any device belonging
> + * to the PE. If @mask is non-zero, ensures it is consistent with @addr.
> + *
> + * Return: 0 if valid, RTAS_INVALID_PARAMETER on failure.
> + */
> +
> +static int validate_addr_mask_in_pe(struct eeh_pe *pe, unsigned long addr,
> + unsigned long mask)
> +{
> + struct eeh_dev *edev, *tmp;
> + struct pci_dev *pdev;
> + int bar;
> + resource_size_t bar_start, bar_len;
> + bool valid = false;
> +
> + /* nothing to validate */
> + if (addr == 0 && mask == 0)
> + return 0;
> +
> + eeh_pe_for_each_dev(pe, edev, tmp) {
> + pdev = eeh_dev_to_pci_dev(edev);
> + if (!pdev)
> + continue;
> +
> + for (bar = 0; bar < PCI_NUM_RESOURCES; bar++) {
> + bar_start = pci_resource_start(pdev, bar);
> + bar_len = pci_resource_len(pdev, bar);
> +
> + if (!bar_len)
> + continue;
> +
> + if (addr >= bar_start && addr < (bar_start + bar_len)) {
> + /* ensure mask makes sense for the addr value */
> + if ((addr & mask) != addr) {
> + pr_err("Mask 0x%lx invalid for addr 0x%lx in BAR[%d] range 0x%llx-0x%llx\n",
> + mask, addr, bar,
> + (unsigned long long)bar_start,
> + (unsigned long long)(bar_start + bar_len));
> + return RTAS_INVALID_PARAMETER;
> + }
> +
> + pr_debug("addr=0x%lx with mask=0x%lx validated in BAR[%d] of %s\n",
> + addr, mask, bar, pci_name(pdev));
> + valid = true;
> + }
> + }
> + }
> +
> + if (!valid) {
> + pr_err("addr=0x%lx not valid within any BAR of any device in PE\n",
> + addr);
> + return RTAS_INVALID_PARAMETER;
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * validate_err_type - Basic sanity check for RTAS error type
> + * @type: RTAS error type
> + *
> + * Ensures that the error type is within the valid RTAS error type range.
> + *
> + * Return: true if valid, false otherwise.
> + */
> +
> +static bool validate_err_type(int type)
> +{
> + if (type < RTAS_ERR_TYPE_FATAL ||
> + type > RTAS_ERR_TYPE_UPSTREAM_IO_ERROR)
> + return false;
> +
> + return true;
> +}
How about defining this and the function below as inline?
> +
> +/**
> + * validate_special_event - Validate parameters for special-event injection
> + * @addr: Address parameter (should be zero)
> + * @mask: Mask parameter (should be zero)
> + *
> + * Special-event error injection should not take addr/mask. Rejects if either
> + * is set.
> + *
> + * Return: 0 if valid, RTAS_INVALID_PARAMETER otherwise.
> + */
> +
> +static int validate_special_event(unsigned long addr, unsigned long mask)
> +{
> + if (addr || mask) {
> + pr_err("Special-event should not specify addr/mask\n");
> + return RTAS_INVALID_PARAMETER;
> + }
> + return 0;
> +}
> +
> +/**
> + * validate_corrupted_page - Validate parameters for corrupted-page injection
> + * @pe: EEH PE (__maybe_unused)
> + * @addr: Physical page address (required)
> + * @mask: Address mask (ignored if non-zero)
> + *
> + * Ensures a valid non-zero page address is provided. Warns if mask is set.
> + *
> + * Return: 0 if valid, RTAS_INVALID_PARAMETER otherwise.
> + */
> +
> +static int validate_corrupted_page(struct eeh_pe *pe __maybe_unused,
> + unsigned long addr, unsigned long mask)
pe is not used in this function and it is removed in the next patch. Why
don't
we define this function properly in this patch itself.
> +{
> + if (!addr) {
> + pr_err("corrupted-page requires non-zero addr\n");
> + return RTAS_INVALID_PARAMETER;
> + }
> + /* Mask not meaningful for corrupted-page */
If it is not meaningful why can't we ignore it?
> + if (mask)
> + pr_warn("corrupted-page ignoring mask=0x%lx\n", mask);
> +
> + return 0;
> +}
> +
> +/**
> + * validate_ioa_bus_error - Validate parameters for IOA bus error injection
> + * @pe: EEH PE whose BARs are validated against
> + * @addr: Address parameter (optional)
> + * @mask: Mask parameter (optional)
> + *
> + * For IOA bus error injections, @addr and @mask are optional. If present,
> + * they must map into the PE's MMIO/CFG space.
> + *
> + * Return: 0 if valid or addr/mask absent, RTAS_INVALID_PARAMETER otherwise.
> + */
> +
> +static int validate_ioa_bus_error(struct eeh_pe *pe,
> + unsigned long addr, unsigned long mask)
> +{
> + /* Must map into BAR/MMIO/CFG space of PE */
> + return validate_addr_mask_in_pe(pe, addr, mask);
What is the benefit of adding a static helper function that just calls
another
static helper function in the same file?
> +}
> +
> +
> +/**
> + * prepare_errinjct_buffer - Prepare RTAS error injection work buffer
> + * @pe: EEH PE for the target device(s)
> + * @type: RTAS error type
> + * @func: Error function selector (semantics vary by type)
> + * @addr: Address argument (type-dependent)
> + * @mask: Mask argument (type-dependent)
Isn't the caller of this helper expected to hold rtas_errinjct_buf_lock?
If that is
the case, let's document it.
> + *
> + * Clears the global error injection work buffer and populates it based on
> + * the error type and parameters provided. Performs inline validation of the
> + * arguments for each supported error type.
> + *
> + * Return: 0 on success, or RTAS_INVALID_PARAMETER / -EINVAL on failure.
> + */
> +
> +static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
> + unsigned long addr, unsigned long mask)
> +{
> + __be64 *buf64;
> + __be32 *buf32;
> +
> + memset(rtas_errinjct_buf, 0, RTAS_ERRINJCT_BUF_SIZE);
> + buf64 = (__be64 *)rtas_errinjct_buf;
> + buf32 = (__be32 *)rtas_errinjct_buf;
> +
> + switch (type) {
> + case RTAS_ERR_TYPE_RECOVERED_SPECIAL_EVENT:
> + /* func must be 1 = non-persistent or 2 = persistent */
> + if (func < 1 || func > 2)
> + return RTAS_INVALID_PARAMETER;
> +
> + if (validate_special_event(addr, mask))
> + return RTAS_INVALID_PARAMETER;
> +
> + buf32[0] = cpu_to_be32(func);
> + break;
> +
> + case RTAS_ERR_TYPE_CORRUPTED_PAGE:
> + /* addr required: physical page address */
> + if (addr == 0)
> + return RTAS_INVALID_PARAMETER;
> +
> + if (validate_corrupted_page(pe, addr, mask))
> + return RTAS_INVALID_PARAMETER;
> +
> + buf32[0] = cpu_to_be32(upper_32_bits(addr));
> + buf32[1] = cpu_to_be32(lower_32_bits(addr));
> + break;
> +
> + case RTAS_ERR_TYPE_IOA_BUS_ERROR:
> + /* 32-bit IOA bus error: addr/mask optional */
> + if (func < EEH_ERR_FUNC_LD_MEM_ADDR || func > EEH_ERR_FUNC_MAX)
> + return RTAS_INVALID_PARAMETER;
> +
> + if (addr || mask) {
> + if (validate_ioa_bus_error(pe, addr, mask))
> + return RTAS_INVALID_PARAMETER;
> + }
> +
> + buf32[0] = cpu_to_be32((u32)addr);
> + buf32[1] = cpu_to_be32((u32)mask);
> + buf32[2] = cpu_to_be32(pe->addr);
> + buf32[3] = cpu_to_be32(BUID_HI(pe->phb->buid));
> + buf32[4] = cpu_to_be32(BUID_LO(pe->phb->buid));
> + buf32[5] = cpu_to_be32(func);
> + break;
> +
> + case RTAS_ERR_TYPE_IOA_BUS_ERROR_64:
> + /* 64-bit IOA bus error: addr/mask optional */
> + if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
> + return RTAS_INVALID_PARAMETER;
> +
> + if (addr || mask) {
> + if (validate_ioa_bus_error(pe, addr, mask))
> + return RTAS_INVALID_PARAMETER;
> + }
> +
> + buf64[0] = cpu_to_be64(addr);
> + buf64[1] = cpu_to_be64(mask);
> + buf32[4] = cpu_to_be32(pe->addr);
> + buf32[5] = cpu_to_be32(BUID_HI(pe->phb->buid));
> + buf32[6] = cpu_to_be32(BUID_LO(pe->phb->buid));
> + buf32[7] = cpu_to_be32(func);
> + break;
> +
> + case RTAS_ERR_TYPE_CORRUPTED_DCACHE_START:
> + case RTAS_ERR_TYPE_CORRUPTED_DCACHE_END:
> + case RTAS_ERR_TYPE_CORRUPTED_ICACHE_START:
> + case RTAS_ERR_TYPE_CORRUPTED_ICACHE_END:
> + /* addr/mask optional, no strict validation */
> + buf32[0] = cpu_to_be32(addr);
> + buf32[1] = cpu_to_be32(mask);
> + break;
> +
> + case RTAS_ERR_TYPE_CORRUPTED_TLB_START:
> + case RTAS_ERR_TYPE_CORRUPTED_TLB_END:
> + /* only addr field relevant */
> + buf32[0] = cpu_to_be32(addr);
> + break;
> +
> + default:
> + pr_err("Unsupported error type 0x%x\n", type);
> + return -EINVAL;
> + }
> +
> + pr_debug("RTAS: errinjct buffer prepared: type=%d func=%d addr=0x%lx mask=0x%lx\n",
> + type, func, addr, mask);
> +
> + return 0;
> +}
> +
> /**
> * pseries_eeh_err_inject - Inject specified error to the indicated PE
> * @pe: the indicated PE
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 4/5] powerpc/pseries: Implement RTAS error injection via pseries_eeh_err_inject
2026-05-27 7:24 [PATCH v2 0/5] powerpc/pseries: Add full RTAS-based error injection support Narayana Murty N
` (2 preceding siblings ...)
2026-05-27 7:24 ` [PATCH v2 3/5] powerpc/pseries: Add RTAS error injection validation helpers Narayana Murty N
@ 2026-05-27 7:24 ` Narayana Murty N
2026-06-07 13:35 ` Sourabh Jain
2026-05-27 7:24 ` [PATCH v2 5/5] powerpc/powernv: Map EEH error types to OPAL error injection types Narayana Murty N
4 siblings, 1 reply; 10+ messages in thread
From: Narayana Murty N @ 2026-05-27 7:24 UTC (permalink / raw)
To: mahesh, maddy, mpe, christophe.leroy, gregkh, oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
sourabhjain, haren, nnmlinux, thuth
Replace legacy MMIO error injection with full PAPR-compliant RTAS error
injection supporting 14+ error types via
- ibm,open-errinjct
- ibm,errinjct
- ibm,close-errinjct.
Key features:
- Complete open-session-inject-close cycle management
- Special handling for ibm,open-errinjct output format (token,status)
- Comprehensive buffer preparation per PAPR layouts
- All pr_* logging uses pr_fmt("EEH: ") prefix
Tested with corresponding QEMU patches:
https://lore.kernel.org/all/20251029150618.186803-1-nnmlinux@linux.ibm.com/
Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 168 ++++++++++++++++---
1 file changed, 147 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index d6f2e0d43b89..6af2a153ec25 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -902,8 +902,7 @@ static int validate_special_event(unsigned long addr, unsigned long mask)
* Return: 0 if valid, RTAS_INVALID_PARAMETER otherwise.
*/
-static int validate_corrupted_page(struct eeh_pe *pe __maybe_unused,
- unsigned long addr, unsigned long mask)
+static int validate_corrupted_page(unsigned long addr, unsigned long mask)
{
if (!addr) {
pr_err("corrupted-page requires non-zero addr\n");
@@ -978,7 +977,7 @@ static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
if (addr == 0)
return RTAS_INVALID_PARAMETER;
- if (validate_corrupted_page(pe, addr, mask))
+ if (validate_corrupted_page(addr, mask))
return RTAS_INVALID_PARAMETER;
buf32[0] = cpu_to_be32(upper_32_bits(addr));
@@ -1047,6 +1046,97 @@ static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
return 0;
}
+/**
+ * rtas_open_errinjct_session - Open an RTAS error injection session
+ *
+ * Opens a session with the RTAS ibm,open-errinjct service.
+ *
+ * Return: Positive session token on success, negative error code on failure.
+ */
+static int rtas_open_errinjct_session(void)
+{
+ int open_token, args[2] = {0};
+ int rc, status, session_token = -1;
+
+ open_token = rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT);
+ if (open_token == RTAS_UNKNOWN_SERVICE) {
+ pr_err("RTAS: ibm,open-errinjct not available\n");
+ return RTAS_UNKNOWN_SERVICE;
+ }
+
+ /* Call open; original code treated rtas_call return as session token */
+ rc = rtas_call(open_token, 0, 2, args);
+ status = args[1];
+ if (status != 0) {
+ pr_err("RTAS: open-errinjct failed: status=%d args[1]=%d rc=%d\n",
+ status, args[1], rc);
+ return status ? status : -EIO;
+ }
+
+ session_token = args[0];
+ pr_info("Opened injection session: token=%d\n", session_token);
+ return session_token;
+}
+
+/**
+ * rtas_close_errinjct_session - Close an RTAS error injection session
+ * @session_token: Session token returned from open
+ *
+ * Attempts to close a previously opened error injection session. Best-effort;
+ * logs warnings if close fails or if service is unavailable.
+ */
+
+static void rtas_close_errinjct_session(int session_token)
+{
+ int close_token, args[2] = {0};
+
+ if (session_token <= 0)
+ return;
+
+ close_token = rtas_function_token(RTAS_FN_IBM_CLOSE_ERRINJCT);
+ if (close_token == RTAS_UNKNOWN_SERVICE) {
+ pr_warn("close-errinjct not available\n");
+ return;
+ }
+
+ args[0] = session_token;
+ rtas_call(close_token, 1, 1, args);
+ if (args[0])
+ pr_warn("close-errinjct args[0]=%d\n", args[0]);
+}
+
+/**
+ * do_errinjct_call - Invoke the RTAS error injection service
+ * @errinjct_token: RTAS token for ibm,errinjct
+ * @type: RTAS error type
+ * @session_token: RTAS error injection session token
+ *
+ * Issues the RTAS ibm,errinjct call with the prepared work buffer. Logs errors
+ * on failure.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+
+static int do_errinjct_call(int errinjct_token, int type, int session_token)
+{
+ int rc, status;
+
+ if (errinjct_token == RTAS_UNKNOWN_SERVICE)
+ return -ENODEV;
+
+ /* errinjct takes: type, session_token, workbuf pointer (3 in), returns status */
+ rc = rtas_call(errinjct_token, 3, 1, &status, type, session_token,
+ rtas_errinjct_buf);
+
+ if (rc || status != 0) {
+ pr_err("RTAS: errinjct failed: rc=%d, status=%d\n", rc, status);
+ return status ? status : -EIO;
+ }
+
+ pr_info("RTAS: errinjct ok: rc=%d, status=%d\n", rc, status);
+ return 0;
+}
+
/**
* pseries_eeh_err_inject - Inject specified error to the indicated PE
* @pe: the indicated PE
@@ -1060,30 +1150,66 @@ static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func,
unsigned long addr, unsigned long mask)
{
- struct eeh_dev *pdev;
+ int rc = 0;
+ int session_token = -1;
+ int errinjct_token;
- /* Check on PCI error type */
- if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
- return -EINVAL;
+ /* Validate type */
+ if (!validate_err_type(type)) {
+ pr_err("RTAS: invalid error type 0x%x\n", type);
+ return RTAS_INVALID_PARAMETER;
+ }
+ pr_debug("RTAS: error type 0x%x\n", type);
- switch (func) {
- case EEH_ERR_FUNC_LD_MEM_ADDR:
- case EEH_ERR_FUNC_LD_MEM_DATA:
- case EEH_ERR_FUNC_ST_MEM_ADDR:
- case EEH_ERR_FUNC_ST_MEM_DATA:
- /* injects a MMIO error for all pdev's belonging to PE */
- pci_lock_rescan_remove();
- list_for_each_entry(pdev, &pe->edevs, entry)
- eeh_pe_inject_mmio_error(pdev->pdev);
- pci_unlock_rescan_remove();
- break;
- default:
- return -ERANGE;
+ /* For IOA bus errors we must validate err_func and addr/mask in PE.
+ * For other types: if addr/mask present we'll still validate BAR range;
+ * otherwise skip function checks.
+ */
+ if (type == RTAS_ERR_TYPE_IOA_BUS_ERROR ||
+ type == RTAS_ERR_TYPE_IOA_BUS_ERROR_64) {
+ /* Validate that addr/mask fall in the PE's BAR ranges */
+ rc = validate_addr_mask_in_pe(pe, addr, mask);
+ if (rc)
+ return rc;
+ } else if (addr || mask) {
+ /* If caller provided addr/mask for a non-IOA type, do a BAR check too */
+ rc = validate_addr_mask_in_pe(pe, addr, mask);
+ if (rc)
+ return rc;
}
- return 0;
+ /* Open RTAS session */
+ session_token = rtas_open_errinjct_session();
+ if (session_token < 0)
+ return session_token;
+
+ /* get errinjct token */
+ errinjct_token = rtas_function_token(RTAS_FN_IBM_ERRINJCT);
+ if (errinjct_token == RTAS_UNKNOWN_SERVICE) {
+ pr_err("RTAS: ibm,errinjct not available\n");
+ rc = -ENODEV;
+ goto out_close;
+ }
+
+ /* prepare shared buffer while holding lock */
+ spin_lock(&rtas_errinjct_buf_lock);
+ rc = prepare_errinjct_buffer(pe, type, func, addr, mask);
+ if (rc) {
+ spin_unlock(&rtas_errinjct_buf_lock);
+ goto out_close;
+ }
+
+ /* perform the errinjct RTAS call */
+ rc = do_errinjct_call(errinjct_token, type, session_token);
+ spin_unlock(&rtas_errinjct_buf_lock);
+
+out_close:
+ /* always attempt close if we opened a session */
+ rtas_close_errinjct_session(session_token);
+ return rc;
}
+
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
.probe = pseries_eeh_probe,
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 4/5] powerpc/pseries: Implement RTAS error injection via pseries_eeh_err_inject
2026-05-27 7:24 ` [PATCH v2 4/5] powerpc/pseries: Implement RTAS error injection via pseries_eeh_err_inject Narayana Murty N
@ 2026-06-07 13:35 ` Sourabh Jain
0 siblings, 0 replies; 10+ messages in thread
From: Sourabh Jain @ 2026-06-07 13:35 UTC (permalink / raw)
To: Narayana Murty N, mahesh, maddy, mpe, christophe.leroy, gregkh,
oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
haren, thuth
On 27/05/26 12:54, Narayana Murty N wrote:
> Replace legacy MMIO error injection with full PAPR-compliant RTAS error
> injection supporting 14+ error types via
> - ibm,open-errinjct
> - ibm,errinjct
> - ibm,close-errinjct.
>
> Key features:
> - Complete open-session-inject-close cycle management
> - Special handling for ibm,open-errinjct output format (token,status)
> - Comprehensive buffer preparation per PAPR layouts
> - All pr_* logging uses pr_fmt("EEH: ") prefix
>
> Tested with corresponding QEMU patches:
> https://lore.kernel.org/all/20251029150618.186803-1-nnmlinux@linux.ibm.com/
>
> Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
> ---
> arch/powerpc/platforms/pseries/eeh_pseries.c | 168 ++++++++++++++++---
> 1 file changed, 147 insertions(+), 21 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index d6f2e0d43b89..6af2a153ec25 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -902,8 +902,7 @@ static int validate_special_event(unsigned long addr, unsigned long mask)
> * Return: 0 if valid, RTAS_INVALID_PARAMETER otherwise.
> */
>
> -static int validate_corrupted_page(struct eeh_pe *pe __maybe_unused,
> - unsigned long addr, unsigned long mask)
> +static int validate_corrupted_page(unsigned long addr, unsigned long mask)
> {
> if (!addr) {
> pr_err("corrupted-page requires non-zero addr\n");
> @@ -978,7 +977,7 @@ static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
> if (addr == 0)
> return RTAS_INVALID_PARAMETER;
>
> - if (validate_corrupted_page(pe, addr, mask))
> + if (validate_corrupted_page(addr, mask))
> return RTAS_INVALID_PARAMETER;
>
> buf32[0] = cpu_to_be32(upper_32_bits(addr));
> @@ -1047,6 +1046,97 @@ static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
> return 0;
> }
>
> +/**
> + * rtas_open_errinjct_session - Open an RTAS error injection session
> + *
> + * Opens a session with the RTAS ibm,open-errinjct service.
> + *
> + * Return: Positive session token on success, negative error code on failure.
The session token can't be 0, can it?
> + */
> +static int rtas_open_errinjct_session(void)
> +{
> + int open_token, args[2] = {0};
> + int rc, status, session_token = -1;
> +
> + open_token = rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT);
> + if (open_token == RTAS_UNKNOWN_SERVICE) {
> + pr_err("RTAS: ibm,open-errinjct not available\n");
> + return RTAS_UNKNOWN_SERVICE;
> + }
> +
> + /* Call open; original code treated rtas_call return as session token */
> + rc = rtas_call(open_token, 0, 2, args);
> + status = args[1];
rc and status are the same here, aren't they? That makes the status variable redundant.
> + if (status != 0) {
> + pr_err("RTAS: open-errinjct failed: status=%d args[1]=%d rc=%d\n",
> + status, args[1], rc);
> + return status ? status : -EIO;
> + }
Are you not planning to handle the extended-delay return codes from RTAS (9900...9905)?
> +
> + session_token = args[0];
> + pr_info("Opened injection session: token=%d\n", session_token);
> + return session_token;
> +}
> +
> +/**
> + * rtas_close_errinjct_session - Close an RTAS error injection session
> + * @session_token: Session token returned from open
> + *
> + * Attempts to close a previously opened error injection session. Best-effort;
> + * logs warnings if close fails or if service is unavailable.
> + */
> +
> +static void rtas_close_errinjct_session(int session_token)
> +{
> + int close_token, args[2] = {0};
> +
> + if (session_token <= 0)
> + return;
I didn't find a section in the PAPR which says token can't be 0.
> +
> + close_token = rtas_function_token(RTAS_FN_IBM_CLOSE_ERRINJCT);
> + if (close_token == RTAS_UNKNOWN_SERVICE) {
> + pr_warn("close-errinjct not available\n");
> + return;
> + }
> +
> + args[0] = session_token;
> + rtas_call(close_token, 1, 1, args);
> + if (args[0])
> + pr_warn("close-errinjct args[0]=%d\n", args[0]);
IIUC rtas_call() does not copy the status to the output buffer. Let's treat
the return value
of rtas_call() as the status.
Since the status is not copied, an int arg is enough.
Shouldn't we also handle the RTAS busy delay for the errinjct close RTAS call?
> +}
> +
> +/**
> + * do_errinjct_call - Invoke the RTAS error injection service
> + * @errinjct_token: RTAS token for ibm,errinjct
> + * @type: RTAS error type
> + * @session_token: RTAS error injection session token
> + *
> + * Issues the RTAS ibm,errinjct call with the prepared work buffer. Logs errors
> + * on failure.
> + *
> + * Return: 0 on success, negative error code otherwise.
> + */
> +
> +static int do_errinjct_call(int errinjct_token, int type, int session_token)
> +{
> + int rc, status;
> +
> + if (errinjct_token == RTAS_UNKNOWN_SERVICE)
> + return -ENODEV;
> +
> + /* errinjct takes: type, session_token, workbuf pointer (3 in), returns status */
> + rc = rtas_call(errinjct_token, 3, 1, &status, type, session_token,
> + rtas_errinjct_buf);
> +
> + if (rc || status != 0) {
> + pr_err("RTAS: errinjct failed: rc=%d, status=%d\n", rc, status);
> + return status ? status : -EIO;
> + }
> +
> + pr_info("RTAS: errinjct ok: rc=%d, status=%d\n", rc, status);
> + return 0;
> +}
> +
> /**
> * pseries_eeh_err_inject - Inject specified error to the indicated PE
> * @pe: the indicated PE
> @@ -1060,30 +1150,66 @@ static int prepare_errinjct_buffer(struct eeh_pe *pe, int type, int func,
> static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func,
> unsigned long addr, unsigned long mask)
> {
> - struct eeh_dev *pdev;
> + int rc = 0;
> + int session_token = -1;
> + int errinjct_token;
>
> - /* Check on PCI error type */
> - if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
> - return -EINVAL;
> + /* Validate type */
> + if (!validate_err_type(type)) {
> + pr_err("RTAS: invalid error type 0x%x\n", type);
> + return RTAS_INVALID_PARAMETER;
> + }
> + pr_debug("RTAS: error type 0x%x\n", type);
>
> - switch (func) {
> - case EEH_ERR_FUNC_LD_MEM_ADDR:
> - case EEH_ERR_FUNC_LD_MEM_DATA:
> - case EEH_ERR_FUNC_ST_MEM_ADDR:
> - case EEH_ERR_FUNC_ST_MEM_DATA:
> - /* injects a MMIO error for all pdev's belonging to PE */
> - pci_lock_rescan_remove();
> - list_for_each_entry(pdev, &pe->edevs, entry)
> - eeh_pe_inject_mmio_error(pdev->pdev);
> - pci_unlock_rescan_remove();
> - break;
> - default:
> - return -ERANGE;
> + /* For IOA bus errors we must validate err_func and addr/mask in PE.
> + * For other types: if addr/mask present we'll still validate BAR range;
> + * otherwise skip function checks.
> + */
> + if (type == RTAS_ERR_TYPE_IOA_BUS_ERROR ||
> + type == RTAS_ERR_TYPE_IOA_BUS_ERROR_64) {
> + /* Validate that addr/mask fall in the PE's BAR ranges */
> + rc = validate_addr_mask_in_pe(pe, addr, mask);
> + if (rc)
> + return rc;
> + } else if (addr || mask) {
> + /* If caller provided addr/mask for a non-IOA type, do a BAR check too */
> + rc = validate_addr_mask_in_pe(pe, addr, mask);
> + if (rc)
> + return rc;
> }
The if and else-if branches above contain identical code. Why don't we merge them?
>
> - return 0;
> + /* Open RTAS session */
> + session_token = rtas_open_errinjct_session();
> + if (session_token < 0)
A session_token of 0 is considered valid here, whereas it was considered
invalid in the other
function above.
> + return session_token;
> +
> + /* get errinjct token */
> + errinjct_token = rtas_function_token(RTAS_FN_IBM_ERRINJCT);
> + if (errinjct_token == RTAS_UNKNOWN_SERVICE) {
How about checking this before getting the session token?
> + pr_err("RTAS: ibm,errinjct not available\n");
> + rc = -ENODEV;
> + goto out_close;
> + }
> +
> + /* prepare shared buffer while holding lock */
> + spin_lock(&rtas_errinjct_buf_lock);
> + rc = prepare_errinjct_buffer(pe, type, func, addr, mask);
> + if (rc) {
> + spin_unlock(&rtas_errinjct_buf_lock);
> + goto out_close;
> + }
> +
> + /* perform the errinjct RTAS call */
> + rc = do_errinjct_call(errinjct_token, type, session_token);
> + spin_unlock(&rtas_errinjct_buf_lock);
> +
> +out_close:
> + /* always attempt close if we opened a session */
> + rtas_close_errinjct_session(session_token);
> + return rc;
> }
>
> +
This new line seems unnecessary.
> static struct eeh_ops pseries_eeh_ops = {
> .name = "pseries",
> .probe = pseries_eeh_probe,
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 5/5] powerpc/powernv: Map EEH error types to OPAL error injection types
2026-05-27 7:24 [PATCH v2 0/5] powerpc/pseries: Add full RTAS-based error injection support Narayana Murty N
` (3 preceding siblings ...)
2026-05-27 7:24 ` [PATCH v2 4/5] powerpc/pseries: Implement RTAS error injection via pseries_eeh_err_inject Narayana Murty N
@ 2026-05-27 7:24 ` Narayana Murty N
2026-06-07 13:46 ` Sourabh Jain
4 siblings, 1 reply; 10+ messages in thread
From: Narayana Murty N @ 2026-05-27 7:24 UTC (permalink / raw)
To: mahesh, maddy, mpe, christophe.leroy, gregkh, oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
sourabhjain, haren, nnmlinux, thuth
Add a mapping layer in pnv_eeh_err_inject() to translate generic EEH
error types to OPAL-specific error injection types. This decouples the
VFIO error injection interface from OPAL implementation details.
Map EEH_ERR_TYPE_32 to OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR and
EEH_ERR_TYPE_64 to OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64. Return -ERANGE
for unsupported error types.
This provides better abstraction between the generic EEH layer and
platform-specific implementation.
Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
---
arch/powerpc/include/uapi/asm/eeh.h | 20 ++++++++++++++++++--
arch/powerpc/platforms/powernv/eeh-powernv.c | 11 +++++++++--
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h
index 86645cab2827..d88d82796905 100644
--- a/arch/powerpc/include/uapi/asm/eeh.h
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -16,8 +16,24 @@
#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */
/* EEH error types and functions */
-#define EEH_ERR_TYPE_32 0 /* 32-bits error */
-#define EEH_ERR_TYPE_64 1 /* 64-bits error */
+#define EEH_ERR_TYPE_FATAL 0x1 /* Fatal error */
+#define EEH_ERR_TYPE_RECOVERED_RANDOM 0x2 /* Recovered random event */
+#define EEH_ERR_TYPE_RECOVERED_SPECIAL 0x3 /* Recovered special event */
+#define EEH_ERR_TYPE_CORRUPTED_PAGE 0x4 /* Corrupted page */
+#define EEH_ERR_TYPE_CORRUPTED_SLB 0x5 /* Corrupted SLB */
+#define EEH_ERR_TYPE_TRANSLATOR_FAILURE 0x6 /* Translator failure */
+#define EEH_ERR_TYPE_32 0x7 /* 32-bit IOA bus error */
+#define EEH_ERR_TYPE_PLATFORM_SPECIFIC 0x8 /* Platform specific */
+#define EEH_ERR_TYPE_CORRUPTED_DCACHE_START 0x9 /* Corrupted D-cache start */
+#define EEH_ERR_TYPE_CORRUPTED_DCACHE_END 0xA /* Corrupted D-cache end */
+#define EEH_ERR_TYPE_CORRUPTED_ICACHE_START 0xB /* Corrupted I-cache start */
+#define EEH_ERR_TYPE_CORRUPTED_ICACHE_END 0xC /* Corrupted I-cache end */
+#define EEH_ERR_TYPE_CORRUPTED_TLB_START 0xD /* Corrupted TLB start */
+#define EEH_ERR_TYPE_CORRUPTED_TLB_END 0xE /* Corrupted TLB end */
+#define EEH_ERR_TYPE_64 0xF /* 64-bit IOA bus error */
+#define EEH_ERR_TYPE_UPSTREAM_IO_ERROR 0x10 /* Upstream IO error */
+
+/* EEH supported function types */
#define EEH_ERR_FUNC_MIN 0
#define EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */
#define EEH_ERR_FUNC_LD_MEM_DATA 1
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index db3370d1673c..ee156d397e93 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1169,8 +1169,15 @@ static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
struct pnv_phb *phb = hose->private_data;
s64 rc;
- if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
- type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
+ /* Map generic EEH Type to OPAL Type */
+ switch (type) {
+ case EEH_ERR_TYPE_32:
+ type = OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR;
+ break;
+ case EEH_ERR_TYPE_64:
+ type = OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64;
+ break;
+ default:
pr_warn("%s: Invalid error type %d\n",
__func__, type);
return -ERANGE;
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 5/5] powerpc/powernv: Map EEH error types to OPAL error injection types
2026-05-27 7:24 ` [PATCH v2 5/5] powerpc/powernv: Map EEH error types to OPAL error injection types Narayana Murty N
@ 2026-06-07 13:46 ` Sourabh Jain
0 siblings, 0 replies; 10+ messages in thread
From: Sourabh Jain @ 2026-06-07 13:46 UTC (permalink / raw)
To: Narayana Murty N, mahesh, maddy, mpe, christophe.leroy, gregkh,
oohall, npiggin
Cc: linuxppc-dev, linux-kernel, tyreld, vaibhav, sbhat, ganeshgr,
haren, thuth
Going by the cover letter, this patch series is about RTAS/pseries.
I don't understand why powernv changes are part of this
patch series. Could you please explain why they are included here?
On 27/05/26 12:54, Narayana Murty N wrote:
> Add a mapping layer in pnv_eeh_err_inject() to translate generic EEH
> error types to OPAL-specific error injection types. This decouples the
> VFIO error injection interface from OPAL implementation details.
>
> Map EEH_ERR_TYPE_32 to OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR and
> EEH_ERR_TYPE_64 to OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64. Return -ERANGE
> for unsupported error types.
>
> This provides better abstraction between the generic EEH layer and
> platform-specific implementation.
>
> Signed-off-by: Narayana Murty N <nnmlinux@linux•ibm.com>
> ---
> arch/powerpc/include/uapi/asm/eeh.h | 20 ++++++++++++++++++--
> arch/powerpc/platforms/powernv/eeh-powernv.c | 11 +++++++++--
> 2 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h
> index 86645cab2827..d88d82796905 100644
> --- a/arch/powerpc/include/uapi/asm/eeh.h
> +++ b/arch/powerpc/include/uapi/asm/eeh.h
> @@ -16,8 +16,24 @@
> #define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */
>
> /* EEH error types and functions */
> -#define EEH_ERR_TYPE_32 0 /* 32-bits error */
> -#define EEH_ERR_TYPE_64 1 /* 64-bits error */
> +#define EEH_ERR_TYPE_FATAL 0x1 /* Fatal error */
> +#define EEH_ERR_TYPE_RECOVERED_RANDOM 0x2 /* Recovered random event */
> +#define EEH_ERR_TYPE_RECOVERED_SPECIAL 0x3 /* Recovered special event */
> +#define EEH_ERR_TYPE_CORRUPTED_PAGE 0x4 /* Corrupted page */
> +#define EEH_ERR_TYPE_CORRUPTED_SLB 0x5 /* Corrupted SLB */
> +#define EEH_ERR_TYPE_TRANSLATOR_FAILURE 0x6 /* Translator failure */
> +#define EEH_ERR_TYPE_32 0x7 /* 32-bit IOA bus error */
> +#define EEH_ERR_TYPE_PLATFORM_SPECIFIC 0x8 /* Platform specific */
> +#define EEH_ERR_TYPE_CORRUPTED_DCACHE_START 0x9 /* Corrupted D-cache start */
> +#define EEH_ERR_TYPE_CORRUPTED_DCACHE_END 0xA /* Corrupted D-cache end */
> +#define EEH_ERR_TYPE_CORRUPTED_ICACHE_START 0xB /* Corrupted I-cache start */
> +#define EEH_ERR_TYPE_CORRUPTED_ICACHE_END 0xC /* Corrupted I-cache end */
> +#define EEH_ERR_TYPE_CORRUPTED_TLB_START 0xD /* Corrupted TLB start */
> +#define EEH_ERR_TYPE_CORRUPTED_TLB_END 0xE /* Corrupted TLB end */
> +#define EEH_ERR_TYPE_64 0xF /* 64-bit IOA bus error */
> +#define EEH_ERR_TYPE_UPSTREAM_IO_ERROR 0x10 /* Upstream IO error */
> +
> +/* EEH supported function types */
> #define EEH_ERR_FUNC_MIN 0
> #define EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */
> #define EEH_ERR_FUNC_LD_MEM_DATA 1
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index db3370d1673c..ee156d397e93 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -1169,8 +1169,15 @@ static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
> struct pnv_phb *phb = hose->private_data;
> s64 rc;
>
> - if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
> - type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
> + /* Map generic EEH Type to OPAL Type */
> + switch (type) {
> + case EEH_ERR_TYPE_32:
> + type = OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR;
> + break;
> + case EEH_ERR_TYPE_64:
> + type = OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64;
> + break;
> + default:
> pr_warn("%s: Invalid error type %d\n",
> __func__, type);
> return -ERANGE;
^ permalink raw reply [flat|nested] 10+ messages in thread