public inbox for linux-arm-kernel@lists.infradead.org 
 help / color / mirror / Atom feed
From: Robin Murphy <robin.murphy@arm•com>
To: Aviv Bakal <avivb@amazon•com>, will@kernel•org, mark.rutland@arm•com
Cc: linux-arm-kernel@lists•infradead.org,
	linux-perf-users@vger•kernel.org, linux-kernel@vger•kernel.org,
	zeev@amazon•com, blakgeof@amazon•com,
	ilkka@os•amperecomputing.com
Subject: Re: [PATCH v4 1/2] perf/arm-cmn: Move DTM index data out of hw_perf_event
Date: Mon, 1 Jun 2026 19:23:57 +0100	[thread overview]
Message-ID: <3dd451b2-6d80-4636-a832-35b24411df2b@arm.com> (raw)
In-Reply-To: <20260531110447.10095-2-avivb@amazon.com>

On 2026-05-31 12:04 pm, Aviv Bakal wrote:
> The amount of data we need to store all the per-DTM counter and
> watchpoint allocations is already testing the limits of hw_perf_event,
> and future CMNs are only likely to keep growing larger, so move these
> arrays out to separate memory allocations. As part of that we can use
> an explicit union for allocating cycle counters to dtc_cycles events,
> which is arguably nicer anyway.

Nit: not that I'm particularly precious about it, but this has lost my 
original authorship - I really should leave the technically-redundant 
"From:" line in when I post inline patches, as frankly even I prefer to 
just copy-paste the patch part rather than export the whole mail with 
its original headers for `git am --scissors`...

Thanks,
Robin.

> Signed-off-by: Robin Murphy <robin.murphy@arm•com>
> Signed-off-by: Aviv Bakal <avivb@amazon•com>
> ---
>   drivers/perf/arm-cmn.c | 89 ++++++++++++++++++++++++++++--------------
>   1 file changed, 59 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index f5305c8fdca4..f1978a53d1c1 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -597,17 +597,14 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
>   
>   struct arm_cmn_hw_event {
>   	struct arm_cmn_node *dn;
> -	u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
> +	union {
> +		u64 *dtm_idx;
> +		int cc_idx;
> +	};
> +	unsigned long *wp_idx;
>   	s8 dtc_idx[CMN_MAX_DTCS];
>   	u8 num_dns;
>   	u8 dtm_offset;
> -
> -	/*
> -	 * WP config registers are divided to UP and DOWN events. We need to
> -	 * keep to track only one of them.
> -	 */
> -	DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
> -
>   	bool wide_sel;
>   	enum cmn_filter_select filter_sel;
>   };
> @@ -625,25 +622,42 @@ static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
>   	return (struct arm_cmn_hw_event *)&event->hw;
>   }
>   
> -static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val)
> +static void arm_cmn_set_dtm_idx(struct arm_cmn_hw_event *hw, unsigned int pos, unsigned int val)
>   {
> -	x[pos / 32] |= (u64)val << ((pos % 32) * 2);
> +	hw->dtm_idx[pos / 32] |= (u64)val << ((pos % 32) * 2);
>   }
>   
> -static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
> +static unsigned int arm_cmn_get_dtm_idx(struct arm_cmn_hw_event *hw, unsigned int pos)
>   {
> -	return (x[pos / 32] >> ((pos % 32) * 2)) & 3;
> +	return (hw->dtm_idx[pos / 32] >> ((pos % 32) * 2)) & 3;
>   }
>   
> -static void arm_cmn_set_wp_idx(unsigned long *wp_idx, unsigned int pos, bool val)
> +static u64 *arm_cmn_alloc_dtm_idx(void)
> +{
> +	return kzalloc(DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64) * sizeof(u64), GFP_KERNEL);
> +}
> +
> +static void arm_cmn_set_wp_idx(struct arm_cmn_hw_event *hw, unsigned int pos, bool val)
>   {
>   	if (val)
> -		set_bit(pos, wp_idx);
> +		set_bit(pos, hw->wp_idx);
> +}
> +
> +static unsigned int arm_cmn_get_wp_idx(struct arm_cmn_hw_event *hw, unsigned int pos)
> +{
> +	return test_bit(pos, hw->wp_idx);
> +}
> +
> +static unsigned long *arm_cmn_alloc_wp_idx(void)
> +{
> +	return bitmap_zalloc(CMN_MAX_XPS, GFP_KERNEL);
>   }
>   
> -static unsigned int arm_cmn_get_wp_idx(unsigned long *wp_idx, unsigned int pos)
> +static void arm_cmn_clear_idx(struct arm_cmn_hw_event *hw)
>   {
> -	return test_bit(pos, wp_idx);
> +	memset(hw->dtm_idx, 0, DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64) * sizeof(u64));
> +	if (hw->wp_idx)
> +		bitmap_zero(hw->wp_idx, CMN_MAX_XPS);
>   }
>   
>   struct arm_cmn_event_attr {
> @@ -1376,7 +1390,7 @@ static int arm_cmn_get_assigned_wp_idx(struct perf_event *event,
>   				       struct arm_cmn_hw_event *hw,
>   				       unsigned int pos)
>   {
> -	return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw->wp_idx, pos);
> +	return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw, pos);
>   }
>   
>   static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm,
> @@ -1387,7 +1401,7 @@ static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm,
>   	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
>   
>   	dtm->wp_event[wp_idx] = hw->dtc_idx[dtc];
> -	arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event));
> +	arm_cmn_set_wp_idx(hw, pos, wp_idx - CMN_EVENT_EVENTID(event));
>   }
>   
>   static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx)
> @@ -1458,7 +1472,7 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
>   			dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
>   			reg = readq_relaxed(dtm->base + offset);
>   		}
> -		dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
> +		dtm_idx = arm_cmn_get_dtm_idx(hw, i);
>   		count += (u16)(reg >> (dtm_idx * 16));
>   	}
>   	return count;
> @@ -1505,7 +1519,7 @@ static void arm_cmn_event_read(struct perf_event *event)
>   	unsigned long flags;
>   
>   	if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
> -		delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
> +		delta = arm_cmn_read_cc(cmn->dtc + hw->cc_idx);
>   		local64_add(delta, &event->count);
>   		return;
>   	}
> @@ -1572,7 +1586,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
>   	int i;
>   
>   	if (type == CMN_TYPE_DTC) {
> -		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
> +		struct arm_cmn_dtc *dtc = cmn->dtc + hw->cc_idx;
>   
>   		writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
>   			       dtc->base + CMN_DT_DTC_CTL);
> @@ -1590,7 +1604,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
>   			writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx));
>   		}
>   	} else for_each_hw_dn(hw, dn, i) {
> -		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
> +		int dtm_idx = arm_cmn_get_dtm_idx(hw, i);
>   
>   		arm_cmn_set_event_sel_lo(dn, dtm_idx, CMN_EVENT_EVENTID(event),
>   					 hw->wide_sel);
> @@ -1606,7 +1620,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
>   	int i;
>   
>   	if (type == CMN_TYPE_DTC) {
> -		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
> +		struct arm_cmn_dtc *dtc = cmn->dtc + hw->cc_idx;
>   
>   		dtc->cc_active = false;
>   		writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
> @@ -1619,7 +1633,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
>   			writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx));
>   		}
>   	} else for_each_hw_dn(hw, dn, i) {
> -		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
> +		int dtm_idx = arm_cmn_get_dtm_idx(hw, i);
>   
>   		arm_cmn_set_event_sel_lo(dn, dtm_idx, 0, hw->wide_sel);
>   	}
> @@ -1763,6 +1777,14 @@ static enum cmn_filter_select arm_cmn_filter_sel(const struct arm_cmn *cmn,
>   }
>   
>   
> +static void arm_cmn_event_destroy(struct perf_event *event)
> +{
> +	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
> +
> +	kfree(hw->dtm_idx);
> +	bitmap_free(hw->wp_idx);
> +}
> +
>   static int arm_cmn_event_init(struct perf_event *event)
>   {
>   	struct arm_cmn *cmn = to_cmn(event->pmu);
> @@ -1787,6 +1809,11 @@ static int arm_cmn_event_init(struct perf_event *event)
>   	if (type == CMN_TYPE_DTC)
>   		return arm_cmn_validate_group(cmn, event);
>   
> +	event->destroy = arm_cmn_event_destroy;
> +	hw->dtm_idx = arm_cmn_alloc_dtm_idx();
> +	if (!hw->dtm_idx)
> +		return -ENOMEM;
> +
>   	eventid = CMN_EVENT_EVENTID(event);
>   	/* For watchpoints we need the actual XP node here */
>   	if (type == CMN_TYPE_WP) {
> @@ -1797,6 +1824,9 @@ static int arm_cmn_event_init(struct perf_event *event)
>   		/* ...but the DTM may depend on which port we're watching */
>   		if (cmn->multi_dtm)
>   			hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
> +		hw->wp_idx = arm_cmn_alloc_wp_idx();
> +		if (!hw->wp_idx)
> +			return -ENOMEM;
>   	} else if (type == CMN_TYPE_XP &&
>   		   (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
>   		hw->wide_sel = true;
> @@ -1847,7 +1877,7 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
>   
>   	while (i--) {
>   		struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset;
> -		unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
> +		unsigned int dtm_idx = arm_cmn_get_dtm_idx(hw, i);
>   
>   		if (type == CMN_TYPE_WP) {
>   			int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
> @@ -1861,8 +1891,7 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
>   		dtm->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx);
>   		writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
>   	}
> -	memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
> -	memset(hw->wp_idx, 0, sizeof(hw->wp_idx));
> +	arm_cmn_clear_idx(hw);
>   
>   	for_each_hw_dtc_idx(hw, j, idx)
>   		cmn->dtc[j].counters[idx] = NULL;
> @@ -1882,7 +1911,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>   				return -ENOSPC;
>   
>   		cmn->dtc[i].cycles = event;
> -		hw->dtc_idx[0] = i;
> +		hw->cc_idx = i;
>   
>   		if (flags & PERF_EF_START)
>   			arm_cmn_event_start(event, 0);
> @@ -1947,7 +1976,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>   				goto free_dtms;
>   		}
>   
> -		arm_cmn_set_index(hw->dtm_idx, i, dtm_idx);
> +		arm_cmn_set_dtm_idx(hw, i, dtm_idx);
>   
>   		dtm->input_sel[dtm_idx] = input_sel;
>   		shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
> @@ -1980,7 +2009,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
>   	arm_cmn_event_stop(event, PERF_EF_UPDATE);
>   
>   	if (type == CMN_TYPE_DTC)
> -		cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
> +		cmn->dtc[hw->cc_idx].cycles = NULL;
>   	else
>   		arm_cmn_event_clear(cmn, event, hw->num_dns);
>   }



  reply	other threads:[~2026-06-01 18:24 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-03 15:51 [PATCH] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
2026-05-04 13:39 ` [PATCH v2] " Aviv Bakal
2026-05-05  2:31   ` kernel test robot
2026-05-21 16:02   ` Robin Murphy
2026-05-24 15:38   ` [PATCH v3 0/2] " Aviv Bakal
2026-05-24 15:38     ` [PATCH v3 1/2] perf/arm-cmn: Move struct arm_cmn_hw_event into struct hw_perf_event Aviv Bakal
2026-05-29 16:44       ` Robin Murphy
2026-05-24 15:38     ` [PATCH v3 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
2026-05-31 11:04     ` [PATCH v4 0/2] " Aviv Bakal
2026-05-31 11:04       ` [PATCH v4 1/2] perf/arm-cmn: Move DTM index data out of hw_perf_event Aviv Bakal
2026-06-01 18:23         ` Robin Murphy [this message]
2026-06-02  5:49         ` Ilkka Koskinen
2026-05-31 11:04       ` [PATCH v4 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
2026-06-01 17:48         ` Robin Murphy
2026-06-02  5:55         ` Ilkka Koskinen
2026-06-03 15:00       ` [PATCH v5 0/2] " Aviv Bakal
2026-06-03 15:00         ` [PATCH v5 1/2] perf/arm-cmn: Move DTM index data out of hw_perf_event Aviv Bakal
2026-06-03 15:00         ` [PATCH v5 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3dd451b2-6d80-4636-a832-35b24411df2b@arm.com \
    --to=robin.murphy@arm$(echo .)com \
    --cc=avivb@amazon$(echo .)com \
    --cc=blakgeof@amazon$(echo .)com \
    --cc=ilkka@os$(echo .)amperecomputing.com \
    --cc=linux-arm-kernel@lists$(echo .)infradead.org \
    --cc=linux-kernel@vger$(echo .)kernel.org \
    --cc=linux-perf-users@vger$(echo .)kernel.org \
    --cc=mark.rutland@arm$(echo .)com \
    --cc=will@kernel$(echo .)org \
    --cc=zeev@amazon$(echo .)com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox