public inbox for linux-arm-kernel@lists.infradead.org 
 help / color / mirror / Atom feed
From: James Clark <james.clark@linaro•org>
To: Leo Yan <leo.yan@arm•com>
Cc: linux-arm-kernel@lists•infradead.org, coresight@lists•linaro.org,
	linux-perf-users@vger•kernel.org,
	Arnaldo Carvalho de Melo <acme@kernel•org>,
	John Garry <john.g.garry@oracle•com>,
	Will Deacon <will@kernel•org>, Mike Leach <mike.leach@arm•com>,
	Suzuki K Poulose <suzuki.poulose@arm•com>,
	Namhyung Kim <namhyung@kernel•org>,
	Mark Rutland <mark.rutland@arm•com>,
	Alexander Shishkin <alexander.shishkin@linux•intel.com>,
	Jiri Olsa <jolsa@kernel•org>, Ian Rogers <irogers@google•com>,
	Adrian Hunter <adrian.hunter@intel•com>,
	Al Grant <al.grant@arm•com>,
	Paschalis Mpeis <paschalis.mpeis@arm•com>,
	Amir Ayupov <aaupov@fb•com>
Subject: Re: [PATCH v6 3/8] perf cs-etm: Use thread-stack for last branch entries
Date: Thu, 4 Jun 2026 15:09:26 +0100	[thread overview]
Message-ID: <9d1e0448-27d7-42d3-aaa3-2d09489f18d9@linaro.org> (raw)
In-Reply-To: <20260526-b4-arm_cs_callchain_support_v1-v6-3-f9f49f53c9dd@arm.com>



On 26/05/2026 5:59 pm, Leo Yan wrote:
> CS ETM maintains its own circular array for last branch entries, with
> local helpers to update, copy and reset the branch stack. This duplicates
> logic already provided by the common code.
> 
> Record branches with thread_stack__event() and synthesize the branch stack
> with thread_stack__br_sample(). This removes the local last_branch_rb
> buffer and position tracking. Keep the buffer number updated via
> thread_stack__set_trace_nr(), which is used when exporting samples to
> Python scripts.
> 
> The output should remain the same, except that be->flags.predicted is no
> longer set. Since CoreSight trace does not provide branch prediction
> information, clearing the flag avoids confusion.
> 
> Signed-off-by: Leo Yan <leo.yan@arm•com>
> ---
>   tools/perf/util/cs-etm.c | 152 +++++++++++++----------------------------------
>   1 file changed, 41 insertions(+), 111 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 5bff8811d61e423463b7bd4e20d599d5b5307a1a..398ab3b7a429d402cc8e5f6cccb35c0b7c253732 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -83,14 +83,13 @@ struct cs_etm_auxtrace {
>   struct cs_etm_traceid_queue {
>   	u8 trace_chan_id;
>   	u64 period_instructions;
> -	size_t last_branch_pos;
>   	union perf_event *event_buf;
>   	struct thread *thread;
>   	struct thread *prev_packet_thread;
>   	ocsd_ex_level prev_packet_el;
>   	ocsd_ex_level el;
> +	unsigned int br_stack_sz;
>   	struct branch_stack *last_branch;
> -	struct branch_stack *last_branch_rb;
>   	struct cs_etm_packet *prev_packet;
>   	struct cs_etm_packet *packet;
>   	struct cs_etm_packet_queue packet_queue;
> @@ -635,9 +634,8 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>   		tidq->last_branch = zalloc(sz);
>   		if (!tidq->last_branch)
>   			goto out_free;
> -		tidq->last_branch_rb = zalloc(sz);
> -		if (!tidq->last_branch_rb)
> -			goto out_free;
> +
> +		tidq->br_stack_sz = etm->synth_opts.last_branch_sz;
>   	}
>   
>   	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
> @@ -647,7 +645,6 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>   	return 0;
>   
>   out_free:
> -	zfree(&tidq->last_branch_rb);
>   	zfree(&tidq->last_branch);
>   	zfree(&tidq->prev_packet);
>   	zfree(&tidq->packet);
> @@ -941,7 +938,6 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>   		thread__zput(tidq->prev_packet_thread);
>   		zfree(&tidq->event_buf);
>   		zfree(&tidq->last_branch);
> -		zfree(&tidq->last_branch_rb);
>   		zfree(&tidq->prev_packet);
>   		zfree(&tidq->packet);
>   		zfree(&tidq);
> @@ -1281,57 +1277,6 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
>   	return ret;
>   }
>   
> -static inline
> -void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
> -				 struct cs_etm_traceid_queue *tidq)
> -{
> -	struct branch_stack *bs_src = tidq->last_branch_rb;
> -	struct branch_stack *bs_dst = tidq->last_branch;
> -	size_t nr = 0;
> -
> -	/*
> -	 * Set the number of records before early exit: ->nr is used to
> -	 * determine how many branches to copy from ->entries.
> -	 */
> -	bs_dst->nr = bs_src->nr;
> -
> -	/*
> -	 * Early exit when there is nothing to copy.
> -	 */
> -	if (!bs_src->nr)
> -		return;
> -
> -	/*
> -	 * As bs_src->entries is a circular buffer, we need to copy from it in
> -	 * two steps.  First, copy the branches from the most recently inserted
> -	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
> -	 */
> -	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
> -	memcpy(&bs_dst->entries[0],
> -	       &bs_src->entries[tidq->last_branch_pos],
> -	       sizeof(struct branch_entry) * nr);
> -
> -	/*
> -	 * If we wrapped around at least once, the branches from the beginning
> -	 * of the bs_src->entries buffer and until the ->last_branch_pos element
> -	 * are older valid branches: copy them over.  The total number of
> -	 * branches copied over will be equal to the number of branches asked by
> -	 * the user in last_branch_sz.
> -	 */
> -	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
> -		memcpy(&bs_dst->entries[nr],
> -		       &bs_src->entries[0],
> -		       sizeof(struct branch_entry) * tidq->last_branch_pos);
> -	}
> -}
> -
> -static inline
> -void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
> -{
> -	tidq->last_branch_pos = 0;
> -	tidq->last_branch_rb->nr = 0;
> -}
> -
>   static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
>   					 u8 trace_chan_id, u64 addr)
>   {
> @@ -1400,38 +1345,6 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
>   	return addr;
>   }
>   
> -static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
> -					  struct cs_etm_traceid_queue *tidq)
> -{
> -	struct branch_stack *bs = tidq->last_branch_rb;
> -	struct branch_entry *be;
> -
> -	/*
> -	 * The branches are recorded in a circular buffer in reverse
> -	 * chronological order: we start recording from the last element of the
> -	 * buffer down.  After writing the first element of the stack, move the
> -	 * insert position back to the end of the buffer.
> -	 */
> -	if (!tidq->last_branch_pos)
> -		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
> -
> -	tidq->last_branch_pos -= 1;
> -
> -	be       = &bs->entries[tidq->last_branch_pos];
> -	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
> -	be->to	 = cs_etm__first_executed_instr(tidq->packet);
> -	/* No support for mispredict */
> -	be->flags.mispred = 0;
> -	be->flags.predicted = 1;
> -
> -	/*
> -	 * Increment bs->nr until reaching the number of last branches asked by
> -	 * the user on the command line.
> -	 */
> -	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
> -		bs->nr += 1;
> -}
> -
>   static int cs_etm__inject_event(struct cs_etm_auxtrace *etm, union perf_event *event,
>   			       struct perf_sample *sample, u64 type)
>   {
> @@ -1579,6 +1492,37 @@ static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
>   		return etm->latest_kernel_timestamp;
>   }
>   
> +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> +				    struct cs_etm_traceid_queue *tidq)
> +{
> +	u64 from, to;
> +	int size;
> +
> +	if (!tidq->prev_packet->last_instr_taken_branch)
> +		return;
> +
> +	if (tidq->prev_packet->sample_type != CS_ETM_RANGE ||
> +	    tidq->packet->sample_type != CS_ETM_RANGE)
> +		return;
> +
> +	if (etmq->etm->synth_opts.last_branch) {
> +		from = cs_etm__last_executed_instr(tidq->prev_packet);
> +		to = cs_etm__first_executed_instr(tidq->packet);
> +
> +		size = cs_etm__instr_size(etmq, tidq->trace_chan_id,
> +					  tidq->prev_packet->isa, from);
> +
> +		/* Enable callchain so thread stack entry can be allocated */
> +		thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> +				    tidq->prev_packet->flags, from, to, size,
> +				    etmq->buffer->buffer_nr + 1, true,
> +				    tidq->br_stack_sz, 0);
> +	} else {
> +		thread_stack__set_trace_nr(tidq->thread, tidq->prev_packet->cpu,
> +					   etmq->buffer->buffer_nr + 1);
> +	}
> +}
> +
>   static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>   					    struct cs_etm_traceid_queue *tidq,
>   					    u64 addr, u64 period)
> @@ -1608,8 +1552,12 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>   
>   	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
>   
> -	if (etm->synth_opts.last_branch)
> +	if (etm->synth_opts.last_branch) {
> +		thread_stack__br_sample(tidq->thread, tidq->packet->cpu,
> +					tidq->last_branch,
> +					tidq->br_stack_sz);
>   		sample.branch_stack = tidq->last_branch;
> +	}
>   
>   	if (etm->synth_opts.inject) {
>   		ret = cs_etm__inject_event(etm, event, &sample,
> @@ -1798,14 +1746,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
>   
>   	tidq->period_instructions += tidq->packet->instr_count;
>   
> -	/*
> -	 * Record a branch when the last instruction in
> -	 * PREV_PACKET is a branch.
> -	 */
> -	if (etm->synth_opts.last_branch &&
> -	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
> -	    tidq->prev_packet->last_instr_taken_branch)
> -		cs_etm__update_last_branch_rb(etmq, tidq);
> +	cs_etm__add_stack_event(etmq, tidq);

Would it be cleaner to call this whenever a branch sample is generated? 
Seems like the conditions for calling thread_stack__event() and 
cs_etm__synth_branch_sample() are slightly different (ignoring the fact 
that branches are only generated when the user asks for them).

Maybe the conditions should be different, in which case a comment explaining 
why would help. Or, if they're meant to be the same, a shared function for 
the conditions would help.

For example, we don't push a branch to the stack for 
CS_ETM_DISCONTINUITY, but we do generate a branch sample with a source 
("from") address of 0.

>   
>   	if (etm->synth_opts.instructions &&
>   	    tidq->period_instructions >= etm->instructions_sample_period) {
> @@ -1864,10 +1805,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
>   		u64 offset = etm->instructions_sample_period - instrs_prev;
>   		u64 addr;
>   
> -		/* Prepare last branches for instruction sample */
> -		if (etm->synth_opts.last_branch)
> -			cs_etm__copy_last_branch_rb(etmq, tidq);
> -
>   		while (tidq->period_instructions >=
>   				etm->instructions_sample_period) {
>   			/*
> @@ -1947,10 +1884,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
>   	    etmq->etm->synth_opts.instructions &&
>   	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
>   		u64 addr;
> -
> -		/* Prepare last branches for instruction sample */
> -		cs_etm__copy_last_branch_rb(etmq, tidq);
> -
>   		/*
>   		 * Generate a last branch event for the branches left in the
>   		 * circular buffer at the end of the trace.
> @@ -1982,7 +1915,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
>   
>   	/* Reset last branches after flush the trace */
>   	if (etm->synth_opts.last_branch)
> -		cs_etm__reset_last_branch_rb(tidq);
> +		thread_stack__flush(tidq->thread);
>   
>   	return err;
>   }
> @@ -2006,9 +1939,6 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
>   	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
>   		u64 addr;
>   
> -		/* Prepare last branches for instruction sample */
> -		cs_etm__copy_last_branch_rb(etmq, tidq);
> -
>   		/*
>   		 * Use the address of the end of the last reported execution
>   		 * range.
> 



  reply	other threads:[~2026-06-04 14:09 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-26 16:59 [PATCH v6 0/8] perf cs-etm: Support thread stack and callchain Leo Yan
2026-05-26 16:59 ` [PATCH v6 1/8] perf cs-etm: Decode ETE exception packets Leo Yan
2026-06-04 14:10   ` James Clark
2026-05-26 16:59 ` [PATCH v6 2/8] perf cs-etm: Refactor instruction size handling Leo Yan
2026-06-04 14:11   ` James Clark
2026-05-26 16:59 ` [PATCH v6 3/8] perf cs-etm: Use thread-stack for last branch entries Leo Yan
2026-06-04 14:09   ` James Clark [this message]
2026-05-26 16:59 ` [PATCH v6 4/8] perf cs-etm: Flush thread stacks after decoder reset Leo Yan
2026-06-04 14:12   ` James Clark
2026-05-26 16:59 ` [PATCH v6 5/8] perf cs-etm: Support call indentation Leo Yan
2026-06-04 14:24   ` James Clark
2026-05-26 16:59 ` [PATCH v6 6/8] perf cs-etm: Filter synthesized branch samples Leo Yan
2026-06-04 14:42   ` James Clark
2026-05-26 16:59 ` [PATCH v6 7/8] perf cs-etm: Synthesize callchains for instruction samples Leo Yan
2026-06-04 15:07   ` James Clark
2026-05-26 16:59 ` [PATCH v6 8/8] perf test: Add Arm CoreSight callchain test Leo Yan
2026-05-29 14:57 ` [PATCH v6 0/8] perf cs-etm: Support thread stack and callchain Arnaldo Carvalho de Melo
2026-06-01 11:03   ` Leo Yan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9d1e0448-27d7-42d3-aaa3-2d09489f18d9@linaro.org \
    --to=james.clark@linaro$(echo .)org \
    --cc=aaupov@fb$(echo .)com \
    --cc=acme@kernel$(echo .)org \
    --cc=adrian.hunter@intel$(echo .)com \
    --cc=al.grant@arm$(echo .)com \
    --cc=alexander.shishkin@linux$(echo .)intel.com \
    --cc=coresight@lists$(echo .)linaro.org \
    --cc=irogers@google$(echo .)com \
    --cc=john.g.garry@oracle$(echo .)com \
    --cc=jolsa@kernel$(echo .)org \
    --cc=leo.yan@arm$(echo .)com \
    --cc=linux-arm-kernel@lists$(echo .)infradead.org \
    --cc=linux-perf-users@vger$(echo .)kernel.org \
    --cc=mark.rutland@arm$(echo .)com \
    --cc=mike.leach@arm$(echo .)com \
    --cc=namhyung@kernel$(echo .)org \
    --cc=paschalis.mpeis@arm$(echo .)com \
    --cc=suzuki.poulose@arm$(echo .)com \
    --cc=will@kernel$(echo .)org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox