From: Arnd Bergmann <arnd@arndb•de>
To: linuxppc-dev@ozlabs•org
Cc: linux-kernel@vger•kernel.org, cbe-oss-dev@ozlabs•org,
oprofile-list@lists•sourceforge.net, Carl Love <cel@us•ibm.com>
Subject: Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Date: Tue, 27 Feb 2007 00:50:59 +0100 [thread overview]
Message-ID: <200702270051.00393.arnd@arndb.de> (raw)
In-Reply-To: <1172102523.5233.31.camel@dyn9047021078.beaverton.ibm.com>
On Thursday 22 February 2007, Carl Love wrote:
> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c
> to add in the SPU profiling capabilities.  In addition, a 'cell' subdirectory
> was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling
> code.
There was a significant amount of whitespace breakage in this patch,
which I cleaned up. The patch below consists of the other things
I changed as a further cleanup. Note that I changed the format
of the context switch record, which I found too complicated, as
I described on IRC last week.
Arnd <><
--
Subject: cleanup spu oprofile code
From: Arnd Bergmann <arnd.bergmann@de•ibm.com>
This cleans up some of the new oprofile code. It's mostly
cosmetic changes, like the way multi-line comments are formatted.
The most significant change is a simplification of the
context-switch record format.
It does mean the oprofile report tool needs to be adapted,
but I'm sure that it pays off in the end.
Signed-off-by: Arnd Bergmann <arnd.bergmann@de•ibm.com>
Index: linux-2.6/arch/powerpc/oprofile/cell/spu_task_sync.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
=2D-- linux-2.6.orig/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ linux-2.6/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -61,11 +61,12 @@ static void destroy_cached_info(struct k
static struct cached_info * get_cached_info(struct spu * the_spu, int spu_=
num)
{
struct kref * ref;
=2D struct cached_info * ret_info =3D NULL;
+ struct cached_info * ret_info;
if (spu_num >=3D num_spu_nodes) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Invalid index %d into spu info cache\n",
__FUNCTION__, __LINE__, spu_num);
+ ret_info =3D NULL;
goto out;
}
if (!spu_info[spu_num] && the_spu) {
@@ -89,9 +90,9 @@ static struct cached_info * get_cached_i
static int
prepare_cached_spu_info(struct spu * spu, unsigned int objectId)
{
=2D unsigned long flags =3D 0;
+ unsigned long flags;
struct vma_to_fileoffset_map * new_map;
=2D int retval =3D 0;
+ int retval;
struct cached_info * info;
=20
/* We won't bother getting cache_lock here since
@@ -112,6 +113,7 @@ prepare_cached_spu_info(struct spu * spu
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: create vma_map failed\n",
__FUNCTION__, __LINE__);
+ retval =3D -ENOMEM;
goto err_alloc;
}
new_map =3D create_vma_map(spu, objectId);
@@ -119,6 +121,7 @@ prepare_cached_spu_info(struct spu * spu
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: create vma_map failed\n",
__FUNCTION__, __LINE__);
+ retval =3D -ENOMEM;
goto err_alloc;
}
=20
@@ -144,7 +147,7 @@ prepare_cached_spu_info(struct spu * spu
goto out;
=20
err_alloc:
=2D retval =3D -1;
+ kfree(info);
out:
return retval;
}
@@ -215,11 +218,9 @@ static inline unsigned long fast_get_dco
static unsigned long
get_exec_dcookie_and_offset(struct spu * spu, unsigned int * offsetp,
unsigned long * spu_bin_dcookie,
=2D unsigned long * shlib_dcookie,
unsigned int spu_ref)
{
unsigned long app_cookie =3D 0;
=2D unsigned long * image_cookie =3D NULL;
unsigned int my_offset =3D 0;
struct file * app =3D NULL;
struct vm_area_struct * vma;
@@ -252,24 +253,17 @@ get_exec_dcookie_and_offset(struct spu *
my_offset, spu_ref,
vma->vm_file->f_dentry->d_name.name);
*offsetp =3D my_offset;
=2D if (my_offset =3D=3D 0)
=2D image_cookie =3D spu_bin_dcookie;
=2D else if (vma->vm_file !=3D app)
=2D image_cookie =3D shlib_dcookie;
break;
}
=20
=2D if (image_cookie) {
=2D *image_cookie =3D fast_get_dcookie(vma->vm_file->f_dentry,
+ *spu_bin_dcookie =3D fast_get_dcookie(vma->vm_file->f_dentry,
vma->vm_file->f_vfsmnt);
=2D pr_debug("got dcookie for %s\n",
=2D vma->vm_file->f_dentry->d_name.name);
=2D }
+ pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
=20
=2D out:
+out:
return app_cookie;
=20
=2D fail_no_image_cookie:
+fail_no_image_cookie:
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Cannot find dcookie for SPU binary\n",
__FUNCTION__, __LINE__);
@@ -285,18 +279,18 @@ get_exec_dcookie_and_offset(struct spu *
static int process_context_switch(struct spu * spu, unsigned int objectId)
{
unsigned long flags;
=2D int retval =3D 0;
=2D unsigned int offset =3D 0;
=2D unsigned long spu_cookie =3D 0, app_dcookie =3D 0, shlib_cookie =3D 0;
+ int retval;
+ unsigned int offset;
+ unsigned long spu_cookie, app_dcookie;
+
retval =3D prepare_cached_spu_info(spu, objectId);
=2D if (retval =3D=3D -1) {
+ if (retval)
goto out;
=2D }
+
/* Get dcookie first because a mutex_lock is taken in that
* code path, so interrupts must not be disabled.
*/
=2D app_dcookie =3D get_exec_dcookie_and_offset(spu, &offset, &spu_cookie,
=2D &shlib_cookie, objectId);
+ app_dcookie =3D get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, ob=
jectId);
=20
/* Record context info in event buffer */
spin_lock_irqsave(&buffer_lock, flags);
@@ -306,27 +300,8 @@ static int process_context_switch(struct
add_event_entry(spu->pid);
add_event_entry(spu->tgid);
add_event_entry(app_dcookie);
=2D
=2D if (offset) {
=2D /* When offset is non-zero, the SPU ELF was embedded;
=2D * otherwise, it was loaded from a separate binary file. For
=2D * embedded case, we record the offset into the embedding file
=2D * where the SPU ELF was placed. The embedding file may be
=2D * either the executable application binary or shared library.
=2D * For the non-embedded case, we record a dcookie that
=2D * points to the location of the separate SPU binary that was
=2D * loaded.
=2D */
=2D if (shlib_cookie) {
=2D add_event_entry(SPU_SHLIB_COOKIE_CODE);
=2D add_event_entry(shlib_cookie);
=2D }
=2D add_event_entry(SPU_OFFSET_CODE);
=2D add_event_entry(offset);
=2D } else {
=2D add_event_entry(SPU_COOKIE_CODE);
=2D add_event_entry(spu_cookie);
=2D }
+ add_event_entry(spu_cookie);
+ add_event_entry(offset);
spin_unlock_irqrestore(&buffer_lock, flags);
smp_wmb();
out:
@@ -343,8 +318,8 @@ static int spu_active_notify(struct noti
void * data)
{
int retval;
=2D unsigned long flags =3D 0;
=2D struct spu * the_spu =3D data;
+ unsigned long flags;
+ struct spu *the_spu =3D data;
pr_debug("SPU event notification arrived\n");
if (!val){
spin_lock_irqsave(&cache_lock, flags);
@@ -403,8 +378,7 @@ void spu_sync_buffer(int spu_num, unsign
int num_samples)
{
unsigned long long file_offset;
=2D unsigned long cache_lock_flags =3D 0;
=2D unsigned long buffer_lock_flags =3D 0;
+ unsigned long flags;
int i;
struct vma_to_fileoffset_map * map;
struct spu * the_spu;
@@ -417,29 +391,27 @@ void spu_sync_buffer(int spu_num, unsign
* corresponding to this cached_info may end, thus resulting
* in the destruction of the cached_info.
*/
=2D spin_lock_irqsave(&cache_lock, cache_lock_flags);
+ spin_lock_irqsave(&cache_lock, flags);
c_info =3D get_cached_info(NULL, spu_num);
=2D if (c_info =3D=3D NULL) {
+ if (!c_info) {
/* This legitimately happens when the SPU task ends before all
* samples are recorded. No big deal -- so we just drop a few samples.
*/
pr_debug("SPU_PROF: No cached SPU contex "
"for SPU #%d. Dropping samples.\n", spu_num);
=2D spin_unlock_irqrestore(&cache_lock, cache_lock_flags);
=2D return ;
+ goto out;
}
=20
map =3D c_info->map;
the_spu =3D c_info->the_spu;
=2D spin_lock_irqsave(&buffer_lock, buffer_lock_flags);
+ spin_lock(&buffer_lock);
for (i =3D 0; i < num_samples; i++) {
unsigned int sample =3D *(samples+i);
int grd_val =3D 0;
file_offset =3D 0;
if (sample =3D=3D 0)
continue;
=2D file_offset =3D vma_map_lookup(
=2D map, sample, the_spu, &grd_val);
+ file_offset =3D vma_map_lookup( map, sample, the_spu, &grd_val);
=20
/* If overlays are used by this SPU application, the guard
* value is non-zero, indicating which overlay section is in
@@ -460,8 +432,9 @@ void spu_sync_buffer(int spu_num, unsign
continue;
add_event_entry(file_offset | spu_num_shifted);
}
=2D spin_unlock_irqrestore(&buffer_lock, buffer_lock_flags);
=2D spin_unlock_irqrestore(&cache_lock, cache_lock_flags);
+ spin_unlock(&buffer_lock);
+out:
+ spin_unlock_irqrestore(&cache_lock, flags);
}
=20
=20
Index: linux-2.6/arch/powerpc/oprofile/op_model_cell.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
=2D-- linux-2.6.orig/arch/powerpc/oprofile/op_model_cell.c
+++ linux-2.6/arch/powerpc/oprofile/op_model_cell.c
@@ -40,7 +40,8 @@
#include "../platforms/cell/cbe_regs.h"
#include "cell/pr_util.h"
=20
=2D/* spu_cycle_reset is the number of cycles between samples.
+/*
+ * spu_cycle_reset is the number of cycles between samples.
* This variable is used for SPU profiling and should ONLY be set
* at the beginning of cell_reg_setup; otherwise, it's read-only.
*/
@@ -73,7 +74,6 @@ struct pmc_cntrl_data {
/*
* ibm,cbe-perftools rtas parameters
*/
=2D
struct pm_signal {
u16 cpu; /* Processor to modify */
u16 sub_unit; /* hw subunit this applies to (if applicable)*/
@@ -123,7 +123,8 @@ static DEFINE_PER_CPU(unsigned long[NR_P
=20
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
=20
=2D/* The CELL profiling code makes rtas calls to setup the debug bus to
+/*
+ * The CELL profiling code makes rtas calls to setup the debug bus to
* route the performance signals. Additionally, SPU profiling requires
* a second rtas call to setup the hardware to capture the SPU PCs.
* The EIO error value is returned if the token lookups or the rtas
@@ -137,16 +138,21 @@ static struct pmc_cntrl_data pmc_cntrl[N
* either.
*/
=20
=2D/* Interpetation of hdw_thread:
+/*
+ * Interpretation of hdw_thread:
* 0 - even virtual cpus 0, 2, 4,...
* 1 - odd virtual cpus 1, 3, 5, ...
+ *
+ * FIXME: this is strictly wrong, we need to clean this up in a number
+ * of places. It works for now. -arnd
*/
static u32 hdw_thread;
=20
static u32 virt_cntr_inter_mask;
static struct timer_list timer_virt_cntr;
=20
=2D/* pm_signal needs to be global since it is initialized in
+/*
+ * pm_signal needs to be global since it is initialized in
* cell_reg_setup at the time when the necessary information
* is available.
*/
@@ -167,7 +173,6 @@ static unsigned char input_bus[NUM_INPUT
/*
* Firmware interface functions
*/
=2D
static int
rtas_ibm_cbe_perftools(int subfunc, int passthru,
void *address, unsigned long length)
@@ -183,12 +188,13 @@ static void pm_rtas_reset_signals(u32 no
int ret;
struct pm_signal pm_signal_local;
=20
=2D /* The debug bus is being set to the passthru disable state.
=2D * However, the FW still expects atleast one legal signal routing
=2D * entry or it will return an error on the arguments. If we don't
=2D * supply a valid entry, we must ignore all return values. Ignoring
=2D * all return values means we might miss an error we should be
=2D * concerned about.
+ /*
+ * The debug bus is being set to the passthru disable state.
+ * However, the FW still expects at least one legal signal routing
+ * entry or it will return an error on the arguments. If we don't
+ * supply a valid entry, we must ignore all return values. Ignoring
+ * all return values means we might miss an error we should be
+ * concerned about.
*/
=20
/* fw expects physical cpu #. */
@@ -203,7 +209,8 @@ static void pm_rtas_reset_signals(u32 no
sizeof(struct pm_signal));
=20
if (unlikely(ret))
=2D /* Not a fatal error. For Oprofile stop, the oprofile
+ /*
+ * Not a fatal error. For Oprofile stop, the oprofile
* functions do not support returning an error for
* failure to stop OProfile.
*/
@@ -217,7 +224,8 @@ static int pm_rtas_activate_signals(u32=20
int i, j;
struct pm_signal pm_signal_local[NR_PHYS_CTRS];
=20
=2D /* There is no debug setup required for the cycles event.
+ /*
+ * There is no debug setup required for the cycles event.
* Note that only events in the same group can be used.
* Otherwise, there will be conflicts in correctly routing
* the signals on the debug bus. It is the responsiblity
@@ -295,7 +303,8 @@ static void set_pm_event(u32 ctr, int ev
pm_regs.pm07_cntrl[ctr] |=3D PM07_CTR_POLARITY(polarity);
pm_regs.pm07_cntrl[ctr] |=3D PM07_CTR_INPUT_CONTROL(input_control);
=20
=2D /* Some of the islands signal selection is based on 64 bit words.
+ /*
+ * Some of the islands signal selection is based on 64 bit words.
* The debug bus words are 32 bits, the input words to the performance
* counters are defined as 32 bits. Need to convert the 64 bit island
* specification to the appropriate 32 input bit and bus word for the
@@ -345,7 +354,8 @@ out:
=20
static void write_pm_cntrl(int cpu)
{
=2D /* Oprofile will use 32 bit counters, set bits 7:10 to 0
+ /*
+ * Oprofile will use 32 bit counters, set bits 7:10 to 0
* pmregs.pm_cntrl is a global
*/
=20
@@ -362,7 +372,8 @@ static void write_pm_cntrl(int cpu)
if (pm_regs.pm_cntrl.freeze =3D=3D 1)
val |=3D CBE_PM_FREEZE_ALL_CTRS;
=20
=2D /* Routine set_count_mode must be called previously to set
+ /*
+ * Routine set_count_mode must be called previously to set
* the count mode based on the user selection of user and kernel.
*/
val |=3D CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
@@ -372,7 +383,8 @@ static void write_pm_cntrl(int cpu)
static inline void
set_count_mode(u32 kernel, u32 user)
{
=2D /* The user must specify user and kernel if they want them. If
+ /*
+ * The user must specify user and kernel if they want them. If
* neither is specified, OProfile will count in hypervisor mode.
* pm_regs.pm_cntrl is a global
*/
@@ -413,17 +425,18 @@ static inline void enable_ctr(u32 cpu, u
* pair of per-cpu arrays is used for storing the previous and next
* pmc values for a given node.
* NOTE: We use the per-cpu variable to improve cache performance.
+ *
+ * This routine will alternate loading the virtual counters for
+ * virtual CPUs
*/
static void cell_virtual_cntr(unsigned long data)
{
=2D /* This routine will alternate loading the virtual counters for
=2D * virtual CPUs
=2D */
int i, prev_hdw_thread, next_hdw_thread;
u32 cpu;
unsigned long flags;
=20
=2D /* Make sure that the interrupt_hander and the virt counter are
+ /*
+ * Make sure that the interrupt_hander and the virt counter are
* not both playing with the counters on the same node.
*/
=20
@@ -435,22 +448,25 @@ static void cell_virtual_cntr(unsigned l
hdw_thread =3D 1 ^ hdw_thread;
next_hdw_thread =3D hdw_thread;
=20
=2D for (i =3D 0; i < num_counters; i++)
=2D /* There are some per thread events. Must do the
+ /*
+ * There are some per thread events. Must do the
* set event, for the thread that is being started
*/
+ for (i =3D 0; i < num_counters; i++)
set_pm_event(i,
pmc_cntrl[next_hdw_thread][i].evnts,
pmc_cntrl[next_hdw_thread][i].masks);
=20
=2D /* The following is done only once per each node, but
+ /*
+ * The following is done only once per each node, but
* we need cpu #, not node #, to pass to the cbe_xxx functions.
*/
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
continue;
=20
=2D /* stop counters, save counter values, restore counts
+ /*
+ * stop counters, save counter values, restore counts
* for previous thread
*/
cbe_disable_pm(cpu);
@@ -479,13 +495,15 @@ static void cell_virtual_cntr(unsigned l
next_hdw_thread)[i]);
}
=20
=2D /* Switch to the other thread. Change the interrupt
+ /*
+ * Switch to the other thread. Change the interrupt
* and control regs to be scheduled on the CPU
* corresponding to the thread to execute.
*/
for (i =3D 0; i < num_counters; i++) {
if (pmc_cntrl[next_hdw_thread][i].enabled) {
=2D /* There are some per thread events.
+ /*
+ * There are some per thread events.
* Must do the set event, enable_cntr
* for each cpu.
*/
@@ -517,9 +535,8 @@ static void start_virt_cntrs(void)
}
=20
/* This function is called once for all cpus combined */
=2Dstatic int
=2Dcell_reg_setup(struct op_counter_config *ctr,
=2D struct op_system_config *sys, int num_ctrs)
+static int cell_reg_setup(struct op_counter_config *ctr,
+ struct op_system_config *sys, int num_ctrs)
{
int i, j, cpu;
spu_cycle_reset =3D 0;
@@ -527,7 +544,8 @@ cell_reg_setup(struct op_counter_config=20
if (ctr[0].event =3D=3D SPU_CYCLES_EVENT_NUM) {
spu_cycle_reset =3D ctr[0].count;
=20
=2D /* Each node will need to make the rtas call to start
+ /*
+ * Each node will need to make the rtas call to start
* and stop SPU profiling. Get the token once and store it.
*/
spu_rtas_token =3D rtas_token("ibm,cbe-spu-perftools");
@@ -542,7 +560,8 @@ cell_reg_setup(struct op_counter_config=20
=20
pm_rtas_token =3D rtas_token("ibm,cbe-perftools");
=20
=2D /* For all events excetp PPU CYCLEs, each node will need to make
+ /*
+ * For all events except PPU CYCLEs, each node will need to make
* the rtas cbe-perftools call to setup and reset the debug bus.
* Make the token lookup call once and store it in the global
* variable pm_rtas_token.
@@ -579,7 +598,8 @@ cell_reg_setup(struct op_counter_config=20
per_cpu(pmc_values, j)[i] =3D 0;
}
=20
=2D /* Setup the thread 1 events, map the thread 0 event to the
+ /*
+ * Setup the thread 1 events, map the thread 0 event to the
* equivalent thread 1 event.
*/
for (i =3D 0; i < num_ctrs; ++i) {
@@ -603,7 +623,8 @@ cell_reg_setup(struct op_counter_config=20
for (i =3D 0; i < NUM_INPUT_BUS_WORDS; i++)
input_bus[i] =3D 0xff;
=20
=2D /* Our counters count up, and "count" refers to
+ /*
+ * Our counters count up, and "count" refers to
* how much before the next interrupt, and we interrupt
* on overflow. So we calculate the starting value
* which will give us "count" until overflow.
@@ -667,19 +688,19 @@ static int cell_cpu_setup(struct op_coun
}
}
=20
=2D /* the pm_rtas_activate_signals will return -EIO if the FW
+ /*
+ * The pm_rtas_activate_signals will return -EIO if the FW
* call failed.
*/
=2D return (pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled));
=2D
+ return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
}
=20
#define ENTRIES 303
#define MAXLFSR 0xFFFFFF
=20
/* precomputed table of 24 bit LFSR values */
=2Dint initial_lfsr[] =3D
=2D{8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 107534=
24,
+static int initial_lfsr[] =3D {
+ 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
@@ -716,7 +737,8 @@ int initial_lfsr[] =3D
3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 902700=
3,
6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
=2D 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607};
+ 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
+};
=20
/*
* The hardware uses an LFSR counting sequence to determine when to capture
@@ -777,28 +799,25 @@ int initial_lfsr[] =3D
=20
static int calculate_lfsr(int n)
{
=2D /* The ranges and steps are in powers of 2 so the calculations
+ /*
+ * The ranges and steps are in powers of 2 so the calculations
* can be done using shifts rather then divide.
*/
int index;
=20
=2D if ((n >> 16) =3D=3D 0) {
+ if ((n >> 16) =3D=3D 0)
index =3D 0;
=2D
=2D } else if (((n - V2_16) >> 19) =3D=3D 0) {
+ else if (((n - V2_16) >> 19) =3D=3D 0)
index =3D ((n - V2_16) >> 12) + 1;
=2D
=2D } else if (((n - V2_16 - V2_19) >> 22) =3D=3D 0) {
+ else if (((n - V2_16 - V2_19) >> 22) =3D=3D 0)
index =3D ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
+ else if (((n - V2_16 - V2_19 - V2_22) >> 24) =3D=3D 0)
+ index =3D ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
+ else
+ index =3D ENTRIES-1;
=20
=2D } else if (((n - V2_16 - V2_19 - V2_22) >> 24) =3D=3D 0) {
=2D index =3D ((n - V2_16 - V2_19 - V2_22) >> 18 )
=2D + 1 + 256;
=2D }
=2D
=2D if ((index > ENTRIES) || (index < 0)) /* make sure index is
=2D * valid
=2D */
+ /* make sure index is valid */
+ if ((index > ENTRIES) || (index < 0))
index =3D ENTRIES-1;
=20
return initial_lfsr[index];
@@ -809,15 +828,17 @@ static int pm_rtas_activate_spu_profilin
int ret, i;
struct pm_signal pm_signal_local[NR_PHYS_CTRS];
=20
=2D /* Set up the rtas call to configure the debug bus to
=2D * route the SPU PCs. Setup the pm_signal for each SPU */
+ /*
+ * Set up the rtas call to configure the debug bus to
+ * route the SPU PCs. Setup the pm_signal for each SPU
+ */
for (i =3D 0; i < NUM_SPUS_PER_NODE; i++) {
pm_signal_local[i].cpu =3D node;
pm_signal_local[i].signal_group =3D 41;
=2D pm_signal_local[i].bus_word =3D 1 << i / 2; /* spu i on
=2D * word (i/2)
=2D */
=2D pm_signal_local[i].sub_unit =3D i; /* spu i */
+ /* spu i on word (i/2) */
+ pm_signal_local[i].bus_word =3D 1 << i / 2;
+ /* spu i */
+ pm_signal_local[i].sub_unit =3D i;
pm_signal_local[i].bit =3D 63;
}
=20
@@ -858,8 +879,8 @@ static int cell_global_start_spu(struct=20
int subfunc, rtn_value;
unsigned int lfsr_value;
int cpu;
=2D int ret =3D 0;
=2D int rtas_error =3D 0;
+ int ret;
+ int rtas_error;
unsigned int cpu_khzfreq =3D 0;
=20
/* The SPU profiling uses time-based profiling based on
@@ -884,24 +905,23 @@ static int cell_global_start_spu(struct=20
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
continue;
=2D /* Setup SPU cycle-based profiling.
+
+ /*
+ * Setup SPU cycle-based profiling.
* Set perf_mon_control bit 0 to a zero before
* enabling spu collection hardware.
*/
cbe_write_pm(cpu, pm_control, 0);
=20
if (spu_cycle_reset > MAX_SPU_COUNT)
=2D /* use largest possible value
=2D */
+ /* use largest possible value */
lfsr_value =3D calculate_lfsr(MAX_SPU_COUNT-1);
else
=2D lfsr_value =3D calculate_lfsr(spu_cycle_reset);
+ lfsr_value =3D calculate_lfsr(spu_cycle_reset);
=20
=2D if (lfsr_value =3D=3D 0) { /* must use a non zero value. Zero
=2D * disables data collection.
=2D */
=2D lfsr_value =3D calculate_lfsr(1);
=2D }
+ /* must use a non zero value. Zero disables data collection. */
+ if (lfsr_value =3D=3D 0)
+ lfsr_value =3D calculate_lfsr(1);
=20
lfsr_value =3D lfsr_value << 8; /* shift lfsr to correct
* register location
@@ -916,7 +936,7 @@ static int cell_global_start_spu(struct=20
}
=20
=20
=2D subfunc =3D 2; // 2 - activate SPU tracing, 3 - deactivate
+ subfunc =3D 2; /* 2 - activate SPU tracing, 3 - deactivate */
=20
/* start profiling */
rtn_value =3D rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
@@ -976,7 +996,8 @@ static int cell_global_start_ppu(struct=20
oprofile_running =3D 1;
smp_wmb();
=20
=2D /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
+ /*
+ * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
* executed which manipulates the PMU. We start the "virtual counter"
* here so that we do not need to synchronize access to the PMU in
* the above for-loop.
@@ -986,7 +1007,6 @@ static int cell_global_start_ppu(struct=20
return 0;
}
=20
=2D
static int cell_global_start(struct op_counter_config *ctr)
{
if (spu_cycle_reset) {
@@ -996,14 +1016,15 @@ static int cell_global_start(struct op_c
}
}
=20
=2Dstatic void cell_global_stop_spu(void)
=2D/* Note the generic OProfile stop calls do not support returning
+/*
+ * Note the generic OProfile stop calls do not support returning
* an error on stop. Hence, will not return an error if the FW
* calls fail on stop. Failure to reset the debug bus is not an issue.
* Failure to disable the SPU profiling is not an issue. The FW calls
* to enable the performance counters and debug bus will work even if
* the hardware was not cleanly reset.
*/
+static void cell_global_stop_spu(void)
{
int subfunc, rtn_value;
unsigned int lfsr_value;
@@ -1020,7 +1041,8 @@ static void cell_global_stop_spu(void)
if (cbe_get_hw_thread_id(cpu))
continue;
=20
=2D subfunc =3D 3; /* 2 - activate SPU tracing,
+ subfunc =3D 3; /*
+ * 2 - activate SPU tracing,
* 3 - deactivate
*/
lfsr_value =3D 0x8f100000;
@@ -1046,7 +1068,8 @@ static void cell_global_stop_ppu(void)
{
int cpu;
=20
=2D /* This routine will be called once for the system.
+ /*
+ * This routine will be called once for the system.
* There is one performance monitor per node, so we
* only need to perform this function once per node.
*/
@@ -1079,8 +1102,8 @@ static void cell_global_stop(void)
}
}
=20
=2Dstatic void
=2Dcell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ct=
r)
+static void cell_handle_interrupt(struct pt_regs *regs,
+ struct op_counter_config *ctr)
{
u32 cpu;
u64 pc;
@@ -1091,13 +1114,15 @@ cell_handle_interrupt(struct pt_regs *re
=20
cpu =3D smp_processor_id();
=20
=2D /* Need to make sure the interrupt handler and the virt counter
+ /*
+ * Need to make sure the interrupt handler and the virt counter
* routine are not running at the same time. See the
* cell_virtual_cntr() routine for additional comments.
*/
spin_lock_irqsave(&virt_cntr_lock, flags);
=20
=2D /* Need to disable and reenable the performance counters
+ /*
+ * Need to disable and reenable the performance counters
* to get the desired behavior from the hardware. This
* is hardware specific.
*/
@@ -1106,7 +1131,8 @@ cell_handle_interrupt(struct pt_regs *re
=20
interrupt_mask =3D cbe_get_and_clear_pm_interrupts(cpu);
=20
=2D /* If the interrupt mask has been cleared, then the virt cntr
+ /*
+ * If the interrupt mask has been cleared, then the virt cntr
* has cleared the interrupt. When the thread that generated
* the interrupt is restored, the data count will be restored to
* 0xffffff0 to cause the interrupt to be regenerated.
@@ -1124,7 +1150,8 @@ cell_handle_interrupt(struct pt_regs *re
}
}
=20
=2D /* The counters were frozen by the interrupt.
+ /*
+ * The counters were frozen by the interrupt.
* Reenable the interrupt and restart the counters.
* If there was a race between the interrupt handler and
* the virtual counter routine. The virutal counter
@@ -1134,7 +1161,8 @@ cell_handle_interrupt(struct pt_regs *re
cbe_enable_pm_interrupts(cpu, hdw_thread,
virt_cntr_inter_mask);
=20
=2D /* The writes to the various performance counters only writes
+ /*
+ * The writes to the various performance counters only writes
* to a latch. The new values (interrupt setting bits, reset
* counter value etc.) are not copied to the actual registers
* until the performance monitor is enabled. In order to get
@@ -1147,7 +1175,8 @@ cell_handle_interrupt(struct pt_regs *re
spin_unlock_irqrestore(&virt_cntr_lock, flags);
}
=20
=2D/* This function is called from the generic OProfile
+/*
+ * This function is called from the generic OProfile
* driver. When profiling PPUs, we need to do the
* generic sync start; otherwise, do spu_sync_start.
*/
@@ -1167,7 +1196,6 @@ static int cell_sync_stop(void)
return 1;
}
=20
=2D
struct op_powerpc_model op_model_cell =3D {
.reg_setup =3D cell_reg_setup,
.cpu_setup =3D cell_cpu_setup,
next prev parent reply other threads:[~2007-02-26 23:50 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-02-22 0:02 [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch Carl Love
2007-02-26 23:50 ` Arnd Bergmann [this message]
2007-02-27 1:31 ` Michael Ellerman
2007-02-27 16:52 ` Maynard Johnson
2007-02-28 1:44 ` Arnd Bergmann
-- strict thread matches above, loose matches on Subject: below --
2007-02-14 23:52 Carl Love
2007-02-15 14:37 ` Arnd Bergmann
2007-02-15 16:15 ` Maynard Johnson
2007-02-15 18:13 ` Arnd Bergmann
2007-02-15 20:21 ` Carl Love
2007-02-15 21:03 ` Arnd Bergmann
2007-02-15 21:50 ` Paul E. McKenney
2007-02-16 0:33 ` Arnd Bergmann
2007-02-16 0:32 ` Maynard Johnson
2007-02-16 17:14 ` Arnd Bergmann
2007-02-16 21:43 ` Maynard Johnson
2007-02-18 23:18 ` Maynard Johnson
2007-02-06 0:28 [RFC,PATCH] CELL PPU " Carl Love
2007-02-06 23:02 ` [Cbe-oss-dev] [RFC, PATCH] CELL " Carl Love
2007-02-07 15:41 ` Maynard Johnson
2007-02-07 22:48 ` Michael Ellerman
2007-02-08 15:03 ` Maynard Johnson
2007-02-08 14:18 ` Milton Miller
2007-02-08 17:21 ` Arnd Bergmann
2007-02-08 18:01 ` Adrian Reber
2007-02-08 22:51 ` Carl Love
2007-02-09 2:46 ` Milton Miller
2007-02-09 16:17 ` Carl Love
2007-02-11 22:46 ` Milton Miller
2007-02-12 16:38 ` Carl Love
2007-02-09 18:47 ` Milton Miller
2007-02-09 19:10 ` Arnd Bergmann
2007-02-09 19:46 ` Milton Miller
2007-02-08 23:59 ` Maynard Johnson
2007-02-09 18:03 ` Milton Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200702270051.00393.arnd@arndb.de \
--to=arnd@arndb$(echo .)de \
--cc=cbe-oss-dev@ozlabs$(echo .)org \
--cc=cel@us$(echo .)ibm.com \
--cc=linux-kernel@vger$(echo .)kernel.org \
--cc=linuxppc-dev@ozlabs$(echo .)org \
--cc=oprofile-list@lists$(echo .)sourceforge.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox