From: thunder.leizhen@huawei•com (Leizhen (ThunderTown))
To: linux-arm-kernel@lists•infradead.org
Subject: [PATCH v3 2/3] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock
Date: Thu, 19 Jul 2018 15:22:22 +0800 [thread overview]
Message-ID: <5B503C2E.40602@huawei.com> (raw)
In-Reply-To: <625ffbc273577515c324f6deea66a366e675b751.1508334262.git.robin.murphy@arm.com>
On 2017/10/18 22:04, Robin Murphy wrote:
> Even without the MSI trick, we can still do a lot better than hogging
> the entire queue while it drains. All we actually need to do for the
> necessary guarantee of completion is wait for our particular command to
> have been consumed, and as long as we keep track of where it is there is
> no need to block other CPUs from adding further commands in the
> meantime. There is one theoretical (but incredibly unlikely) edge case
> to avoid, where cons has wrapped twice to still appear 'behind' the sync
> position - this is easily disambiguated by adding a generation count to
> the queue to indicate when prod wraps, since cons cannot wrap twice
> without prod having wrapped at least once.
Hi Robin,
I applied your patch and got below improvemnet for NVMe device.
Randomly Read IOPS: 146K --> 214K
Randomly Write IOPS: 143K --> 212K
Tested-by: Zhen Lei <thunder.leizhen@huawei•com>
>
> Signed-off-by: Robin Murphy <robin.murphy@arm•com>
> ---
>
> v3:
> - Move queue checks into helpers
> - Avoid race by updating generation before prod (after some
> deliberation I've concluded that it doesn't actually need any
> special handling for the timeout failure case either)
>
> drivers/iommu/arm-smmu-v3.c | 91 +++++++++++++++++++++++++++++++++------------
> 1 file changed, 68 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 83b76404e882..3130e7182dde 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -417,7 +417,6 @@
>
> /* High-level queue structures */
> #define ARM_SMMU_POLL_TIMEOUT_US 100
> -#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */
> #define ARM_SMMU_SYNC_TIMEOUT_US 1000000 /* 1s! */
>
> #define MSI_IOVA_BASE 0x8000000
> @@ -540,6 +539,7 @@ struct arm_smmu_queue {
> struct arm_smmu_cmdq {
> struct arm_smmu_queue q;
> spinlock_t lock;
> + int generation;
> };
>
> struct arm_smmu_evtq {
> @@ -737,6 +737,17 @@ static bool queue_empty(struct arm_smmu_queue *q)
> Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
> }
>
> +static bool queue_behind(struct arm_smmu_queue *q, u32 idx)
> +{
> + return Q_IDX(q, q->cons) < Q_IDX(q, idx);
> +}
> +
> +static bool queue_ahead_not_wrapped(struct arm_smmu_queue *q, u32 idx)
> +{
> + return Q_IDX(q, q->cons) >= Q_IDX(q, idx) &&
> + Q_WRP(q, q->cons) == Q_WRP(q, idx);
> +}
> +
> static void queue_sync_cons(struct arm_smmu_queue *q)
> {
> q->cons = readl_relaxed(q->cons_reg);
> @@ -770,21 +781,12 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
> writel(q->prod, q->prod_reg);
> }
>
> -/*
> - * Wait for the SMMU to consume items. If drain is true, wait until the queue
> - * is empty. Otherwise, wait until there is at least one free slot.
> - */
> -static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> +/* Wait for the SMMU to consume items, until there is at least one free slot */
> +static int queue_poll_cons(struct arm_smmu_queue *q, bool wfe)
> {
> - ktime_t timeout;
> - unsigned int delay = 1;
> + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>
> - /* Wait longer if it's queue drain */
> - timeout = ktime_add_us(ktime_get(), drain ?
> - ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
> - ARM_SMMU_POLL_TIMEOUT_US);
> -
> - while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
> + while (queue_sync_cons(q), queue_full(q)) {
> if (ktime_compare(ktime_get(), timeout) > 0)
> return -ETIMEDOUT;
>
> @@ -792,8 +794,7 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> wfe();
> } else {
> cpu_relax();
> - udelay(delay);
> - delay *= 2;
> + udelay(1);
> }
> }
>
> @@ -959,15 +960,20 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
> queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
> }
>
> -static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
> +static u32 arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
> {
> struct arm_smmu_queue *q = &smmu->cmdq.q;
> bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
>
> + if (Q_IDX(q, q->prod + 1) == 0)
> + smmu->cmdq.generation++;
> +
> while (queue_insert_raw(q, cmd) == -ENOSPC) {
> - if (queue_poll_cons(q, false, wfe))
> + if (queue_poll_cons(q, wfe))
> dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
> }
> +
> + return q->prod;
> }
>
> static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
> @@ -1001,15 +1007,53 @@ static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
> return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
> }
>
> +static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx,
> + int sync_gen)
> +{
> + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
> + struct arm_smmu_queue *q = &smmu->cmdq.q;
> + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
> + unsigned int delay = 1;
> +
> + do {
> + queue_sync_cons(q);
> + /*
> + * If we see updates quickly enough, cons has passed sync_idx,
> + * but not yet wrapped. At worst, cons might have actually
> + * wrapped an even number of times, but that still guarantees
> + * the original sync must have been consumed.
> + */
> + if (queue_ahead_not_wrapped(q, sync_idx))
> + return 0;
> + /*
> + * Otherwise, cons may have passed sync_idx and wrapped one or
> + * more times to appear behind it again, but in that case prod
> + * must also be one or more generations ahead.
> + */
> + if (queue_behind(q, sync_idx) &&
> + READ_ONCE(smmu->cmdq.generation) != sync_gen)
> + return 0;
> +
> + if (wfe) {
> + wfe();
> + } else {
> + cpu_relax();
> + udelay(delay);
> + delay *= 2;
> + }
> + } while (ktime_before(ktime_get(), timeout));
> +
> + return -ETIMEDOUT;
> +}
> +
> static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
> {
> u64 cmd[CMDQ_ENT_DWORDS];
> unsigned long flags;
> - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
> bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
> (smmu->features & ARM_SMMU_FEAT_COHERENCY);
> struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
> - int ret;
> + int ret, sync_idx, sync_gen;
>
> if (msi) {
> ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr);
> @@ -1018,13 +1062,14 @@ static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
> arm_smmu_cmdq_build_cmd(cmd, &ent);
>
> spin_lock_irqsave(&smmu->cmdq.lock, flags);
> - arm_smmu_cmdq_insert_cmd(smmu, cmd);
> - if (!msi)
> - ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
> + sync_idx = arm_smmu_cmdq_insert_cmd(smmu, cmd);
> + sync_gen = READ_ONCE(smmu->cmdq.generation);
> spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
>
> if (msi)
> ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
> + else
> + ret = arm_smmu_sync_poll_cons(smmu, sync_idx, sync_gen);
> if (ret)
> dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
> }
>
--
Thanks!
BestRegards
next prev parent reply other threads:[~2018-07-19 7:22 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-18 14:04 [PATCH v3 0/3] SMMUv3 CMD_SYNC optimisation Robin Murphy
2017-10-18 14:04 ` [PATCH v3 1/3] iommu/arm-smmu-v3: Use CMD_SYNC completion MSI Robin Murphy
2017-10-18 14:04 ` [PATCH v3 2/3] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock Robin Murphy
2018-07-19 7:22 ` Leizhen (ThunderTown) [this message]
2017-10-18 14:04 ` [RFT v3 3/3] iommu/arm-smmu-v3: Use burst-polling for sync completion Robin Murphy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5B503C2E.40602@huawei.com \
--to=thunder.leizhen@huawei$(echo .)com \
--cc=linux-arm-kernel@lists$(echo .)infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox