From: CAI Qian <caiqian@redhat•com>
To: Tejun Heo <tj@kernel•org>
Cc: kexec <kexec@lists•infradead.org>,
linux-next@vger•kernel.org, torvalds@linux-foundation•org,
linux-kernel <linux-kernel@vger•kernel.org>
Subject: Re: kdump regression compared to v2.6.35
Date: Mon, 30 Aug 2010 10:02:17 -0400 (EDT) [thread overview]
Message-ID: <1141332926.1524871283176937097.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com> (raw)
In-Reply-To: <4C7BA918.60707@kernel.org>
----- "Tejun Heo" <tj@kernel•org> wrote:
> On 08/30/2010 12:24 PM, CAI Qian wrote:
> > Can't see any difference with hangcheck timer enabled.
>
> Hmm, odd. So, here's the said debug patch. It will periodically
> check all works and report if any work is being delayed for too long.
> If the max wait goes over 30secs, it will dump all task states and
> disable itself. Can you please apply the patch on top of rc2 +
> wq#for-linus and report the output? It should tell us who's stuck
> where.
Nothing new was printed after around 10 minutes.
> Thanks.
>
> diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
> index f11100f..282322c 100644
> --- a/include/linux/workqueue.h
> +++ b/include/linux/workqueue.h
> @@ -83,6 +83,8 @@ struct work_struct {
> #ifdef CONFIG_LOCKDEP
> struct lockdep_map lockdep_map;
> #endif
> + unsigned long queued_on;
> + unsigned long activated_on;
> };
>
> #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index a2dccfc..9f95169 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -913,6 +913,8 @@ static void insert_work(struct
> cpu_workqueue_struct *cwq,
> {
> struct global_cwq *gcwq = cwq->gcwq;
>
> + work->queued_on = work->activated_on = jiffies;
> +
> /* we own @work, set data and link */
> set_work_cwq(work, cwq, extra_flags);
>
> @@ -996,13 +998,14 @@ static void __queue_work(unsigned int cpu,
> struct workqueue_struct *wq,
> if (likely(cwq->nr_active < cwq->max_active)) {
> cwq->nr_active++;
> worklist = gcwq_determine_ins_pos(gcwq, cwq);
> + insert_work(cwq, work, worklist, work_flags);
> } else {
> work_flags |= WORK_STRUCT_DELAYED;
> worklist = &cwq->delayed_works;
> + insert_work(cwq, work, worklist, work_flags);
> + work->activated_on--;
> }
>
> - insert_work(cwq, work, worklist, work_flags);
> -
> spin_unlock_irqrestore(&gcwq->lock, flags);
> }
>
> @@ -1669,6 +1672,7 @@ static void cwq_activate_first_delayed(struct
> cpu_workqueue_struct *cwq)
> struct work_struct, entry);
> struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
>
> + work->activated_on = jiffies;
> move_linked_works(work, pos, NULL);
> __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
> cwq->nr_active++;
> @@ -2810,7 +2814,7 @@ struct workqueue_struct
> *__alloc_workqueue_key(const char *name,
> * list. Grab it, set max_active accordingly and add the new
> * workqueue to workqueues list.
> */
> - spin_lock(&workqueue_lock);
> + spin_lock_irq(&workqueue_lock);
>
> if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
> for_each_cwq_cpu(cpu, wq)
> @@ -2818,7 +2822,7 @@ struct workqueue_struct
> *__alloc_workqueue_key(const char *name,
>
> list_add(&wq->list, &workqueues);
>
> - spin_unlock(&workqueue_lock);
> + spin_unlock_irq(&workqueue_lock);
>
> return wq;
> err:
> @@ -2849,9 +2853,9 @@ void destroy_workqueue(struct workqueue_struct
> *wq)
> * wq list is used to freeze wq, remove from list after
> * flushing is complete in case freeze races us.
> */
> - spin_lock(&workqueue_lock);
> + spin_lock_irq(&workqueue_lock);
> list_del(&wq->list);
> - spin_unlock(&workqueue_lock);
> + spin_unlock_irq(&workqueue_lock);
>
> /* sanity check */
> for_each_cwq_cpu(cpu, wq) {
> @@ -2891,23 +2895,23 @@ void workqueue_set_max_active(struct
> workqueue_struct *wq, int max_active)
>
> max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
>
> - spin_lock(&workqueue_lock);
> + spin_lock_irq(&workqueue_lock);
>
> wq->saved_max_active = max_active;
>
> for_each_cwq_cpu(cpu, wq) {
> struct global_cwq *gcwq = get_gcwq(cpu);
>
> - spin_lock_irq(&gcwq->lock);
> + spin_lock(&gcwq->lock);
>
> if (!(wq->flags & WQ_FREEZEABLE) ||
> !(gcwq->flags & GCWQ_FREEZING))
> get_cwq(gcwq->cpu, wq)->max_active = max_active;
>
> - spin_unlock_irq(&gcwq->lock);
> + spin_unlock(&gcwq->lock);
> }
>
> - spin_unlock(&workqueue_lock);
> + spin_unlock_irq(&workqueue_lock);
> }
> EXPORT_SYMBOL_GPL(workqueue_set_max_active);
>
> @@ -3419,7 +3423,7 @@ void freeze_workqueues_begin(void)
> {
> unsigned int cpu;
>
> - spin_lock(&workqueue_lock);
> + spin_lock_irq(&workqueue_lock);
>
> BUG_ON(workqueue_freezing);
> workqueue_freezing = true;
> @@ -3428,7 +3432,7 @@ void freeze_workqueues_begin(void)
> struct global_cwq *gcwq = get_gcwq(cpu);
> struct workqueue_struct *wq;
>
> - spin_lock_irq(&gcwq->lock);
> + spin_lock(&gcwq->lock);
>
> BUG_ON(gcwq->flags & GCWQ_FREEZING);
> gcwq->flags |= GCWQ_FREEZING;
> @@ -3440,10 +3444,10 @@ void freeze_workqueues_begin(void)
> cwq->max_active = 0;
> }
>
> - spin_unlock_irq(&gcwq->lock);
> + spin_unlock(&gcwq->lock);
> }
>
> - spin_unlock(&workqueue_lock);
> + spin_unlock_irq(&workqueue_lock);
> }
>
> /**
> @@ -3464,7 +3468,7 @@ bool freeze_workqueues_busy(void)
> unsigned int cpu;
> bool busy = false;
>
> - spin_lock(&workqueue_lock);
> + spin_lock_irq(&workqueue_lock);
>
> BUG_ON(!workqueue_freezing);
>
> @@ -3488,7 +3492,7 @@ bool freeze_workqueues_busy(void)
> }
> }
> out_unlock:
> - spin_unlock(&workqueue_lock);
> + spin_unlock_irq(&workqueue_lock);
> return busy;
> }
>
> @@ -3505,7 +3509,7 @@ void thaw_workqueues(void)
> {
> unsigned int cpu;
>
> - spin_lock(&workqueue_lock);
> + spin_lock_irq(&workqueue_lock);
>
> if (!workqueue_freezing)
> goto out_unlock;
> @@ -3514,7 +3518,7 @@ void thaw_workqueues(void)
> struct global_cwq *gcwq = get_gcwq(cpu);
> struct workqueue_struct *wq;
>
> - spin_lock_irq(&gcwq->lock);
> + spin_lock(&gcwq->lock);
>
> BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
> gcwq->flags &= ~GCWQ_FREEZING;
> @@ -3535,15 +3539,82 @@ void thaw_workqueues(void)
>
> wake_up_worker(gcwq);
>
> - spin_unlock_irq(&gcwq->lock);
> + spin_unlock(&gcwq->lock);
> }
>
> workqueue_freezing = false;
> out_unlock:
> - spin_unlock(&workqueue_lock);
> + spin_unlock_irq(&workqueue_lock);
> }
> #endif /* CONFIG_FREEZER */
>
> +#define WQ_CHECK_INTERVAL (10 * HZ)
> +static void workqueue_check_timer_fn(unsigned long data);
> +static DEFINE_TIMER(workqueue_check_timer, workqueue_check_timer_fn,
> 0, 0);
> +
> +static void workqueue_check_timer_fn(unsigned long data)
> +{
> + unsigned long now = jiffies;
> + unsigned long wait, max_wait = 0;
> + unsigned int cpu;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&workqueue_lock, flags);
> +
> + for_each_gcwq_cpu(cpu) {
> + struct global_cwq *gcwq = get_gcwq(cpu);
> + struct workqueue_struct *wq;
> + struct work_struct *work;
> +
> + spin_lock(&gcwq->lock);
> +
> + list_for_each_entry(wq, &workqueues, list) {
> + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
> +
> + if (!cwq)
> + continue;
> +
> + list_for_each_entry(work, &cwq->delayed_works, entry) {
> + WARN_ON_ONCE(!time_before(work->activated_on,
> + work->queued_on));
> + wait = now - work->queued_on;
> + if (wait < WQ_CHECK_INTERVAL)
> + continue;
> + max_wait = max(max_wait, wait);
> + printk("XXX %s/%d %p:%pf delayed for %ums\n",
> + wq->name,
> + gcwq->cpu != WORK_CPU_UNBOUND ? gcwq->cpu : -1,
> + work, work->func, jiffies_to_msecs(wait));
> + }
> + }
> +
> + list_for_each_entry(work, &gcwq->worklist, entry) {
> + WARN_ON_ONCE(time_before(work->activated_on,
> + work->queued_on));
> + wait = now - work->activated_on;
> + if (wait < WQ_CHECK_INTERVAL)
> + continue;
> + max_wait = max(max_wait, wait);
> + printk("XXX %s/%d %p:%pf pending for %ums after delayed %ums\n",
> + get_work_cwq(work)->wq->name,
> + gcwq->cpu != WORK_CPU_UNBOUND ? gcwq->cpu : -1,
> + work, work->func,
> + jiffies_to_msecs(wait),
> + jiffies_to_msecs(work->activated_on - work->queued_on));
> + }
> +
> + spin_unlock(&gcwq->lock);
> + }
> +
> + spin_unlock_irqrestore(&workqueue_lock, flags);
> +
> + if (max_wait > 20 * HZ) {
> + printk("XXX max_wait over 30secs, dumping tasks\n");
> + show_state();
> + } else
> + mod_timer(&workqueue_check_timer, now + WQ_CHECK_INTERVAL / 2);
> +}
> +
> static int __init init_workqueues(void)
> {
> unsigned int cpu;
> @@ -3596,6 +3667,7 @@ static int __init init_workqueues(void)
> system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
> WQ_UNBOUND_MAX_ACTIVE);
> BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
> + mod_timer(&workqueue_check_timer, jiffies + WQ_CHECK_INTERVAL / 2);
> return 0;
> }
> early_initcall(init_workqueues);
>
>
> _______________________________________________
> kexec mailing list
> kexec@lists•infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
next prev parent reply other threads:[~2010-08-30 14:02 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <123671462.1479561283081998014.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
[not found] ` <123671462.1479561283081998014.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:41 ` kdump regression compared to v2.6.35 caiqian-H+wXaHxf7aLQT0dZR+AlfA
[not found] ` <171172387.1479581283082093912.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:56 ` CAI Qian
[not found] ` <1236896997.1479691283083005518.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:52 ` Tejun Heo
2010-08-29 12:03 ` CAI Qian
[not found] ` <779893521.1479771283083393771.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 12:36 ` Tejun Heo
2010-08-30 3:42 ` CAI Qian
[not found] ` <1888320510.1487031283139773505.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 8:29 ` Tejun Heo
2010-08-30 10:24 ` CAI Qian
[not found] ` <1633441528.1498131283163868227.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 12:50 ` Tejun Heo
2010-08-30 14:02 ` CAI Qian [this message]
[not found] ` <1141332926.1524871283176937097.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 14:21 ` Tejun Heo
2010-08-30 14:47 ` CAI Qian
[not found] ` <331762715.1536681283179646594.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 14:51 ` CAI Qian
[not found] ` <1706089082.1537331283179884183.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 14:55 ` Tejun Heo
[not found] <71887879.1606161283215975799.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
2010-08-31 0:53 ` caiqian
[not found] ` <2044609874.1606211283216015254.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-31 9:22 ` Tejun Heo
[not found] <373987879.1541191283181021800.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
2010-08-30 15:10 ` caiqian
[not found] ` <1331313838.1541221283181038073.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 16:38 ` Tejun Heo
[not found] ` <4C7BDE6E.8030107-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2010-08-30 17:31 ` Tejun Heo
[not found] <2142316909.1477341283065016062.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
[not found] ` <2142316909.1477341283065016062.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 7:01 ` caiqian-H+wXaHxf7aLQT0dZR+AlfA
[not found] ` <181596874.1477361283065264575.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 8:57 ` Tejun Heo
[not found] ` <4C7A20F6.5070802-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2010-08-29 11:24 ` CAI Qian
[not found] ` <633505726.1479321283081093502.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:21 ` Tejun Heo
[not found] <229468156.1475641283020469212.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
[not found] ` <229468156.1475641283020469212.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-28 18:36 ` caiqian-H+wXaHxf7aLQT0dZR+AlfA
[not found] <2082161789.1474781283008521258.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
2010-08-28 15:19 ` caiqian
2010-08-27 12:35 CAI Qian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1141332926.1524871283176937097.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com \
--to=caiqian@redhat$(echo .)com \
--cc=kexec@lists$(echo .)infradead.org \
--cc=linux-kernel@vger$(echo .)kernel.org \
--cc=linux-next@vger$(echo .)kernel.org \
--cc=tj@kernel$(echo .)org \
--cc=torvalds@linux-foundation$(echo .)org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox