
Zhang Yanfei 08-22-2012 07:53 AM

Fix bugs in runq
 
Hello Dave,

In the runq command, when dumping the cfs and rt runqueues,
it seems that we get the wrong nr_running values for rq
and cfs_rq.

Please refer to the attached patch.

Thanks
Zhang Yanfei
From 3cb0190cc380ac1fc52ebbf3b1794679ba90f39c Mon Sep 17 00:00:00 2001
From: zhangyanfei <zhangyanfei@cn.fujitsu.com>
Date: Tue, 21 Aug 2012 15:59:34 +0800
Subject: [PATCH] Fix bug: use cfs_rq->h_nr_running to get correct entity numbers

Signed-off-by: zhangyanfei <zhangyanfei@cn.fujitsu.com>
---
defs.h    |  1 +
symbols.c |  2 ++
task.c    | 17 +++++++++++++----
3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/defs.h b/defs.h
index 4a8e2e3..d0e34d4 100755
--- a/defs.h
+++ b/defs.h
@@ -1576,6 +1576,7 @@ struct offset_table { /* stash of commonly-used offsets */
long rq_nr_running;
long cfs_rq_rb_leftmost;
long cfs_rq_nr_running;
+ long cfs_rq_h_nr_running;
long cfs_rq_tasks_timeline;
long task_struct_se;
long sched_entity_run_node;
diff --git a/symbols.c b/symbols.c
index 2646ff8..cb15e9e 100755
--- a/symbols.c
+++ b/symbols.c
@@ -8638,6 +8638,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sched_entity_on_rq));
fprintf(fp, " cfs_rq_nr_running: %ld
",
OFFSET(cfs_rq_nr_running));
+ fprintf(fp, " cfs_rq_h_nr_running: %ld
",
+ OFFSET(cfs_rq_h_nr_running));
fprintf(fp, " cfs_rq_rb_leftmost: %ld
",
OFFSET(cfs_rq_rb_leftmost));
fprintf(fp, " cfs_rq_tasks_timeline: %ld
",
diff --git a/task.c b/task.c
index c8dee43..40690fe 100755
--- a/task.c
+++ b/task.c
@@ -7566,6 +7566,9 @@ dump_CFS_runqueues(void)
MEMBER_OFFSET_INIT(sched_entity_on_rq, "sched_entity", "on_rq");
MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost");
MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running");
+ if (MEMBER_EXISTS("cfs_rq", "h_nr_running"))
+ MEMBER_OFFSET_INIT(cfs_rq_h_nr_running,
+ "cfs_rq", "h_nr_running");
MEMBER_OFFSET_INIT(cfs_rq_tasks_timeline, "cfs_rq",
"tasks_timeline");
MEMBER_OFFSET_INIT(cfs_rq_curr, "cfs_rq", "curr");
@@ -7610,6 +7613,7 @@ dump_CFS_runqueues(void)

readmem(runq, KVADDR, runqbuf, SIZE(runqueue),
"per-cpu rq", FAULT_ON_ERROR);
+ nr_running = LONG(runqbuf + OFFSET(rq_nr_running));

if (cfs_rq_buf) {
/*
@@ -7622,16 +7626,21 @@ dump_CFS_runqueues(void)

readmem(cfs_rq, KVADDR, cfs_rq_buf, SIZE(cfs_rq),
"per-cpu cfs_rq", FAULT_ON_ERROR);
- nr_running = LONG(cfs_rq_buf + OFFSET(rq_nr_running));
cfs_rq_nr_running = ULONG(cfs_rq_buf +
OFFSET(cfs_rq_nr_running));
root = (struct rb_root *)(cfs_rq +
OFFSET(cfs_rq_tasks_timeline));
} else {
cfs_rq = runq + OFFSET(rq_cfs);
- nr_running = LONG(runqbuf + OFFSET(rq_nr_running));
- cfs_rq_nr_running = ULONG(runqbuf + OFFSET(rq_cfs) +
- OFFSET(cfs_rq_nr_running));
+ if (MEMBER_EXISTS("cfs_rq", "h_nr_running")) {
+ cfs_rq_nr_running = ULONG(runqbuf +
+ OFFSET(rq_cfs) +
+ OFFSET(cfs_rq_h_nr_running));
+ } else {
+ cfs_rq_nr_running = ULONG(runqbuf +
+ OFFSET(rq_cfs) +
+ OFFSET(cfs_rq_nr_running));
+ }
root = (struct rb_root *)(runq + OFFSET(rq_cfs) +
OFFSET(cfs_rq_tasks_timeline));
}
--
1.7.1

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility

Dave Anderson 08-22-2012 05:56 PM

Fix bugs in runq
 
----- Original Message -----
> Hello Dave,
>
> In the runq command, when dumping the cfs and rt runqueues,
> it seems that we get the wrong nr_running values for rq
> and cfs_rq.
>
> Please refer to the attached patch.
>
> Thanks
> Zhang Yanfei

Hello Zhang,

I understand what you are trying to accomplish with this patch, but
none of my test dumpfiles can actually verify it because there is no
difference with or without your patch. What failure mode did you see
in your testing? I presume that it just showed "[no tasks queued]"
for the RT runqueue when there were actually tasks queued there?

The reason I ask is that I'm thinking that a better solution would
be to simplify dump_CFS_runqueues() by *not* accessing and using
rq_nr_running, cfs_rq_nr_running or cfs_rq_h_nr_running.

Those counters are only read to determine the "active" argument to
pass to dump_RT_prio_array(), which returns immediately if it is
FALSE. However, if we get rid of the "active" argument and simply
allow dump_RT_prio_array() to always check its queues every time,
it still works just fine.

For example, I tested my set of sample dumpfiles with this patch:

diff -u -r1.205 task.c
--- task.c 12 Jul 2012 20:04:00 -0000 1.205
+++ task.c 22 Aug 2012 15:33:32 -0000
@@ -7636,7 +7636,7 @@
OFFSET(cfs_rq_tasks_timeline));
}

- dump_RT_prio_array(nr_running != cfs_rq_nr_running,
+ dump_RT_prio_array(TRUE,
runq + OFFSET(rq_rt) + OFFSET(rt_rq_active),
&runqbuf[OFFSET(rq_rt) + OFFSET(rt_rq_active)]);

and the output is identical to testing with, and without, your patch.

So the question is whether dump_CFS_runqueues() should be needlessly
complicated with all of the "nr_running" references?

In fact, it also seems possible that a crash could happen at a point in
the scheduler code where those counters are not valid/current/trustworthy.

So unless you can convince me otherwise, I'd prefer to just remove
the "nr_running" business completely.

That being said -- and for your future reference -- when creating patches
such as yours, please consider the following:

When adding entries to the offset_table, always put them at the end
of the structure so that the offsets to the currently-existing members
do not change. This allows older extension modules to still have
valid OFFSET() values without having to be recompiled:

--- a/defs.h
+++ b/defs.h
@@ -1576,6 +1576,7 @@ struct offset_table { /* stash of commonly-used offsets */
long rq_nr_running;
long cfs_rq_rb_leftmost;
long cfs_rq_nr_running;
+ long cfs_rq_h_nr_running;
long cfs_rq_tasks_timeline;
long task_struct_se;
long sched_entity_run_node;

But, as you have done, you can still display the new entry in the
"help -o" output near its related cfs_rq_xxx offsets.

Then, during initialization, there's no need to do the preliminary
MEMBER_EXISTS() call in this case -- just call MEMBER_OFFSET_INIT()
regardless. If it fails, the offset will remain -1 (INVALID):

--- a/task.c
+++ b/task.c
@@ -7566,6 +7566,9 @@ dump_CFS_runqueues(void)
MEMBER_OFFSET_INIT(sched_entity_on_rq, "sched_entity", "on_rq");
MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost");
MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running");
+ if (MEMBER_EXISTS("cfs_rq", "h_nr_running"))
+ MEMBER_OFFSET_INIT(cfs_rq_h_nr_running,
+ "cfs_rq", "h_nr_running");
MEMBER_OFFSET_INIT(cfs_rq_tasks_timeline, "cfs_rq",
"tasks_timeline");
MEMBER_OFFSET_INIT(cfs_rq_curr, "cfs_rq", "curr");
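
In other words, the guarded block above can collapse to a single
unconditional call -- a sketch of what is described here, not the
committed code:

	/* no MEMBER_EXISTS() guard needed: if "h_nr_running" is absent
	 * in this kernel, the offset simply remains -1 (INVALID) */
	MEMBER_OFFSET_INIT(cfs_rq_h_nr_running, "cfs_rq", "h_nr_running");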

And then after initialization, instead of using MEMBER_EXISTS(), you
can use "if (VALID_MEMBER(cfs_rq_h_nr_running))", which simply
accesses the offset_table -- instead of involving a gdb call every
time:

+ if (MEMBER_EXISTS("cfs_rq", "h_nr_running")) {
+ cfs_rq_nr_running = ULONG(runqbuf +
+ OFFSET(rq_cfs) +
+ OFFSET(cfs_rq_h_nr_running));
+ } else {
+ cfs_rq_nr_running = ULONG(runqbuf +
+ OFFSET(rq_cfs) +
+ OFFSET(cfs_rq_nr_running));
+ }
root = (struct rb_root *)(runq + OFFSET(rq_cfs) +
OFFSET(cfs_rq_tasks_timeline));
}
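
Putting the two suggestions together, the runtime selection would look
something like this sketch (same variable names as the patch above;
the shape of the suggested change, not final code):

	if (VALID_MEMBER(cfs_rq_h_nr_running)) {
		/* h_nr_running counts tasks across the whole
		 * group hierarchy of this cfs_rq */
		cfs_rq_nr_running = ULONG(runqbuf + OFFSET(rq_cfs) +
			OFFSET(cfs_rq_h_nr_running));
	} else {
		cfs_rq_nr_running = ULONG(runqbuf + OFFSET(rq_cfs) +
			OFFSET(cfs_rq_nr_running));
	}

VALID_MEMBER() just tests the stashed offset_table entry, so no gdb
call is made on this path.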

Thanks,
Dave


Dave Anderson 08-24-2012 06:17 PM

Fix bugs in runq
 
----- Original Message -----
>
>
> ----- Original Message -----
> > Hello Dave,
> >
> > In the runq command, when dumping the cfs and rt runqueues,
> > it seems that we get the wrong nr_running values for rq
> > and cfs_rq.
> >
> > Please refer to the attached patch.
> >
> > Thanks
> > Zhang Yanfei
>
> Hello Zhang,
>
> I understand what you are trying to accomplish with this patch, but
> none of my test dumpfiles can actually verify it because there is no
> difference with or without your patch. What failure mode did you see
> in your testing? I presume that it just showed "[no tasks queued]"
> for the RT runqueue when there were actually tasks queued there?
>
> The reason I ask is that I'm thinking that a better solution would
> be to simplify dump_CFS_runqueues() by *not* accessing and using
> rq_nr_running, cfs_rq_nr_running or cfs_rq_h_nr_running.
>
> Those counters are only read to determine the "active" argument to
> pass to dump_RT_prio_array(), which returns immediately if it is
> FALSE. However, if we get rid of the "active" argument and simply
> allow dump_RT_prio_array() to always check its queues every time,
> it still works just fine.
>
> For example, I tested my set of sample dumpfiles with this patch:
>
> diff -u -r1.205 task.c
> --- task.c 12 Jul 2012 20:04:00 -0000 1.205
> +++ task.c 22 Aug 2012 15:33:32 -0000
> @@ -7636,7 +7636,7 @@
> OFFSET(cfs_rq_tasks_timeline));
> }
>
> - dump_RT_prio_array(nr_running != cfs_rq_nr_running,
> + dump_RT_prio_array(TRUE,
> runq + OFFSET(rq_rt) + OFFSET(rt_rq_active),
> &runqbuf[OFFSET(rq_rt) +
> OFFSET(rt_rq_active)]);
>
> and the output is identical to testing with, and without, your patch.
>
> So the question is whether dump_CFS_runqueues() should be needlessly
> complicated with all of the "nr_running" references?
>
> In fact, it also seems possible that a crash could happen at a point in
> the scheduler code where those counters are not
> valid/current/trustworthy.
>
> So unless you can convince me otherwise, I'd prefer to just remove
> the "nr_running" business completely.

Hello Zhang,

Here's the patch I've got queued, which resolves the bug you encountered
by simplifying things:


--- task.c 12 Jul 2012 20:04:00 -0000 1.205
+++ task.c 24 Aug 2012 18:05:13 -0000
@@ -67,7 +67,7 @@
static int dump_tasks_in_cfs_rq(ulong);
static void dump_on_rq_tasks(void);
static void dump_CFS_runqueues(void);
-static void dump_RT_prio_array(int, ulong, char *);
+static void dump_RT_prio_array(ulong, char *);
static void task_struct_member(struct task_context *,unsigned int, struct reference *);
static void signal_reference(struct task_context *, ulong, struct reference *);
static void do_sig_thread_group(ulong);
@@ -7547,7 +7547,6 @@
char *runqbuf, *cfs_rq_buf;
ulong tasks_timeline ATTRIBUTE_UNUSED;
struct task_context *tc;
- long nr_running, cfs_rq_nr_running;
struct rb_root *root;
struct syment *rq_sp, *init_sp;

@@ -7622,22 +7621,15 @@

readmem(cfs_rq, KVADDR, cfs_rq_buf, SIZE(cfs_rq),
"per-cpu cfs_rq", FAULT_ON_ERROR);
- nr_running = LONG(cfs_rq_buf + OFFSET(rq_nr_running));
- cfs_rq_nr_running = ULONG(cfs_rq_buf +
- OFFSET(cfs_rq_nr_running));
root = (struct rb_root *)(cfs_rq +
OFFSET(cfs_rq_tasks_timeline));
} else {
cfs_rq = runq + OFFSET(rq_cfs);
- nr_running = LONG(runqbuf + OFFSET(rq_nr_running));
- cfs_rq_nr_running = ULONG(runqbuf + OFFSET(rq_cfs) +
- OFFSET(cfs_rq_nr_running));
root = (struct rb_root *)(runq + OFFSET(rq_cfs) +
OFFSET(cfs_rq_tasks_timeline));
}

- dump_RT_prio_array(nr_running != cfs_rq_nr_running,
- runq + OFFSET(rq_rt) + OFFSET(rt_rq_active),
+ dump_RT_prio_array(runq + OFFSET(rq_rt) + OFFSET(rt_rq_active),
&runqbuf[OFFSET(rq_rt) + OFFSET(rt_rq_active)]);

fprintf(fp, " CFS RB_ROOT: %lx
", (ulong)root);
@@ -7657,7 +7649,7 @@
}

static void
-dump_RT_prio_array(int active, ulong k_prio_array, char *u_prio_array)
+dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
{
int i, c, tot, cnt, qheads;
ulong offset, kvaddr, uvaddr;
@@ -7668,12 +7660,6 @@

fprintf(fp, " RT PRIO_ARRAY: %lx
", k_prio_array);

- if (!active) {
- INDENT(5);
- fprintf(fp, "[no tasks queued]
");
- return;
- }
-
qheads = (i = ARRAY_LENGTH(rt_prio_array_queue)) ?
i : get_array_length("rt_prio_array.queue", NULL, SIZE(list_head));




Zhang Yanfei 08-25-2012 03:23 AM

Fix bugs in runq
 
On 08/25/2012 02:17, Dave Anderson wrote:
>
>
> ----- Original Message -----
>>
>>
>> ----- Original Message -----
>>> Hello Dave,
>>>
>>> In the runq command, when dumping the cfs and rt runqueues,
>>> it seems that we get the wrong nr_running values for rq
>>> and cfs_rq.
>>>
>>> Please refer to the attached patch.
>>>
>>> Thanks
>>> Zhang Yanfei
>>
>> Hello Zhang,
>>
>> I understand what you are trying to accomplish with this patch, but
>> none of my test dumpfiles can actually verify it because there is no
>> difference with or without your patch. What failure mode did you see
>> in your testing? I presume that it just showed "[no tasks queued]"
>> for the RT runqueue when there were actually tasks queued there?
>>
>> The reason I ask is that I'm thinking that a better solution would
>> be to simplify dump_CFS_runqueues() by *not* accessing and using
>> rq_nr_running, cfs_rq_nr_running or cfs_rq_h_nr_running.
>>
>> Those counters are only read to determine the "active" argument to
>> pass to dump_RT_prio_array(), which returns immediately if it is
>> FALSE. However, if we get rid of the "active" argument and simply
>> allow dump_RT_prio_array() to always check its queues every time,
>> it still works just fine.
>>
>> For example, I tested my set of sample dumpfiles with this patch:
>>
>> diff -u -r1.205 task.c
>> --- task.c 12 Jul 2012 20:04:00 -0000 1.205
>> +++ task.c 22 Aug 2012 15:33:32 -0000
>> @@ -7636,7 +7636,7 @@
>> OFFSET(cfs_rq_tasks_timeline));
>> }
>>
>> - dump_RT_prio_array(nr_running != cfs_rq_nr_running,
>> + dump_RT_prio_array(TRUE,
>> runq + OFFSET(rq_rt) + OFFSET(rt_rq_active),
>> &runqbuf[OFFSET(rq_rt) +
>> OFFSET(rt_rq_active)]);
>>
>> and the output is identical to testing with, and without, your patch.
>>
>> So the question is whether dump_CFS_runqueues() should be needlessly
>> complicated with all of the "nr_running" references?
>>
>> In fact, it also seems possible that a crash could happen at a point in
>> the scheduler code where those counters are not
>> valid/current/trustworthy.
>>
>> So unless you can convince me otherwise, I'd prefer to just remove
>> the "nr_running" business completely.
>
> Hello Zhang,
>
> Here's the patch I've got queued, which resolves the bug you encountered
> by simplifying things:
>

OK. I see.

And based on this patch, I made a new patch to solve the problem when
dumping rt runqueues. Currently dump_RT_prio_array() doesn't support
the rt group scheduler.

In my test, I put some rt tasks into one group, just like below:

mkdir /cgroup/cpu/test1
echo 850000 > /cgroup/cpu/test1/cpu.rt_runtime_us

./rtloop1 &
echo $! > /cgroup/cpu/test1/tasks
./rtloop1 &
echo $! > /cgroup/cpu/test1/tasks
./rtloop1 &
echo $! > /cgroup/cpu/test1/tasks
./rtloop98 &
echo $! > /cgroup/cpu/test1/tasks
./rtloop45 &
echo $! > /cgroup/cpu/test1/tasks
./rtloop99 &
echo $! > /cgroup/cpu/test1/tasks

Using crash to analyse the vmcore:

crash> runq
CPU 0 RUNQUEUE: ffff880028216680
CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
RT PRIO_ARRAY: ffff880028216808
[ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
[ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
CFS RB_ROOT: ffff880028216718
[120] PID: 5109 TASK: ffff880037923500 COMMAND: "sh"
[120] PID: 5107 TASK: ffff88006eeccaa0 COMMAND: "sh"
[120] PID: 5123 TASK: ffff880107a4caa0 COMMAND: "sh"

CPU 1 RUNQUEUE: ffff880028296680
CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
RT PRIO_ARRAY: ffff880028296808
[ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
[ 54] CFS RB_ROOT: ffff880028296718
[120] PID: 5115 TASK: ffff8801152b1500 COMMAND: "sh"
[120] PID: 5113 TASK: ffff880139530080 COMMAND: "sh"
[120] PID: 5111 TASK: ffff88011bd86080 COMMAND: "sh"
[120] PID: 5121 TASK: ffff880115a9e080 COMMAND: "sh"
[120] PID: 5117 TASK: ffff8801152b0040 COMMAND: "sh"
[120] PID: 5119 TASK: ffff880115a9eae0 COMMAND: "sh"

We can see that the output is incorrect: the rt tasks placed in the
group are missing, and on CPU 1 the "[ 54]" entry is left dangling.

After applying the attached patch, crash seems to work well:

crash> runq
CPU 0 RUNQUEUE: ffff880028216680
CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
RT PRIO_ARRAY: ffff880028216808
[ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
CHILD RT PRIO_ARRAY: ffff88013b050000
[ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
[ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
[ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
[ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
CFS RB_ROOT: ffff880028216718
[120] PID: 5109 TASK: ffff880037923500 COMMAND: "sh"
[120] PID: 5107 TASK: ffff88006eeccaa0 COMMAND: "sh"
[120] PID: 5123 TASK: ffff880107a4caa0 COMMAND: "sh"

CPU 1 RUNQUEUE: ffff880028296680
CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
RT PRIO_ARRAY: ffff880028296808
[ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
[ 54] CHILD RT PRIO_ARRAY: ffff880138978000
[ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
CFS RB_ROOT: ffff880028296718
[120] PID: 5115 TASK: ffff8801152b1500 COMMAND: "sh"
[120] PID: 5113 TASK: ffff880139530080 COMMAND: "sh"
[120] PID: 5111 TASK: ffff88011bd86080 COMMAND: "sh"
[120] PID: 5121 TASK: ffff880115a9e080 COMMAND: "sh"
[120] PID: 5117 TASK: ffff8801152b0040 COMMAND: "sh"
[120] PID: 5119 TASK: ffff880115a9eae0 COMMAND: "sh"

Is this kind of output for the rt runqueues OK? Or do you have any suggestions?

Thanks
Zhang Yanfei
From 550d428cbb6d9d22837e3ef138e1de59e7ccc1b3 Mon Sep 17 00:00:00 2001
From: zhangyanfei <zhangyanfei@cn.fujitsu.com>
Date: Sat, 25 Aug 2012 11:17:37 +0800
Subject: [PATCH] Fix rt not support group sched bug

Signed-off-by: zhangyanfei <zhangyanfei@cn.fujitsu.com>
---
defs.h    |  2 ++
symbols.c |  4 ++++
task.c    | 47 ++++++++++++++++++++++++++++++++++++++++-------
3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/defs.h b/defs.h
index 4a8e2e3..4af670d 100755
--- a/defs.h
+++ b/defs.h
@@ -1785,6 +1785,7 @@ struct offset_table { /* stash of commonly-used offsets */
long log_level;
long log_flags_level;
long timekeeper_xtime_sec;
+ long sched_rt_entity_my_q;
};

struct size_table { /* stash of commonly-used sizes */
@@ -1919,6 +1920,7 @@ struct size_table { /* stash of commonly-used sizes */
long msg_queue;
long log;
long log_level;
+ long rt_rq;
};

struct array_table {
diff --git a/symbols.c b/symbols.c
index 2646ff8..bbadd5e 100755
--- a/symbols.c
+++ b/symbols.c
@@ -8812,6 +8812,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(log_level));
fprintf(fp, " log_flags_level: %ld
",
OFFSET(log_flags_level));
+ fprintf(fp, " sched_rt_entity_my_q: %ld
",
+ OFFSET(sched_rt_entity_my_q));

fprintf(fp, "
size_table:
");
fprintf(fp, " page: %ld
", SIZE(page));
@@ -9027,6 +9029,8 @@ dump_offset_table(char *spec, ulong makestruct)
SIZE(log));
fprintf(fp, " log_level: %ld
",
SIZE(log_level));
+ fprintf(fp, " rt_rq: %ld
",
+ SIZE(rt_rq));

fprintf(fp, "
array_table:
");
/*
diff --git a/task.c b/task.c
index 6e4cfec..eeaad60 100755
--- a/task.c
+++ b/task.c
@@ -7552,6 +7552,7 @@ dump_CFS_runqueues(void)

if (!VALID_STRUCT(cfs_rq)) {
STRUCT_SIZE_INIT(cfs_rq, "cfs_rq");
+ STRUCT_SIZE_INIT(rt_rq, "rt_rq");
MEMBER_OFFSET_INIT(rq_rt, "rq", "rt");
MEMBER_OFFSET_INIT(rq_nr_running, "rq", "nr_running");
MEMBER_OFFSET_INIT(task_struct_se, "task_struct", "se");
@@ -7562,6 +7563,8 @@ dump_CFS_runqueues(void)
"cfs_rq");
MEMBER_OFFSET_INIT(sched_entity_my_q, "sched_entity",
"my_q");
+ MEMBER_OFFSET_INIT(sched_rt_entity_my_q, "sched_rt_entity",
+ "my_q");
MEMBER_OFFSET_INIT(sched_entity_on_rq, "sched_entity", "on_rq");
MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost");
MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running");
@@ -7648,6 +7651,8 @@ dump_CFS_runqueues(void)
FREEBUF(cfs_rq_buf);
}

+static int depth = 0;
+
static void
dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
{
@@ -7657,8 +7662,11 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
struct list_data list_data, *ld;
struct task_context *tc;
ulong *tlist;
+ ulong my_q, task_addr;
+ char *rt_rq_buf;

- fprintf(fp, " RT PRIO_ARRAY: %lx
", k_prio_array);
+ if (!depth)
+ fprintf(fp, " RT PRIO_ARRAY: %lx
", k_prio_array);

qheads = (i = ARRAY_LENGTH(rt_prio_array_queue)) ?
i : get_array_length("rt_prio_array.queue", NULL, SIZE(list_head));
@@ -7678,14 +7686,14 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
if ((list_head[0] == kvaddr) && (list_head[1] == kvaddr))
continue;

- fprintf(fp, " [%3d] ", i);
+ INDENT(5 + 9 * depth);
+ fprintf(fp, "[%3d] ", i);

BZERO(ld, sizeof(struct list_data));
ld->start = list_head[0];
if (VALID_MEMBER(task_struct_rt) &&
VALID_MEMBER(sched_rt_entity_run_list))
- ld->list_head_offset = OFFSET(task_struct_rt) +
- OFFSET(sched_rt_entity_run_list);
+ ld->list_head_offset = OFFSET(sched_rt_entity_run_list);
else
ld->list_head_offset = OFFSET(task_struct_run_list);
ld->end = kvaddr;
@@ -7695,10 +7703,35 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
tlist = (ulong *)GETBUF((cnt) * sizeof(ulong));
cnt = retrieve_list(tlist, cnt);
for (c = 0; c < cnt; c++) {
- if (!(tc = task_to_context(tlist[c])))
+ task_addr = tlist[c];
+ if (VALID_MEMBER(sched_rt_entity_my_q)) {
+ readmem(tlist[c] + OFFSET(sched_rt_entity_my_q),
+ KVADDR, &my_q, sizeof(ulong), "my_q",
+ FAULT_ON_ERROR);
+ if (my_q) {
+ rt_rq_buf = GETBUF(SIZE(rt_rq));
+ readmem(my_q, KVADDR, rt_rq_buf,
+ SIZE(rt_rq), "rt_rq",
+ FAULT_ON_ERROR);
+ if (c)
+ INDENT(11 + 9 * depth);
+ fprintf(fp, "CHILD RT PRIO_ARRAY: %lx
",
+ my_q + OFFSET(rt_rq_active));
+ tot++;
+ depth++;
+ dump_RT_prio_array(
+ my_q + OFFSET(rt_rq_active),
+ &rt_rq_buf[OFFSET(rt_rq_active)]);
+ depth--;
+ continue;
+ } else {
+ task_addr -= OFFSET(task_struct_rt);
+ }
+ }
+ if (!(tc = task_to_context(task_addr)))
continue;
if (c)
- INDENT(11);
+ INDENT(11 + 9 * depth);
fprintf(fp, "PID: %-5ld TASK: %lx COMMAND: "%s"
",
tc->pid, tc->task, tc->comm);
tot++;
@@ -7707,7 +7740,7 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
}

if (!tot) {
- INDENT(5);
+ INDENT(5 + 9 * depth);
fprintf(fp, "[no tasks queued]
");
}
}
--
1.7.1


Dave Anderson 08-27-2012 03:53 PM

Fix bugs in runq
 
----- Original Message -----

> And based on this patch, I made a new patch to solve the problem when
> dumping rt runqueues. Currently dump_RT_prio_array() doesn't support
> the rt group scheduler.
>
> In my test, I put some rt tasks into one group, just like below:
>
> mkdir /cgroup/cpu/test1
> echo 850000 > /cgroup/cpu/test1/cpu.rt_runtime_us
>
> ./rtloop1 &
> echo $! > /cgroup/cpu/test1/tasks
> ./rtloop1 &
> echo $! > /cgroup/cpu/test1/tasks
> ./rtloop1 &
> echo $! > /cgroup/cpu/test1/tasks
> ./rtloop98 &
> echo $! > /cgroup/cpu/test1/tasks
> ./rtloop45 &
> echo $! > /cgroup/cpu/test1/tasks
> ./rtloop99 &
> echo $! > /cgroup/cpu/test1/tasks
>
... [ cut ] ...
>
> After applying the attached patch, crash seems to work well:
>
> crash> runq
> CPU 0 RUNQUEUE: ffff880028216680
> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
> RT PRIO_ARRAY: ffff880028216808
> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
> CHILD RT PRIO_ARRAY: ffff88013b050000
> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
> CFS RB_ROOT: ffff880028216718
> [120] PID: 5109 TASK: ffff880037923500 COMMAND: "sh"
> [120] PID: 5107 TASK: ffff88006eeccaa0 COMMAND: "sh"
> [120] PID: 5123 TASK: ffff880107a4caa0 COMMAND: "sh"
>
> CPU 1 RUNQUEUE: ffff880028296680
> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
> RT PRIO_ARRAY: ffff880028296808
> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
> PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
> [ 54] CHILD RT PRIO_ARRAY: ffff880138978000
> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
> CFS RB_ROOT: ffff880028296718
> [120] PID: 5115 TASK: ffff8801152b1500 COMMAND: "sh"
> [120] PID: 5113 TASK: ffff880139530080 COMMAND: "sh"
> [120] PID: 5111 TASK: ffff88011bd86080 COMMAND: "sh"
> [120] PID: 5121 TASK: ffff880115a9e080 COMMAND: "sh"
> [120] PID: 5117 TASK: ffff8801152b0040 COMMAND: "sh"
> [120] PID: 5119 TASK: ffff880115a9eae0 COMMAND: "sh"
>
> Is this kind of output for the rt runqueues OK? Or do you have any
> suggestions?

Hello Zhang,

I find the output a bit confusing. When Daisuke added support for
displaying the runnable tasks that are contained within a cgroup's
task-group scheduling entity, they are simply added to the list
of runnable tasks for a particular priority value. There is no
special "CHILD"/indent to differentiate them. So I'm not sure why
it would be necessary to do the same thing for RT tasks?

Also, I'm not clear on what is going on here -- on cpu 0, the
"CHILD RT PRIO DISPLAY" looks like it's a "child" of task 5136:

> crash> runq
> CPU 0 RUNQUEUE: ffff880028216680
> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
> RT PRIO_ARRAY: ffff880028216808
> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
> CHILD RT PRIO_ARRAY: ffff88013b050000
> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
> ...

whereas on cpu 1, the "CHILD RT PRIO_ARRAY" header is on the same line
as priority 54:

> CPU 1 RUNQUEUE: ffff880028296680
> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
> RT PRIO_ARRAY: ffff880028296808
> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
> PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
> [ 54] CHILD RT PRIO_ARRAY: ffff880138978000
> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
> CFS RB_ROOT: ffff880028296718

What is it a "child" of? Or maybe "CHILD" is the wrong terminology here?

Dave



Zhang Yanfei 08-28-2012 01:49 AM

Fix bugs in runq
 
On 08/27/2012 23:53, Dave Anderson wrote:
>
>
> ----- Original Message -----
>
>> And based on this patch, I made a new patch to solve the problem when
>> dumping rt runqueues. Currently dump_RT_prio_array() doesn't support
>> the rt group scheduler.
>>
>> In my test, I put some rt tasks into one group, just like below:
>>
>> mkdir /cgroup/cpu/test1
>> echo 850000 > /cgroup/cpu/test1/cpu.rt_runtime_us
>>
>> ./rtloop1 &
>> echo $! > /cgroup/cpu/test1/tasks
>> ./rtloop1 &
>> echo $! > /cgroup/cpu/test1/tasks
>> ./rtloop1 &
>> echo $! > /cgroup/cpu/test1/tasks
>> ./rtloop98 &
>> echo $! > /cgroup/cpu/test1/tasks
>> ./rtloop45 &
>> echo $! > /cgroup/cpu/test1/tasks
>> ./rtloop99 &
>> echo $! > /cgroup/cpu/test1/tasks
>>
> ... [ cut ] ...
>>
>> After applying the attached patch, crash seems to work well:
>>
>> crash> runq
>> CPU 0 RUNQUEUE: ffff880028216680
>> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
>> RT PRIO_ARRAY: ffff880028216808
>> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
>> CHILD RT PRIO_ARRAY: ffff88013b050000
>> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
>> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
>> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
>> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
>> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
>> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
>> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
>> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
>> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
>> CFS RB_ROOT: ffff880028216718
>> [120] PID: 5109 TASK: ffff880037923500 COMMAND: "sh"
>> [120] PID: 5107 TASK: ffff88006eeccaa0 COMMAND: "sh"
>> [120] PID: 5123 TASK: ffff880107a4caa0 COMMAND: "sh"
>>
>> CPU 1 RUNQUEUE: ffff880028296680
>> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
>> RT PRIO_ARRAY: ffff880028296808
>> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
>> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
>> PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
>> [ 54] CHILD RT PRIO_ARRAY: ffff880138978000
>> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
>> CFS RB_ROOT: ffff880028296718
>> [120] PID: 5115 TASK: ffff8801152b1500 COMMAND: "sh"
>> [120] PID: 5113 TASK: ffff880139530080 COMMAND: "sh"
>> [120] PID: 5111 TASK: ffff88011bd86080 COMMAND: "sh"
>> [120] PID: 5121 TASK: ffff880115a9e080 COMMAND: "sh"
>> [120] PID: 5117 TASK: ffff8801152b0040 COMMAND: "sh"
>> [120] PID: 5119 TASK: ffff880115a9eae0 COMMAND: "sh"
>>
>> Is this kind of output for the rt runqueues OK? Or do you have any
>> suggestions?
>
> Hello Zhang,
>
> I find the output a bit confusing. When Daisuke added support for
> displaying the runnable tasks that are contained within a cgroup's
> task-group scheduling entity, they are simply added to the list
> of runnable tasks for a particular priority value. There is no
> special "CHILD"/indent to differentiate them. So I'm not sure why
> it would be necessary to do the same thing for RT tasks?
>
> Also, I'm not clear on what is going on here -- on cpu 0, the
> "CHILD RT PRIO DISPLAY" looks like it's a "child" of task 5136:
>

Hmm, it may be confusing here...

>> crash> runq
>> CPU 0 RUNQUEUE: ffff880028216680
>> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
>> RT PRIO_ARRAY: ffff880028216808
>> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
>> CHILD RT PRIO_ARRAY: ffff88013b050000
>> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
>> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
>> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
>> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
>> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
>> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
>> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
>> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
>> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
>> ...
>
> whereas on cpu 1, the "CHILD RT PRIO_ARRAY" header is on the same line
> as priority 54:
>
>> CPU 1 RUNQUEUE: ffff880028296680
>> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
>> RT PRIO_ARRAY: ffff880028296808
>> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
>> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
>> PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
>> [ 54] CHILD RT PRIO_ARRAY: ffff880138978000
>> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
>> CFS RB_ROOT: ffff880028296718
>
> What is it a "child" of? Or maybe "CHILD" is the wrong terminology here?


Now, the scheduler is not limited to scheduling processes; it can also
work with larger entities. This allows for implementing group scheduling.

So for every RT PRIO_ARRAY, the linked list for each priority has
its elements embedded in "sched_rt_entity" structures. A "sched_rt_entity"
can represent one of two things: a process or a child rt runqueue.

For example, on cpu 0, array[0] has four linked elements:
1. task 5136
2. a child rt rq
3. task 6
4. task 3
and the child rt rq has its own runqueue array with 5 tasks in it:
task 5133 with a priority of 0, task 5131 with a priority of 1, and the
last three tasks -- 5128, 5130, 5129 -- with a priority of 98.
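
In code terms, the distinction is made through the sched_rt_entity's
"my_q" member -- a minimal sketch of the per-element test, mirroring
the patch above ("entity" stands for the list element address, i.e.
tlist[c] in the patch):

	/* my_q is NULL for a task, or points to the group's rt_rq */
	readmem(entity + OFFSET(sched_rt_entity_my_q), KVADDR, &my_q,
		sizeof(ulong), "my_q", FAULT_ON_ERROR);
	if (my_q) {
		/* a child rt runqueue: read it and recurse into
		 * its embedded prio array */
		rt_rq_buf = GETBUF(SIZE(rt_rq));
		readmem(my_q, KVADDR, rt_rq_buf, SIZE(rt_rq), "rt_rq",
			FAULT_ON_ERROR);
		dump_RT_prio_array(my_q + OFFSET(rt_rq_active),
			&rt_rq_buf[OFFSET(rt_rq_active)]);
	} else {
		/* a task: step back from the embedded rt entity to
		 * the enclosing task_struct */
		task_addr = entity - OFFSET(task_struct_rt);
	}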

Thanks
Zhang Yanfei


Dave Anderson 08-28-2012 03:48 PM

Fix bugs in runq
 
----- Original Message -----

> Hmm, may be confusing here...
>
> >> crash> runq
> >> CPU 0 RUNQUEUE: ffff880028216680
> >> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
> >> RT PRIO_ARRAY: ffff880028216808
> >> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
> >> CHILD RT PRIO_ARRAY: ffff88013b050000
> >> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
> >> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
> >> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
> >> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
> >> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
> >> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
> >> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
> >> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
> >> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
> >> ...
> >
> > whereas on cpu 1, the "CHILD RT PRIO_ARRAY" header is on the same
> > line as priority 54:
> >
> >> CPU 1 RUNQUEUE: ffff880028296680
> >> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
> >> RT PRIO_ARRAY: ffff880028296808
> >> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
> >> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
> >> PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
> >> [ 54] CHILD RT PRIO_ARRAY: ffff880138978000
> >> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
> >> CFS RB_ROOT: ffff880028296718
> >
> > What is it a "child" of? Or maybe "CHILD" is the wrong terminology
> > here?
>
>
> Now, the scheduler is not limited to scheduling processes; it can also
> work with larger entities. This allows for implementing group
> scheduling.
>
> So for every RT PRIO_ARRAY, the linked list for each priority has
> its elements embedded in "sched_rt_entity" structures. A "sched_rt_entity"
> can represent one of two things: a process or a child rt runqueue.
>
> For example, on cpu 0, array[0] has four linked elements:
> 1. task 5136
> 2. a child rt rq
> 3. task 6
> 4. task 3
> and the child rt rq has its own runqueue array with 5 tasks in it:
> task 5133 with a priority of 0, task 5131 with a priority of 1, and the
> last three tasks -- 5128, 5130, 5129 -- with a priority of 98.

Right, I understand. What I don't understand is the use of the "child"
terminology. If CONFIG_RT_GROUP_SCHED is configured, then the sched_rt_entity
may reference a "group" run queue. To me, it doesn't make sense to use
the term "CHILD RT PRIO_ARRAY" in the header. Wouldn't it make more sense to
call it a "GROUP RT PRIO_ARRAY"? Like this:

crash> runq
CPU 0 RUNQUEUE: ffff880028216680
CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
RT PRIO_ARRAY: ffff880028216808
[ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
GROUP RT PRIO_ARRAY: ffff88013b050000
[ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
[ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
[ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
[ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
...


CPU 1 RUNQUEUE: ffff880028296680
CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
RT PRIO_ARRAY: ffff880028296808
[ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
PID: 2852 TASK: ffff88013bd5aae0 COMMAND: "rtkit-daemon"
[ 54] GROUP RT PRIO_ARRAY: ffff880138978000
[ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
CFS RB_ROOT: ffff880028296718
...

Thanks,
Dave


Dave Anderson 08-28-2012 08:53 PM

Fix bugs in runq
 
----- Original Message -----

>
> Right, I understand. What I don't understand is the use of the "child"
> terminology. If CONFIG_RT_GROUP_SCHED is configured, then the sched_rt_entity
> may reference a "group" run queue. To me, it doesn't make sense to use
> the term "CHILD RT PRIO_ARRAY" in the header. Wouldn't it make more sense to
> call it a "GROUP RT PRIO_ARRAY"? Like this:
>
> crash> runq
> CPU 0 RUNQUEUE: ffff880028216680
> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
> RT PRIO_ARRAY: ffff880028216808
> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
> GROUP RT PRIO_ARRAY: ffff88013b050000
> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
> ...
>
>
> CPU 1 RUNQUEUE: ffff880028296680
> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
> RT PRIO_ARRAY: ffff880028296808
> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
> PID: 2852 TASK: ffff88013bd5aae0 COMMAND:
> "rtkit-daemon"
> [ 54] GROUP RT PRIO_ARRAY: ffff880138978000
> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
> CFS RB_ROOT: ffff880028296718
> ...
>

Another question re: your patch -- is it possible to have a "depth" greater
than 1?

Thanks,
Dave







Zhang Yanfei 08-29-2012 02:06 AM

Fix bugs in runq
 
On 08/29/2012 04:53, Dave Anderson wrote:
>
>
> ----- Original Message -----
>
>>
>> Right, I understand. What I don't understand is the use of the "child"
>> terminology. If CONFIG_RT_GROUP_SCHED is configured, then the sched_rt_entity
>> may reference a "group" run queue. To me, it doesn't make sense to use
>> the term "CHILD RT PRIO_ARRAY" in the header. Wouldn't it make more sense to
>> call it a "GROUP RT PRIO_ARRAY"? Like this:
>>
>> crash> runq
>> CPU 0 RUNQUEUE: ffff880028216680
>> CURRENT: PID: 5125 TASK: ffff88010799d540 COMMAND: "sh"
>> RT PRIO_ARRAY: ffff880028216808
>> [ 0] PID: 5136 TASK: ffff8801153cc040 COMMAND: "rtloop99"
>> GROUP RT PRIO_ARRAY: ffff88013b050000
>> [ 0] PID: 5133 TASK: ffff88010799c080 COMMAND: "rtloop99"
>> [ 1] PID: 5131 TASK: ffff880037922aa0 COMMAND: "rtloop98"
>> [ 98] PID: 5128 TASK: ffff88011bd87540 COMMAND: "rtloop1"
>> PID: 5130 TASK: ffff8801396e7500 COMMAND: "rtloop1"
>> PID: 5129 TASK: ffff88011bf5a080 COMMAND: "rtloop1"
>> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
>> PID: 3 TASK: ffff88013d7ba040 COMMAND: "migration/0"
>> [ 1] PID: 5134 TASK: ffff8801153cd500 COMMAND: "rtloop98"
>> PID: 5135 TASK: ffff8801153ccaa0 COMMAND: "rtloop98"
>> ...
>>
>>
>> CPU 1 RUNQUEUE: ffff880028296680
>> CURRENT: PID: 5086 TASK: ffff88006eecc040 COMMAND: "bash"
>> RT PRIO_ARRAY: ffff880028296808
>> [ 0] PID: 5137 TASK: ffff880107b35540 COMMAND: "rtloop99"
>> PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
>> PID: 2852 TASK: ffff88013bd5aae0 COMMAND:
>> "rtkit-daemon"
>> [ 54] GROUP RT PRIO_ARRAY: ffff880138978000
>> [ 54] PID: 5132 TASK: ffff88006eecd500 COMMAND: "rtloop45"
>> CFS RB_ROOT: ffff880028296718
>> ...
>>
>
> Another question re: your patch -- is it possible to have a "depth" greater
> than 1?
>

Yes, "depth" could be greater than 1, see the example below:

CPU 0 RUNQUEUE: ffff880028216680
CURRENT: PID: 17085 TASK: ffff880137c63540 COMMAND: "bash"
RT PRIO_ARRAY: ffff880028216808 <-- depth = 0
[ 0] PID: 17129 TASK: ffff880037aeaaa0 COMMAND: "rtloop99"
PID: 2832 TASK: ffff88013b09cae0 COMMAND: "rtkit-daemon"
PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
[ 1] GROUP RT PRIO_ARRAY: ffff88002ca65000 <-- depth = 1
[ 1] GROUP RT PRIO_ARRAY: ffff880015821000 <-- depth = 2
[ 1] PID: 17126 TASK: ffff880135d2a040 COMMAND: "rtloop98"
[ 98] PID: 17119 TASK: ffff88010190d500 COMMAND: "rtloop1"
PID: 17121 TASK: ffff88013bd27500 COMMAND: "rtloop1"
PID: 17120 TASK: ffff88010190caa0 COMMAND: "rtloop1"
CFS RB_ROOT: ffff880028216718
[120] PID: 17114 TASK: ffff88001328d500 COMMAND: "sh"
[120] PID: 17112 TASK: ffff880037b84080 COMMAND: "sh"

CPU 1 RUNQUEUE: ffff880028296680
CURRENT: PID: 17106 TASK: ffff88001852a080 COMMAND: "sh"
RT PRIO_ARRAY: ffff880028296808 <-- depth = 0
[ 0] GROUP RT PRIO_ARRAY: ffff880103ded800 <-- depth = 1
[ 0] GROUP RT PRIO_ARRAY: ffff88011ae70800 <-- depth = 2
[ 0] PID: 17127 TASK: ffff8800378f6040 COMMAND: "rtloop99"
PID: 17124 TASK: ffff8800a9592ae0 COMMAND: "rtloop99"
[ 1] PID: 17122 TASK: ffff88011aec3500 COMMAND: "rtloop98"
[ 54] PID: 17123 TASK: ffff88013b414ae0 COMMAND: "rtloop45"
PID: 10 TASK: ffff88013cc2cae0 COMMAND: "watchdog/1"
PID: 7 TASK: ffff88013d7ef500 COMMAND: "migration/1"
[ 1] PID: 17128 TASK: ffff880139761540 COMMAND: "rtloop98"
CFS RB_ROOT: ffff880028296718
[120] PID: 17104 TASK: ffff88010fc0aaa0 COMMAND: "sh"
[120] PID: 17102 TASK: ffff880137f83540 COMMAND: "sh"
[120] PID: 17098 TASK: ffff880013294080 COMMAND: "sh"
[120] PID: 17100 TASK: ffff88011aec2040 COMMAND: "sh"
[120] PID: 17116 TASK: ffff880138d4f540 COMMAND: "sh"
[120] PID: 17110 TASK: ffff880137c06080 COMMAND: "sh"
[120] PID: 17108 TASK: ffff880037aeb500 COMMAND: "sh"

Hmm, I think the depth would not get that big in practice. So what do
you think of this kind of output?

The attached patch just changed "CHILD" to "GROUP".

Thanks
Zhang Yanfei
From 550d428cbb6d9d22837e3ef138e1de59e7ccc1b3 Mon Sep 17 00:00:00 2001
From: zhangyanfei <zhangyanfei@cn.fujitsu.com>
Date: Sat, 25 Aug 2012 11:17:37 +0800
Subject: [PATCH] Fix rt not support group sched bug

Signed-off-by: zhangyanfei <zhangyanfei@cn.fujitsu.com>
---
defs.h    |  2 ++
symbols.c |  4 ++++
task.c    | 47 ++++++++++++++++++++++++++++++++++++++++-------
3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/defs.h b/defs.h
index 4a8e2e3..4af670d 100755
--- a/defs.h
+++ b/defs.h
@@ -1785,6 +1785,7 @@ struct offset_table { /* stash of commonly-used offsets */
long log_level;
long log_flags_level;
long timekeeper_xtime_sec;
+ long sched_rt_entity_my_q;
};

struct size_table { /* stash of commonly-used sizes */
@@ -1919,6 +1920,7 @@ struct size_table { /* stash of commonly-used sizes */
long msg_queue;
long log;
long log_level;
+ long rt_rq;
};

struct array_table {
diff --git a/symbols.c b/symbols.c
index 2646ff8..bbadd5e 100755
--- a/symbols.c
+++ b/symbols.c
@@ -8812,6 +8812,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(log_level));
fprintf(fp, " log_flags_level: %ld
",
OFFSET(log_flags_level));
+ fprintf(fp, " sched_rt_entity_my_q: %ld
",
+ OFFSET(sched_rt_entity_my_q));

fprintf(fp, "
size_table:
");
fprintf(fp, " page: %ld
", SIZE(page));
@@ -9027,6 +9029,8 @@ dump_offset_table(char *spec, ulong makestruct)
SIZE(log));
fprintf(fp, " log_level: %ld
",
SIZE(log_level));
+ fprintf(fp, " rt_rq: %ld
",
+ SIZE(rt_rq));

fprintf(fp, "
array_table:
");
/*
diff --git a/task.c b/task.c
index 6e4cfec..eeaad60 100755
--- a/task.c
+++ b/task.c
@@ -7552,6 +7552,7 @@ dump_CFS_runqueues(void)

if (!VALID_STRUCT(cfs_rq)) {
STRUCT_SIZE_INIT(cfs_rq, "cfs_rq");
+ STRUCT_SIZE_INIT(rt_rq, "rt_rq");
MEMBER_OFFSET_INIT(rq_rt, "rq", "rt");
MEMBER_OFFSET_INIT(rq_nr_running, "rq", "nr_running");
MEMBER_OFFSET_INIT(task_struct_se, "task_struct", "se");
@@ -7562,6 +7563,8 @@ dump_CFS_runqueues(void)
"cfs_rq");
MEMBER_OFFSET_INIT(sched_entity_my_q, "sched_entity",
"my_q");
+ MEMBER_OFFSET_INIT(sched_rt_entity_my_q, "sched_rt_entity",
+ "my_q");
MEMBER_OFFSET_INIT(sched_entity_on_rq, "sched_entity", "on_rq");
MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost");
MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running");
@@ -7648,6 +7651,8 @@ dump_CFS_runqueues(void)
FREEBUF(cfs_rq_buf);
}

+static int depth = 0;
+
static void
dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
{
@@ -7657,8 +7662,11 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
struct list_data list_data, *ld;
struct task_context *tc;
ulong *tlist;
+ ulong my_q, task_addr;
+ char *rt_rq_buf;

- fprintf(fp, " RT PRIO_ARRAY: %lx
", k_prio_array);
+ if (!depth)
+ fprintf(fp, " RT PRIO_ARRAY: %lx
", k_prio_array);

qheads = (i = ARRAY_LENGTH(rt_prio_array_queue)) ?
i : get_array_length("rt_prio_array.queue", NULL, SIZE(list_head));
@@ -7678,14 +7686,14 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
if ((list_head[0] == kvaddr) && (list_head[1] == kvaddr))
continue;

- fprintf(fp, " [%3d] ", i);
+ INDENT(5 + 9 * depth);
+ fprintf(fp, "[%3d] ", i);

BZERO(ld, sizeof(struct list_data));
ld->start = list_head[0];
if (VALID_MEMBER(task_struct_rt) &&
VALID_MEMBER(sched_rt_entity_run_list))
- ld->list_head_offset = OFFSET(task_struct_rt) +
- OFFSET(sched_rt_entity_run_list);
+ ld->list_head_offset = OFFSET(sched_rt_entity_run_list);
else
ld->list_head_offset = OFFSET(task_struct_run_list);
ld->end = kvaddr;
@@ -7695,10 +7703,35 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
tlist = (ulong *)GETBUF((cnt) * sizeof(ulong));
cnt = retrieve_list(tlist, cnt);
for (c = 0; c < cnt; c++) {
- if (!(tc = task_to_context(tlist[c])))
+ task_addr = tlist[c];
+ if (VALID_MEMBER(sched_rt_entity_my_q)) {
+ readmem(tlist[c] + OFFSET(sched_rt_entity_my_q),
+ KVADDR, &my_q, sizeof(ulong), "my_q",
+ FAULT_ON_ERROR);
+ if (my_q) {
+ rt_rq_buf = GETBUF(SIZE(rt_rq));
+ readmem(my_q, KVADDR, rt_rq_buf,
+ SIZE(rt_rq), "rt_rq",
+ FAULT_ON_ERROR);
+ if (c)
+ INDENT(11 + 9 * depth);
+ fprintf(fp, "GROUP RT PRIO_ARRAY: %lx
",
+ my_q + OFFSET(rt_rq_active));
+ tot++;
+ depth++;
+ dump_RT_prio_array(
+ my_q + OFFSET(rt_rq_active),
+ &rt_rq_buf[OFFSET(rt_rq_active)]);
+ depth--;
+ continue;
+ } else {
+ task_addr -= OFFSET(task_struct_rt);
+ }
+ }
+ if (!(tc = task_to_context(task_addr)))
continue;
if (c)
- INDENT(11);
+ INDENT(11 + 9 * depth);
fprintf(fp, "PID: %-5ld TASK: %lx COMMAND: "%s"
",
tc->pid, tc->task, tc->comm);
tot++;
@@ -7707,7 +7740,7 @@ dump_RT_prio_array(ulong k_prio_array, char *u_prio_array)
}

if (!tot) {
- INDENT(5);
+ INDENT(5 + 9 * depth);
fprintf(fp, "[no tasks queued]
");
}
}
--
1.7.1


Dave Anderson 08-29-2012 06:30 PM

Fix bugs in runq
 
----- Original Message -----

> > Another question re: your patch -- is it possible to have a "depth" greater
> > than 1?
>
> Yes, "depth" could be greater than 1, see the example below:
>
> CPU 0 RUNQUEUE: ffff880028216680
> CURRENT: PID: 17085 TASK: ffff880137c63540 COMMAND: "bash"
> RT PRIO_ARRAY: ffff880028216808 <-- depth = 0
> [ 0] PID: 17129 TASK: ffff880037aeaaa0 COMMAND: "rtloop99"
> PID: 2832 TASK: ffff88013b09cae0 COMMAND: "rtkit-daemon"
> PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
> [ 1] GROUP RT PRIO_ARRAY: ffff88002ca65000 <-- depth = 1
> [ 1] GROUP RT PRIO_ARRAY: ffff880015821000 <-- depth = 2
> [ 1] PID: 17126 TASK: ffff880135d2a040 COMMAND: "rtloop98"
> [ 98] PID: 17119 TASK: ffff88010190d500 COMMAND: "rtloop1"
> PID: 17121 TASK: ffff88013bd27500 COMMAND: "rtloop1"
> PID: 17120 TASK: ffff88010190caa0 COMMAND: "rtloop1"
> CFS RB_ROOT: ffff880028216718
...

> Hmm, I think the depth would not get that big in practice. So what do
> you think of this kind of output?
>
> The attached patch just changed "CHILD" to "GROUP".

Interesting -- how did you set up the depth-greater-than-one scenario?

Anyway, given that it is possible, let's at least tighten up the output display
by changing each "9 * depth" usage to be "6 * depth". That should alter
your example output to look like this:

CPU 0 RUNQUEUE: ffff880028216680
CURRENT: PID: 17085 TASK: ffff880137c63540 COMMAND: "bash"
RT PRIO_ARRAY: ffff880028216808
[ 0] PID: 17129 TASK: ffff880037aeaaa0 COMMAND: "rtloop99"
PID: 2832 TASK: ffff88013b09cae0 COMMAND: "rtkit-daemon"
PID: 6 TASK: ffff88013d7c6080 COMMAND: "watchdog/0"
[ 1] GROUP RT PRIO_ARRAY: ffff88002ca65000
[ 1] GROUP RT PRIO_ARRAY: ffff880015821000
[ 1] PID: 17126 TASK: ffff880135d2a040 COMMAND: "rtloop98"
[ 98] PID: 17119 TASK: ffff88010190d500 COMMAND: "rtloop1"
PID: 17121 TASK: ffff88013bd27500 COMMAND: "rtloop1"
PID: 17120 TASK: ffff88010190caa0 COMMAND: "rtloop1"
CFS RB_ROOT: ffff880028216718
...

And also, I'd prefer to not create the dangling "static int depth",
but rather to add a depth argument to dump_RT_prio_array(), where
dump_CFS_runqueues() passes a 0, and dump_RT_prio_array() passes
"depth+1" to itself:

static void
dump_RT_prio_array(int depth, ulong k_prio_array, char *u_prio_array)
{
int i, c, tot, cnt, qheads;
ulong offset, kvaddr, uvaddr;
ulong list_head[2];
struct list_data list_data, *ld;
struct task_context *tc;
ulong *tlist;
ulong my_q, task_addr;
char *rt_rq_buf;

if (!depth)
fprintf(fp, " RT PRIO_ARRAY: %lx
", k_prio_array);

qheads = (i = ARRAY_LENGTH(rt_prio_array_queue)) ?
i : get_array_length("rt_prio_array.queue", NULL, SIZE(list_head));

ld = &list_data;

for (i = tot = 0; i < qheads; i++) {
offset = OFFSET(rt_prio_array_queue) + (i * SIZE(list_head));
kvaddr = k_prio_array + offset;
uvaddr = (ulong)u_prio_array + offset;
BCOPY((char *)uvaddr, (char *)&list_head[0], sizeof(ulong)*2);

if (CRASHDEBUG(1))
fprintf(fp, "rt_prio_array[%d] @ %lx => %lx/%lx
",
i, kvaddr, list_head[0], list_head[1]);

if ((list_head[0] == kvaddr) && (list_head[1] == kvaddr))
continue;

INDENT(5 + 6 * depth);
fprintf(fp, "[%3d] ", i);

BZERO(ld, sizeof(struct list_data));
ld->start = list_head[0];
if (VALID_MEMBER(task_struct_rt) &&
VALID_MEMBER(sched_rt_entity_run_list))
ld->list_head_offset = OFFSET(sched_rt_entity_run_list);
else
ld->list_head_offset = OFFSET(task_struct_run_list);
ld->end = kvaddr;
hq_open();
cnt = do_list(ld);
hq_close();
tlist = (ulong *)GETBUF((cnt) * sizeof(ulong));
cnt = retrieve_list(tlist, cnt);
for (c = 0; c < cnt; c++) {
task_addr = tlist[c];
if (VALID_MEMBER(sched_rt_entity_my_q)) {
readmem(tlist[c] + OFFSET(sched_rt_entity_my_q),
KVADDR, &my_q, sizeof(ulong), "my_q",
FAULT_ON_ERROR);
if (my_q) {
rt_rq_buf = GETBUF(SIZE(rt_rq));
readmem(my_q, KVADDR, rt_rq_buf,
SIZE(rt_rq), "rt_rq",
FAULT_ON_ERROR);
if (c)
INDENT(11 + 6 * depth);
fprintf(fp, "GROUP RT PRIO_ARRAY: %lx
",
my_q + OFFSET(rt_rq_active));
tot++;
dump_RT_prio_array(depth+1,
my_q + OFFSET(rt_rq_active),
&rt_rq_buf[OFFSET(rt_rq_active)]);
continue;
} else
task_addr -= OFFSET(task_struct_rt);
}
if (!(tc = task_to_context(task_addr)))
continue;
if (c)
INDENT(11 + 6 * depth);
fprintf(fp, "PID: %-5ld TASK: %lx COMMAND: "%s"
",
tc->pid, tc->task, tc->comm);
tot++;
}
FREEBUF(tlist);
}

if (!tot) {
INDENT(5 + 6 * depth);
fprintf(fp, "[no tasks queued]
");
}
}

Can you verify that those changes work for you?

Thanks,
Dave


