FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Crash Utility

 
 
LinkBack Thread Tools
 
Old 10-18-2010, 07:22 AM
Hu Tao
 
Default bug on get_be_long() and improvement of bt

Hi Dave,

There is a bug on get_be_long() that causes high 32 bits truncated.
As a result, we get wrong registers values from dump file. Patch 1
fixes this.

Once we can get right cpu registers values, it's better to use the
sp/ip for backtracing the active task. This can show a more accurate
backtrace, not including those invalid frames beyond sp. pathes 2 and
3 do this on kvmdump case(virsh dump).

To verify: run that km_probe.c test module on a x86_64 system, then
`echo q > /proc/sysrq-trigger' to trigger the kprobe which does
looping in post_handler. Then vrish dump then crash.

--
Thanks,
Hu Tao
diff --git a/qemu-load.c b/qemu-load.c
index 998b6d4..95eaf97 100644
--- a/qemu-load.c
+++ b/qemu-load.c
@@ -408,7 +408,7 @@ cpu_common_init_load (struct qemu_device_list *dl,

/* CPU loader. */

-static inline int
+static inline uint64_t
get_be_long (FILE *fp, int size)
{
uint32_t a = size == 32 ? 0 : get_be32 (fp);
diff --git a/qemu-load.h b/qemu-load.h
index 578fedd..0d8db4e 100644
--- a/qemu-load.h
+++ b/qemu-load.h
@@ -140,6 +140,18 @@ struct qemu_x86_mce {
uint64_t mce_banks[10 * 4];
};

+enum CPU_REG {
+ R_EAX,
+ R_ECX,
+ R_EDX,
+ R_EBX,
+ R_ESP,
+ R_EBP,
+ R_ESI,
+ R_EDI,
+ R_GP_MAX,
+};
+
struct qemu_device_x86 {
struct qemu_device dev_base;

diff --git a/kernel.c b/kernel.c
index e399099..2627020 100755
--- a/kernel.c
+++ b/kernel.c
@@ -16,6 +16,7 @@
*/

#include "defs.h"
+#include "qemu-load.h"
#include "xen_hyper_defs.h"
#include <elf.h>

diff --git a/kvmdump.c b/kvmdump.c
index 1bf0d9e..557d329 100644
--- a/kvmdump.c
+++ b/kvmdump.c
@@ -17,6 +17,7 @@

#include "defs.h"
#include "kvmdump.h"
+#include "qemu-load.h"

static struct kvmdump_data kvmdump_data = { 0 };
struct kvmdump_data *kvm = &kvmdump_data;
@@ -310,7 +311,11 @@ kvmdump_memory_dump(FILE *ofp)
void
get_kvmdump_regs(struct bt_info *bt, ulong *pc, ulong *sp)
{
- machdep->get_stack_frame(bt, pc, sp);
+ if (is_task_active(bt->task)) {
+ *sp = device_list->dx86->regs[R_ESP];
+ *pc = device_list->dx86->eip;
+ } else
+ machdep->get_stack_frame(bt, pc, sp);
}

ulong
diff --git a/main.c b/main.c
index 925de2f..ecab83c 100755
--- a/main.c
+++ b/main.c
@@ -16,6 +16,7 @@
*/

#include "defs.h"
+#include "qemu-load.h"
#include "xen_hyper_defs.h"
#include <curses.h>
#include <getopt.h>
@@ -1445,6 +1446,8 @@ clean_exit(int status)
if (pc->flags & MEMMOD)
cleanup_memory_driver();

+ device_list_free (device_list);
+
exit(status);
}

diff --git a/qemu-load.c b/qemu-load.c
index 95eaf97..c807c49 100644
--- a/qemu-load.c
+++ b/qemu-load.c
@@ -18,6 +18,7 @@
*/

#define _GNU_SOURCE
+#include "defs.h"
#include "qemu-load.h"
#include <stdlib.h>
#include <string.h>
@@ -881,6 +882,7 @@ qemu_load (const struct qemu_device_loader *devices, uint32_t required_features,
dprintf("
");

result = calloc (1, sizeof (struct qemu_device_list));
+ result->dx86 = NULL;
for (; {
struct qemu_device *d;
uint32_t features;
@@ -904,6 +906,9 @@ qemu_load (const struct qemu_device_loader *devices, uint32_t required_features,
if (feof (fp) || ferror (fp))
break;

+ if (STREQ(d->vtbl->name, "cpu"))
+ result->dx86 = d;
+
if (sec == QEMU_VM_SECTION_END || sec == QEMU_VM_SECTION_FULL)
result->features |= features;
}
@@ -924,7 +929,6 @@ fail:
* crash utility adaptation.
*/

-#include "defs.h"

int
is_qemu_vm_file(char *filename)
diff --git a/qemu-load.h b/qemu-load.h
index 0d8db4e..3872899 100644
--- a/qemu-load.h
+++ b/qemu-load.h
@@ -40,6 +40,7 @@ enum qemu_features {

struct qemu_device_list {
struct qemu_device *head, *tail;
+ struct qemu_device_x86 *dx86;
uint32_t features;
};

@@ -232,4 +233,6 @@ extern const struct qemu_device_loader devices_x86_32[];
/* For a 64-bit KVM host. */
extern const struct qemu_device_loader devices_x86_64[];

+extern struct qemu_device_list *device_list;
+
#endif
diff --git a/qemu.c b/qemu.c
index 84b6f96..a355c4f 100644
--- a/qemu.c
+++ b/qemu.c
@@ -280,10 +280,11 @@ int main (int argc, char **argv)

#include "defs.h"

+struct qemu_device_list *device_list = NULL;
+
int
qemu_init(char *filename)
{
- struct qemu_device_list *dl;
struct qemu_device_ram *dram;
uint64_t idt = 0;

@@ -298,24 +299,24 @@ qemu_init(char *filename)
please_wait("scanning KVM dumpfile");

if (machine_type("X86"))
- dl = qemu_load(devices_x86_32,
+ device_list = qemu_load(devices_x86_32,
QEMU_FEATURE_CPU|QEMU_FEATURE_RAM, kvm->vmp);
else if (machine_type("X86_64"))
- dl = qemu_load(devices_x86_64,
+ device_list = qemu_load(devices_x86_64,
QEMU_FEATURE_CPU|QEMU_FEATURE_RAM, kvm->vmp);
else
- dl = NULL;
+ device_list = NULL;

please_wait_done();

- if (dl) {
+ if (device_list) {
if (machine_type("X86_64")) {
- idt = get_idt_base(dl);
- kvm->mapinfo.phys_base = get_kernel_base(dl);
+ idt = get_idt_base(device_list);
+ kvm->mapinfo.phys_base = get_kernel_base(device_list);
}

dram = (struct qemu_device_ram *)
- device_find_instance (dl, "ram", 0);
+ device_find_instance (device_list, "ram", 0);

if (CRASHDEBUG(1)) {
if (machine_type("X86_64")) {
@@ -327,10 +328,8 @@ qemu_init(char *filename)
fprintf(kvm->ofp, "last RAM offset: %llx
",
(ulonglong)dram->last_ram_offset);
}
-
- device_list_free (dl);
} else
fclose(kvm->vmp);

- return dl ? TRUE : FALSE;
+ return device_list ? TRUE : FALSE;
}
--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-18-2010, 12:57 PM
Dave Anderson
 
Default bug on get_be_long() and improvement of bt

----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:

> Hi Dave,
>
> There is a bug on get_be_long() that causes high 32 bits truncated.
> As a result, we get wrong registers values from dump file. Patch 1
> fixes this.

Good catch!

> Once we can get right cpu registers values, it's better to use the
> sp/ip for backtracing the active task. This can show a more accurate
> backtrace, not including those invalid frames beyond sp. pathes 2 and
> 3 do this on kvmdump case(virsh dump).
>
> To verify: run that km_probe.c test module on a x86_64 system, then
> `echo q > /proc/sysrq-trigger' to trigger the kprobe which does
> looping in post_handler. Then vrish dump then crash.

However, I'm wondering whether you actually tested this with a
*crashed* system's dumpfile, and not just with a *live* system dump with
a contrived set of circumstances. And if so, what differences, if any,
did you see with the backtraces of the task that either oops'd or called
panic(), as well as those of the other active tasks that were brought down
by IP interrupt?

Anyway, I'll give this a thorough testing with a set of sample
dumpfiles that I have on hand.

Thanks,
Dave


--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-18-2010, 01:56 PM
Dave Anderson
 
Default bug on get_be_long() and improvement of bt

----- "Dave Anderson" <anderson@redhat.com> wrote:

> ----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:
>
> > Hi Dave,
> >
> > There is a bug on get_be_long() that causes high 32 bits truncated.
> > As a result, we get wrong registers values from dump file. Patch 1
> > fixes this.
>
> Good catch!
>
> > Once we can get right cpu registers values, it's better to use the
> > sp/ip for backtracing the active task. This can show a more accurate
> > backtrace, not including those invalid frames beyond sp. pathes 2 and
> > 3 do this on kvmdump case(virsh dump).
> >
> > To verify: run that km_probe.c test module on a x86_64 system, then
> > `echo q > /proc/sysrq-trigger' to trigger the kprobe which does
> > looping in post_handler. Then vrish dump then crash.
>
> However, I'm wondering whether you actually tested this with a
> *crashed* system's dumpfile, and not just with a *live* system dump with
> a contrived set of circumstances. And if so, what differences, if any,
> did you see with the backtraces of the task that either oops'd or called
> panic(), as well as those of the other active tasks that were brought down
> by IP interrupt?
>
> Anyway, I'll give this a thorough testing with a set of sample
> dumpfiles that I have on hand.

Actually, the patch fails miserably on SMP dumpfiles with segmentation
during initialization.

And now looking at your patch, I'm wondering whether you even tested this
with an SMP system?

The change to qemu_load() here:

@@ -904,6 +906,9 @@ qemu_load (const struct qemu_device_loader *devices, uint32_t required_features,
if (feof (fp) || ferror (fp))
break;

+ if (STREQ(d->vtbl->name, "cpu"))
+ result->dx86 = d;
+
if (sec == QEMU_VM_SECTION_END || sec == QEMU_VM_SECTION_FULL)
result->features |= features;
}

That function cycles through the "cpu" devices for *each* cpu
in the system, so this patch will store the device of the last cpu
device it encounters. So in an SMP machine, it will store the
device for the highest cpu only, right?

And then there's this change to get_kvmdump_regs():

@@ -310,7 +311,11 @@ kvmdump_memory_dump(FILE *ofp)
void
get_kvmdump_regs(struct bt_info *bt, ulong *pc, ulong *sp)
{
- machdep->get_stack_frame(bt, pc, sp);
+ if (is_task_active(bt->task)) {
+ *sp = device_list->dx86->regs[R_ESP];
+ *pc = device_list->dx86->eip;
+ } else
+ machdep->get_stack_frame(bt, pc, sp);
}

is_task_active() returns TRUE for all active tasks in an SMP system,
not just the panic task. So it would seem you're going to pass
back the same registers for all active tasks?

And what's the point of this change to kernel.c?

diff --git a/kernel.c b/kernel.c
index e399099..2627020 100755
--- a/kernel.c
+++ b/kernel.c
@@ -16,6 +16,7 @@
*/

#include "defs.h"
+#include "qemu-load.h"
#include "xen_hyper_defs.h"
#include <elf.h>

Also, the change to main.c is unnecessary -- there are dozens of malloc'd
memory areas in the program -- so why go to the bother of free'ing
just this one prior to exiting?

Anyway, instead of saving the device list, I suggest you do something
like storing the per-cpu IP/SP values in a separate data structure that
can possibly be used as an alternative source for register values for
"live dumps" -- and possibly for crashing systems if usable starting
hooks cannot be determined in the traditional manner. I had thought
of doing something like that in the past, but when I looked at the
register values, I must have run into the get_be_long() issue?

Dave








--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-19-2010, 12:36 AM
Hu Tao
 
Default bug on get_be_long() and improvement of bt

On Mon, Oct 18, 2010 at 09:56:41AM -0400, Dave Anderson wrote:
>
> ----- "Dave Anderson" <anderson@redhat.com> wrote:
>
> > ----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:
> >
> > > Hi Dave,
> > >
> > > There is a bug on get_be_long() that causes high 32 bits truncated.
> > > As a result, we get wrong registers values from dump file. Patch 1
> > > fixes this.
> >
> > Good catch!
> >
> > > Once we can get right cpu registers values, it's better to use the
> > > sp/ip for backtracing the active task. This can show a more accurate
> > > backtrace, not including those invalid frames beyond sp. pathes 2 and
> > > 3 do this on kvmdump case(virsh dump).
> > >
> > > To verify: run that km_probe.c test module on a x86_64 system, then
> > > `echo q > /proc/sysrq-trigger' to trigger the kprobe which does
> > > looping in post_handler. Then vrish dump then crash.
> >
> > However, I'm wondering whether you actually tested this with a
> > *crashed* system's dumpfile, and not just with a *live* system dump with
> > a contrived set of circumstances. And if so, what differences, if any,
> > did you see with the backtraces of the task that either oops'd or called
> > panic(), as well as those of the other active tasks that were brought down
> > by IP interrupt?
> >
> > Anyway, I'll give this a thorough testing with a set of sample
> > dumpfiles that I have on hand.
>
> Actually, the patch fails miserably on SMP dumpfiles with segmentation
> during initialization.
>
> And now looking at your patch, I'm wondering whether you even tested this
> with an SMP system?

My fault. I will do more tests and improve the patch.

* test with a SMP system
* test with a live dump
* test with system panic/oops
* test with x86/x86_64

>
> The change to qemu_load() here:
>
> @@ -904,6 +906,9 @@ qemu_load (const struct qemu_device_loader *devices, uint32_t required_features,
> if (feof (fp) || ferror (fp))
> break;
>
> + if (STREQ(d->vtbl->name, "cpu"))
> + result->dx86 = d;
> +
> if (sec == QEMU_VM_SECTION_END || sec == QEMU_VM_SECTION_FULL)
> result->features |= features;
> }
>
> That function cycles through the "cpu" devices for *each* cpu
> in the system, so this patch will store the device of the last cpu
> device it encounters. So in an SMP machine, it will store the
> device for the highest cpu only, right?
>
> And then there's this change to get_kvmdump_regs():
>
> @@ -310,7 +311,11 @@ kvmdump_memory_dump(FILE *ofp)
> void
> get_kvmdump_regs(struct bt_info *bt, ulong *pc, ulong *sp)
> {
> - machdep->get_stack_frame(bt, pc, sp);
> + if (is_task_active(bt->task)) {
> + *sp = device_list->dx86->regs[R_ESP];
> + *pc = device_list->dx86->eip;
> + } else
> + machdep->get_stack_frame(bt, pc, sp);
> }
>
> is_task_active() returns TRUE for all active tasks in an SMP system,
> not just the panic task. So it would seem you're going to pass
> back the same registers for all active tasks?
>
> And what's the point of this change to kernel.c?
>
> diff --git a/kernel.c b/kernel.c
> index e399099..2627020 100755
> --- a/kernel.c
> +++ b/kernel.c
> @@ -16,6 +16,7 @@
> */
>
> #include "defs.h"
> +#include "qemu-load.h"
> #include "xen_hyper_defs.h"
> #include <elf.h>
>
> Also, the change to main.c is unnecessary -- there are dozens of malloc'd
> memory areas in the program -- so why go to the bother of free'ing
> just this one prior to exiting?
>
> Anyway, instead of saving the device list, I suggest you do something
> like storing the per-cpu IP/SP values in a separate data structure that
> can possibly be used as an alternative source for register values for
> "live dumps" -- and possibly for crashing systems if usable starting
> hooks cannot be determined in the traditional manner. I had thought
> of doing something like that in the past, but when I looked at the
> register values, I must have run into the get_be_long() issue?

Sounds reasonable. Thanks for suggestions.

--
Thanks,
Hu Tao

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-19-2010, 09:05 AM
Hu Tao
 
Default bug on get_be_long() and improvement of bt

Hi Dave,

These are updated patches tested with SMP system and panic task.

When testing a x86 guest, I found another bug about reading cpu
registers from dumpfile. Qemu simulated system is x86_64
(qemu-system-x86_64), guest OS is x86. When crash reads cpu registers
from dumpfile, it uses cpu_load_32(), this will read gp registers by
get_be_long(fp, 32), that is, treate them as 32bits. But in fact,
qemu-system-x86_64 saves 64bits for each of them(although guest OS
uses only lower 32 bits). As a result, crash gets wrong cpu gp
register values.

Is there any way we can know from dumpfile that these gp
registers(and those similar registers) are 32bits or 64bits?

--
Thanks,
Hu Tao
>From 007a86417ca1995e2727230851bd4f55dc8eff47 Mon Sep 17 00:00:00 2001
From: Hu Tao <hutao@cn.fujitsu.com>
Date: Mon, 18 Oct 2010 10:19:47 +0800
Subject: [PATCH 1/3] Return uint64_t for get_be_long()

---
qemu-load.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-load.c b/qemu-load.c
index 998b6d4..95eaf97 100644
--- a/qemu-load.c
+++ b/qemu-load.c
@@ -408,7 +408,7 @@ cpu_common_init_load (struct qemu_device_list *dl,

/* CPU loader. */

-static inline int
+static inline uint64_t
get_be_long (FILE *fp, int size)
{
uint32_t a = size == 32 ? 0 : get_be32 (fp);
--
1.7.3

>From a329c20495c5dae4c0759798b4b954d0b0a45d1a Mon Sep 17 00:00:00 2001
From: Hu Tao <hutao@cn.fujitsu.com>
Date: Mon, 18 Oct 2010 11:28:04 +0800
Subject: [PATCH 2/3] Add macros for regs

---
qemu-load.h | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/qemu-load.h b/qemu-load.h
index 578fedd..0d8db4e 100644
--- a/qemu-load.h
+++ b/qemu-load.h
@@ -140,6 +140,18 @@ struct qemu_x86_mce {
uint64_t mce_banks[10 * 4];
};

+enum CPU_REG {
+ R_EAX,
+ R_ECX,
+ R_EDX,
+ R_EBX,
+ R_ESP,
+ R_EBP,
+ R_ESI,
+ R_EDI,
+ R_GP_MAX,
+};
+
struct qemu_device_x86 {
struct qemu_device dev_base;

--
1.7.3

>From de6809819c06cf8c294a03ad79207b3ac9c2dca8 Mon Sep 17 00:00:00 2001
From: Hu Tao <hutao@cn.fujitsu.com>
Date: Tue, 19 Oct 2010 13:10:12 +0800
Subject: [PATCH 3/3] Use cpu sp/ip to backtrace active task

---
defs.h | 8 ++++++++
global_data.c | 3 +++
kvmdump.c | 7 ++++++-
qemu-load.c | 8 ++++++--
4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/defs.h b/defs.h
index a26150e..f84f9d4 100755
--- a/defs.h
+++ b/defs.h
@@ -4992,4 +4992,12 @@ extern int have_full_symbols(void);
#define XEN_HYPERVISOR_ARCH
#endif

+struct cpu_info {
+ uint64_t esp;
+ uint64_t eip;
+};
+
+extern struct cpu_info cpu_infos[];
+extern unsigned int n_cpu;
+
#endif /* !GDB_COMMON */
diff --git a/global_data.c b/global_data.c
index 98a5a79..e936ca5 100755
--- a/global_data.c
+++ b/global_data.c
@@ -134,3 +134,6 @@ struct extension_table *extension_table = NULL;
struct offset_table offset_table = { 0 };
struct size_table size_table = { 0 };
struct array_table array_table = { 0 };
+
+struct cpu_info cpu_infos[NR_CPUS] = {};
+unsigned int n_cpu = 0;
diff --git a/kvmdump.c b/kvmdump.c
index 1bf0d9e..4b60551 100644
--- a/kvmdump.c
+++ b/kvmdump.c
@@ -310,7 +310,12 @@ kvmdump_memory_dump(FILE *ofp)
void
get_kvmdump_regs(struct bt_info *bt, ulong *pc, ulong *sp)
{
- machdep->get_stack_frame(bt, pc, sp);
+ if (is_task_active(bt->task)) {
+ assert(bt->tc->processor < n_cpu);
+ *sp = cpu_infos[bt->tc->processor].esp;
+ *pc = cpu_infos[bt->tc->processor].eip;
+ } else
+ machdep->get_stack_frame(bt, pc, sp);
}

ulong
diff --git a/qemu-load.c b/qemu-load.c
index 95eaf97..a7beb0b 100644
--- a/qemu-load.c
+++ b/qemu-load.c
@@ -18,6 +18,7 @@
*/

#define _GNU_SOURCE
+#include "defs.h"
#include "qemu-load.h"
#include <stdlib.h>
#include <string.h>
@@ -609,6 +610,11 @@ cpu_load (struct qemu_device *d, FILE *fp, int size)
dx86->kvm.wall_clock_msr = get_be64 (fp);
}

+ assert(d->instance_id == n_cpu);
+ cpu_infos[n_cpu].eip = dx86->eip;
+ cpu_infos[n_cpu].esp = dx86->regs[R_ESP];
+ n_cpu++;
+
return QEMU_FEATURE_CPU;
}

@@ -924,8 +930,6 @@ fail:
* crash utility adaptation.
*/

-#include "defs.h"
-
int
is_qemu_vm_file(char *filename)
{
--
1.7.3

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-19-2010, 01:06 PM
Dave Anderson
 
Default bug on get_be_long() and improvement of bt

----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:

> Hi Dave,
>
> These are updated patches tested with SMP system and panic task.
>
> When testing a x86 guest, I found another bug about reading cpu
> registers from dumpfile. Qemu simulated system is x86_64
> (qemu-system-x86_64), guest OS is x86. When crash reads cpu registers
> from dumpfile, it uses cpu_load_32(), this will read gp registers by
> get_be_long(fp, 32), that is, treate them as 32bits. But in fact,
> qemu-system-x86_64 saves 64bits for each of them(although guest OS
> uses only lower 32 bits). As a result, crash gets wrong cpu gp
> register values.

As I understand it, you're running a 32-bit guest on a 64-bit host.
If you were to read 64-bit register values instead of 32-bit register
values, wouldn't that cause the file offsets of the subsequent get_xxx()
calls in cpu_load() to read from the wrong file offsets? And then
that would leave the ending file offset incorrect, such that the
qemu_load() loop would fail to find the next device?

In other words, the cpu_load() function, which is used for both
32-bit and 64-bit guests, must be reading the correct amount of
data from the "cpu" device, or else qemu_load() would fail to
find the next device in the next location in the dumpfile.

> Is there any way we can know from dumpfile that these gp
> registers(and those similar registers) are 32bits or 64bits?

I don't know. If what you say is true, when would those registers
ever be 32-bit values?

Dave

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-20-2010, 05:38 AM
Hu Tao
 
Default bug on get_be_long() and improvement of bt

On Tue, Oct 19, 2010 at 09:06:33AM -0400, Dave Anderson wrote:
>
> ----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:
>
> > Hi Dave,
> >
> > These are updated patches tested with SMP system and panic task.
> >
> > When testing a x86 guest, I found another bug about reading cpu
> > registers from dumpfile. Qemu simulated system is x86_64
> > (qemu-system-x86_64), guest OS is x86. When crash reads cpu registers
> > from dumpfile, it uses cpu_load_32(), this will read gp registers by
> > get_be_long(fp, 32), that is, treate them as 32bits. But in fact,
> > qemu-system-x86_64 saves 64bits for each of them(although guest OS
> > uses only lower 32 bits). As a result, crash gets wrong cpu gp
> > register values.
>
> As I understand it, you're running a 32-bit guest on a 64-bit host.

Yes.

> If you were to read 64-bit register values instead of 32-bit register
> values, wouldn't that cause the file offsets of the subsequent get_xxx()
> calls in cpu_load() to read from the wrong file offsets? And then
> that would leave the ending file offset incorrect, such that the
> qemu_load() loop would fail to find the next device?
>
> In other words, the cpu_load() function, which is used for both
> 32-bit and 64-bit guests, must be reading the correct amount of
> data from the "cpu" device, or else qemu_load() would fail to
> find the next device in the next location in the dumpfile.

True. In fact, in my case if read 32-bit registers, following devices
are found:
block, ram, kvm-tpr-opt, kvmclock, timer, cpu_common, cpu.
If read 64-bit registers, following devices are found:
block, ram, kvm-tpr-opt, kvmclock, timer, cpu_common, cpu, apic, fw_cfg

>
> > Is there any way we can know from dumpfile that these gp
> > registers(and those similar registers) are 32bits or 64bits?
>
> I don't know. If what you say is true, when would those registers
> ever be 32-bit values?

I did tests on a 64-bit machine. Result is:

machine OS guest machine guest OS saved gp regs
------------------------------------------------------------------------
64-bit x86 qemu-kvm(kvm enabled) x86 64 bits
64-bit x86 qemu(kvm disabled) x86 32 bits

I guess on a 32-bit machine saved gp regs shoule be 32bits, but I have no
32-bit machine to test.

--
Thanks,
Hu Tao

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-20-2010, 07:51 AM
Hu Tao
 
Default bug on get_be_long() and improvement of bt

And one more patch adding command `cpu' to show cpu registers.

--
Thanks,
Hu Tao
>From 3d737a7ea51261eb3d39febbcbf93cce02b458c1 Mon Sep 17 00:00:00 2001
From: Hu Tao <hutao@cn.fujitsu.com>
Date: Wed, 20 Oct 2010 15:40:07 +0800
Subject: [PATCH 4/4] Add a command cpu to show cpu registers

---
defs.h | 18 +++++++++++++++++-
global_data.c | 1 +
help.c | 14 +-------------
kernel.c | 10 ++++++++++
kvmdump.c | 2 +-
qemu-load.c | 3 ++-
qemu-load.h | 12 ------------
x86.c | 20 ++++++++++++++++++++
x86_64.c | 28 ++++++++++++++++++++++++++++
9 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/defs.h b/defs.h
index f84f9d4..1e8f2f9 100755
--- a/defs.h
+++ b/defs.h
@@ -807,6 +807,7 @@ struct machdep_table {
int (*xen_kdump_p2m_create)(struct xen_kdump_data *);
int (*in_alternate_stack)(int, ulong);
void (*dumpfile_init)(int, void *);
+ void (*show_cpu)(void);
};

/*
@@ -3357,6 +3358,7 @@ void cmd_sys(void); /* kernel.c */
void cmd_irq(void); /* kernel.c */
void cmd_timer(void); /* kernel.c */
void cmd_waitq(void); /* kernel.c */
+void cmd_cpu(void); /* kernel.c */
void cmd_sym(void); /* symbols.c */
void cmd_struct(void); /* symbols.c */
void cmd_union(void); /* symbols.c */
@@ -3821,6 +3823,7 @@ extern char *help_union[];
extern char *help_vm[];
extern char *help_vtop[];
extern char *help_waitq[];
+extern char *help_cpu[];
extern char *help_whatis[];
extern char *help_wr[];
#if defined(S390) || defined(S390X)
@@ -4992,9 +4995,22 @@ extern int have_full_symbols(void);
#define XEN_HYPERVISOR_ARCH
#endif

+enum CPU_REG {
+ R_EAX,
+ R_ECX,
+ R_EDX,
+ R_EBX,
+ R_ESP,
+ R_EBP,
+ R_ESI,
+ R_EDI,
+ R_GP_MAX,
+};
+
struct cpu_info {
- uint64_t esp;
+ uint64_t regs[16];
uint64_t eip;
+ uint64_t eflags;
};

extern struct cpu_info cpu_infos[];
diff --git a/global_data.c b/global_data.c
index e936ca5..20539fb 100755
--- a/global_data.c
+++ b/global_data.c
@@ -118,6 +118,7 @@ struct command_table_entry linux_command_table[] = {
{"waitq", cmd_waitq, help_waitq, REFRESH_TASK_TABLE},
{"whatis", cmd_whatis, help_whatis, 0},
{"wr", cmd_wr, help_wr, 0},
+ {"cpu", cmd_cpu, help_cpu, 0},
#if defined(S390) || defined(S390X)
{"s390dbf", cmd_s390dbf, help_s390dbf, 0},
#endif
diff --git a/help.c b/help.c
index 94402f9..6fb6846 100755
--- a/help.c
+++ b/help.c
@@ -2248,19 +2248,7 @@ NULL

char *help_cpu[] = {
"cpu",
-"set context to the active task on a cpu",
-"[cpu]",
-" This command sets the current context to the active task on a cpu. If no",
-" argument is given, the current context is displayed.
",
-" cpu a valid cpu number
",
-" This command is not allowable on live systems.",
-"
EXAMPLES",
-" %s> cpu 1",
-" PID: 286",
-" COMMAND: "in.rlogind"",
-" TASK: c0b3a000 ",
-" CPU: 1 ",
-" STATE: TASK_RUNNING (ACTIVE)",
+"Show cpu registers",
NULL
};

diff --git a/kernel.c b/kernel.c
index e399099..1d04e68 100755
--- a/kernel.c
+++ b/kernel.c
@@ -6070,6 +6070,16 @@ cmd_waitq(void)
}
}

+void
+cmd_cpu(void)
+{
+ if (machdep->show_cpu)
+ machdep->show_cpu();
+ else
+ printf("This command is not supported on %s.
", MACHINE_TYPE);
+}
+
+
static void
dump_waitq(ulong wq, char *wq_name)
{
diff --git a/kvmdump.c b/kvmdump.c
index 4b60551..6205892 100644
--- a/kvmdump.c
+++ b/kvmdump.c
@@ -312,7 +312,7 @@ get_kvmdump_regs(struct bt_info *bt, ulong *pc, ulong *sp)
{
if (is_task_active(bt->task)) {
assert(bt->tc->processor < n_cpu);
- *sp = cpu_infos[bt->tc->processor].esp;
+ *sp = cpu_infos[bt->tc->processor].regs[R_ESP];
*pc = cpu_infos[bt->tc->processor].eip;
} else
machdep->get_stack_frame(bt, pc, sp);
diff --git a/qemu-load.c b/qemu-load.c
index a7beb0b..303ed94 100644
--- a/qemu-load.c
+++ b/qemu-load.c
@@ -611,8 +611,9 @@ cpu_load (struct qemu_device *d, FILE *fp, int size)
}

assert(d->instance_id == n_cpu);
+ memcpy(cpu_infos[n_cpu].regs, dx86->regs, sizeof(cpu_infos[n_cpu].regs));
cpu_infos[n_cpu].eip = dx86->eip;
- cpu_infos[n_cpu].esp = dx86->regs[R_ESP];
+ cpu_infos[n_cpu].eflags = dx86->eflags;
n_cpu++;

return QEMU_FEATURE_CPU;
diff --git a/qemu-load.h b/qemu-load.h
index 0d8db4e..578fedd 100644
--- a/qemu-load.h
+++ b/qemu-load.h
@@ -140,18 +140,6 @@ struct qemu_x86_mce {
uint64_t mce_banks[10 * 4];
};

-enum CPU_REG {
- R_EAX,
- R_ECX,
- R_EDX,
- R_EBX,
- R_ESP,
- R_EBP,
- R_ESI,
- R_EDI,
- R_GP_MAX,
-};
-
struct qemu_device_x86 {
struct qemu_device dev_base;

diff --git a/x86.c b/x86.c
index 4121e28..4ab0677 100755
--- a/x86.c
+++ b/x86.c
@@ -1673,6 +1673,25 @@ x86_user_eframe(struct bt_info *bt)

}

+static void
+show_cpu(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < n_cpu; i++) {
+ printf("CPU %d:
", i);
+
+ printf(" eax %08llx ebx %08llx ecx %08llx edx %08llx
"
+ " esi %08llx edi %08llx esp %08llx ebp %08llx
",
+ cpu_infos[i].regs[R_EAX], cpu_infos[i].regs[R_EBX],
+ cpu_infos[i].regs[R_ECX], cpu_infos[i].regs[R_EDX],
+ cpu_infos[i].regs[R_ESI], cpu_infos[i].regs[R_EDI],
+ cpu_infos[i].regs[R_ESP], cpu_infos[i].regs[R_EBP]);
+
+ printf(" eip %08llx
", cpu_infos[i].eip);
+ }
+}
+
/*
* Do all necessary machine-specific setup here. This is called three times,
* during symbol table initialization, and before and after GDB has been
@@ -1717,6 +1736,7 @@ x86_init(int when)
machdep->last_ptbl_read = 0;
machdep->machspec = &x86_machine_specific;
machdep->verify_paddr = generic_verify_paddr;
+ machdep->show_cpu = show_cpu;
break;

case PRE_GDB:
diff --git a/x86_64.c b/x86_64.c
index be81247..5a500ee 100755
--- a/x86_64.c
+++ b/x86_64.c
@@ -102,6 +102,7 @@ static ulong x86_64_get_framepointer(struct bt_info *, ulong);
static int x86_64_get_framesize(struct bt_info *, ulong, ulong);
static void x86_64_framesize_debug(struct bt_info *);
static void x86_64_get_active_set(void);
+static void show_cpu(void);

struct machine_specific x86_64_machine_specific = { 0 };

@@ -154,6 +155,7 @@ x86_64_init(int when)
machdep->machspec->irq_eframe_link = UNINITIALIZED;
if (machdep->cmdline_args[0])
parse_cmdline_args();
+ machdep->show_cpu = show_cpu;
break;

case PRE_GDB:
@@ -6850,4 +6852,30 @@ x86_64_get_active_set(void)
}
}

+static void
+show_cpu(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < n_cpu; i++) {
+ printf("CPU %d:
", i);
+
+ printf(" rax %016llx rbx %016llx rcx %016llx rdx %016llx
"
+ " rsi %016llx rdi %016llx rsp %016llx rbp %016llx
",
+ cpu_infos[i].regs[R_EAX], cpu_infos[i].regs[R_EBX],
+ cpu_infos[i].regs[R_ECX], cpu_infos[i].regs[R_EDX],
+ cpu_infos[i].regs[R_ESI], cpu_infos[i].regs[R_EDI],
+ cpu_infos[i].regs[R_ESP], cpu_infos[i].regs[R_EBP]);
+
+ printf(" r8 %016llx r9 %016llx r10 %016llx r11 %016llx
"
+ " r12 %016llx r13 %016llx r14 %016llx r15 %016llx
",
+ cpu_infos[i].regs[8], cpu_infos[i].regs[9],
+ cpu_infos[i].regs[10], cpu_infos[i].regs[11],
+ cpu_infos[i].regs[12], cpu_infos[i].regs[13],
+ cpu_infos[i].regs[14], cpu_infos[i].regs[15]);
+
+ printf(" rip %16llx
", cpu_infos[i].eip);
+ }
+}
+
#endif /* X86_64 */
--
1.7.3

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-20-2010, 01:00 PM
Dave Anderson
 
Default bug on get_be_long() and improvement of bt

----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:

> And one more patch adding command `cpu' to show cpu registers.

I had considered a "regs" command in the past, but given that they
are not always available in the various dumpfiles, and if implemented
should be done for all architectures, and quite frankly they're often
not that interesting because in, say ELF core dumps, they're typically
redundantly displayed in the backtrace exception frame.

Since this is KVM-specific and x86/x86_64-specific, for now I will
fold your patch into "help -n", which dumps dumpfile-specific data.
We can revisit making it a new command if it can be made more
generally used.

Thanks,
Dave

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 
Old 10-20-2010, 01:11 PM
Dave Anderson
 
Default bug on get_be_long() and improvement of bt

----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:

> On Tue, Oct 19, 2010 at 09:06:33AM -0400, Dave Anderson wrote:
> >
> > ----- "Hu Tao" <hutao@cn.fujitsu.com> wrote:
> >
> > > Hi Dave,
> > >
> > > These are updated patches tested with SMP system and panic
> task.
> > >
> > > When testing a x86 guest, I found another bug about reading cpu
> > > registers from dumpfile. Qemu simulated system is x86_64
> > > (qemu-system-x86_64), guest OS is x86. When crash reads cpu registers
> > > from dumpfile, it uses cpu_load_32(), this will read gp registers by
> > > get_be_long(fp, 32), that is, treate them as 32bits. But in fact,
> > > qemu-system-x86_64 saves 64bits for each of them(although guest OS
> > > uses only lower 32 bits). As a result, crash gets wrong cpu gp
> > > register values.
> >
> > As I understand it, you're running a 32-bit guest on a 64-bit host.
>
> Yes.
>
> > If you were to read 64-bit register values instead of 32-bit register
> > values, wouldn't that cause the file offsets of the subsequent get_xxx()
> > calls in cpu_load() to read from the wrong file offsets? And then
> > that would leave the ending file offset incorrect, such that the
> > qemu_load() loop would fail to find the next device?
> >
> > In other words, the cpu_load() function, which is used for both
> > 32-bit and 64-bit guests, must be reading the correct amount of
> > data from the "cpu" device, or else qemu_load() would fail to
> > find the next device in the next location in the dumpfile.
>
> True. In fact, in my case if read 32-bit registers, following devices
> are found:
> block, ram, kvm-tpr-opt, kvmclock, timer, cpu_common, cpu.
> If read 64-bit registers, following devices are found:
> block, ram, kvm-tpr-opt, kvmclock, timer, cpu_common, cpu, apic, fw_cfg

Right -- so it got "lost" after incorrectly gathering the data for the
first "cpu" device instance.

> > > Is there any way we can know from dumpfile that these gp
> > > registers(and those similar registers) are 32bits or 64bits?
> >
> > I don't know. If what you say is true, when would those registers
> > ever be 32-bit values?
>
> I did tests on a 64-bit machine. Result is:
>
> machine OS guest machine guest OS saved gp regs
> ------------------------------------------------------------------------
> 64-bit x86 qemu-kvm(kvm enabled) x86 64 bits
> 64-bit x86 qemu(kvm disabled) x86 32 bits

I don't understand what you mean when you say that the guest machine
is "kvm enabled" or "kvm disabled"?

And if your host machine is running a 32-bit x86 OS (on 64-bit hardware),
that's something I've never seen given that Red Hat only allows 64-bit
kernels as KVM hosts.

Dave

--
Crash-utility mailing list
Crash-utility@redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
 

Thread Tools




All times are GMT. The time now is 03:48 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org