/* * Qemu save VM loader * * Copyright (C) 2009 Red Hat, Inc. * Written by Paolo Bonzini. * * Portions Copyright (C) 2009 David Anderson */ #define _GNU_SOURCE #include "qemu-load.h" #include #include #include #include #include "kvmdump.h" struct qemu_device * device_alloc (struct qemu_device_list *dl, size_t sz, struct qemu_device_vtbl *vtbl, uint32_t section_id, uint32_t instance_id) { struct qemu_device *d = calloc (1, sz); d->vtbl = vtbl; d->list = dl; d->section_id = section_id; d->instance_id = instance_id; if (!dl->head) dl->head = dl->tail = d; else { dl->tail->next = d; d->prev = dl->tail; } return d; } struct qemu_device * device_find (struct qemu_device_list *dl, uint32_t section_id) { struct qemu_device *d; d = dl->head; while (d && d->section_id != section_id) d = d->next; return d; } struct qemu_device * device_find_instance (struct qemu_device_list *dl, const char *name, uint32_t instance_id) { struct qemu_device *d; d = dl->head; while (d && (strcmp (d->vtbl->name, name) || d->instance_id != instance_id)) d = d->next; return d; } void device_free (struct qemu_device *d) { struct qemu_device_list *dl = d->list; if (d->prev) d->prev->next = d->next; else dl->head = d->next; if (d->next) d->next->prev = d->prev; else dl->tail = d->prev; d->prev = d->next = NULL; if (d->vtbl->free) d->vtbl->free (d, dl); } void device_list_free (struct qemu_device_list *l) { if (!l) return; while (l->head) device_free (l->head); } /* File access. */ static inline uint16_t get_be16 (FILE *fp) { uint8_t a = getc (fp); uint8_t b = getc (fp); return (a << 8) | b; } static inline uint16_t get_le16 (FILE *fp) { uint8_t b = getc (fp); uint8_t a = getc (fp); return (a << 8) | b; } static inline uint32_t get_be32 (FILE *fp) { uint16_t a = get_be16 (fp); uint16_t b = get_be16 (fp); return (a << 16) | b; } static inline uint32_t get_le32 (FILE *fp) { uint16_t b = get_le16 (fp); uint16_t a = get_le16 (fp); return (a << 16) | b; } static inline uint64_t get_be64 (FILE *fp) { uint32_t a = get_be32 (fp); uint32_t b = get_be32 (fp); return ((uint64_t)a << 32) | b; } static inline uint64_t get_le64 (FILE *fp) { uint32_t b = get_le32 (fp); uint32_t a = get_le32 (fp); return ((uint64_t)a << 32) | b; } static inline void get_qemu128 (FILE *fp, union qemu_uint128_t *result) { result->i[1] = get_le32 (fp); result->i[0] = get_le32 (fp); result->i[3] = get_le32 (fp); result->i[2] = get_le32 (fp); } /* RAM loader. */ #define RAM_SAVE_FLAG_FULL 0x01 #define RAM_SAVE_FLAG_COMPRESS 0x02 #define RAM_SAVE_FLAG_MEM_SIZE 0x04 #define RAM_SAVE_FLAG_PAGE 0x08 #define RAM_SAVE_FLAG_EOS 0x10 #define RAM_SAVE_ADDR_MASK (~4095LL) #define RAM_OFFSET_COMPRESSED (~(off_t)255) static void ram_alloc (struct qemu_device_ram *dram, uint64_t size) { // size_t old_npages = dram->offsets ? 0 : dram->last_ram_offset / 4096; // size_t new_npages = size / 4096; // assert (size <= SIZE_MAX); // if (dram->offsets) // dram->offsets = realloc (dram->offsets, // new_npages * sizeof (off_t)); // else // dram->offsets = malloc (new_npages * sizeof (off_t)); // // assert (dram->offsets); // while (old_npages < new_npages) // dram->offsets[old_npages++] = RAM_OFFSET_COMPRESSED | 0; dram->last_ram_offset = size; } static uint32_t ram_load (struct qemu_device *d, FILE *fp, enum qemu_save_section sec) { struct qemu_device_ram *dram = (struct qemu_device_ram *)d; uint64_t header; do { uint64_t addr; off_t entry; header = get_be64 (fp); assert (!(header & RAM_SAVE_FLAG_FULL)); addr = header & RAM_SAVE_ADDR_MASK; if (header & RAM_SAVE_FLAG_MEM_SIZE) ram_alloc (dram, addr); else if (header & RAM_SAVE_FLAG_COMPRESS) { // dram->offsets[addr / 4096] = entry = RAM_OFFSET_COMPRESSED | getc(fp); store_memfile_offset(addr, &entry); } else if (header & RAM_SAVE_FLAG_PAGE) { // dram->offsets[addr / 4096] = ftell (fp); entry = ftell(fp); store_memfile_offset(addr, &entry); fseek (fp, 4096, SEEK_CUR); } } while (!(header & RAM_SAVE_FLAG_EOS) && !feof (fp) && !ferror (fp)); dram->fp = fp; return QEMU_FEATURE_RAM; } static void ram_free (struct qemu_device *d, struct qemu_device_list *dl) { struct qemu_device_ram *dram = (struct qemu_device_ram *)d; free (dram->offsets); } int ram_read_phys_page (struct qemu_device_ram *dram, void *buf, uint64_t addr) { off_t ofs; ssize_t bytes; if (addr >= dram->last_ram_offset) return false; assert ((addr & 0xfff) == 0); // ofs = dram->offsets[addr / 4096]; if (load_memfile_offset(addr, &ofs) < 0) return 0; if ((ofs & RAM_OFFSET_COMPRESSED) == RAM_OFFSET_COMPRESSED) memset (buf, ofs & 255, 4096); else bytes = pread (fileno (dram->fp), buf, 4096, ofs); return true; } static struct qemu_device * ram_init_load (struct qemu_device_list *dl, uint32_t section_id, uint32_t instance_id, uint32_t version_id, bool live, FILE *fp) { static struct qemu_device_vtbl ram = { "ram", ram_load, ram_free }; assert (version_id == 3); return device_alloc (dl, sizeof (struct qemu_device_ram), &ram, section_id, instance_id); } /* cpu_common loader. */ struct qemu_device_cpu_common { struct qemu_device base; uint32_t halted; uint32_t irq; }; static uint32_t cpu_common_load (struct qemu_device *d, FILE *fp, enum qemu_save_section sec) { struct qemu_device_cpu_common *cpu = (struct qemu_device_cpu_common *)d; cpu->halted = get_be32 (fp); cpu->irq = get_be32 (fp); return 0; } static struct qemu_device * cpu_common_init_load (struct qemu_device_list *dl, uint32_t section_id, uint32_t instance_id, uint32_t version_id, bool live, FILE *fp) { static struct qemu_device_vtbl cpu_common = { "cpu_common", cpu_common_load, NULL }; assert (!live); return device_alloc (dl, sizeof (struct qemu_device_cpu_common), &cpu_common, section_id, instance_id); } /* CPU loader. */ static inline int get_be_long (FILE *fp, int size) { uint32_t a = size == 32 ? 0 : get_be32 (fp); uint32_t b = get_be32 (fp); return ((uint64_t)a << 32) | b; } static inline void get_be_fp80 (FILE *fp, union qemu_fpu_reg *result) { result->mmx = get_be64 (fp); result->bytes[9] = getc (fp); result->bytes[8] = getc (fp); } static void cpu_load_seg (FILE *fp, struct qemu_x86_seg *seg, int size) { seg->selector = get_be32 (fp); seg->base = get_be_long (fp, size); seg->limit = get_be32 (fp); seg->flags = get_be32 (fp); } static uint32_t cpu_load (struct qemu_device *d, FILE *fp, int size) { struct qemu_device_x86 *dx86 = (struct qemu_device_x86 *)d; uint32_t qemu_hflags = 0, qemu_hflags2 = 0; int nregs = size == 32 ? 8 : 16; uint32_t version_id = dx86->version_id; int i; struct qemu_device_cpu_common *dcpu; dcpu = (struct qemu_device_cpu_common *) device_find_instance (d->list, "cpu_common", d->instance_id); if (dcpu) { dx86->halted = dcpu->halted; dx86->irq = dcpu->irq; device_free ((struct qemu_device *) dcpu); } for (i = 0; i < nregs; i++) dx86->regs[i] = get_be_long (fp, size); dx86->eip = get_be_long (fp, size); dx86->eflags = get_be_long (fp, size); qemu_hflags = get_be32 (fp); dx86->fpucw = get_be16 (fp); dx86->fpusw = get_be16 (fp); dx86->fpu_free = get_be16 (fp); if (get_be16 (fp)) for (i = 0; i < 8; i++) dx86->st[i].mmx = get_be64 (fp); else for (i = 0; i < 8; i++) get_be_fp80 (fp, &dx86->st[i]); cpu_load_seg (fp, &dx86->es, size); cpu_load_seg (fp, &dx86->cs, size); cpu_load_seg (fp, &dx86->ss, size); cpu_load_seg (fp, &dx86->ds, size); cpu_load_seg (fp, &dx86->fs, size); cpu_load_seg (fp, &dx86->gs, size); cpu_load_seg (fp, &dx86->ldt, size); cpu_load_seg (fp, &dx86->tr, size); cpu_load_seg (fp, &dx86->gdt, size); cpu_load_seg (fp, &dx86->idt, size); dx86->sysenter.cs = get_be32 (fp); dx86->sysenter.esp = get_be_long (fp, version_id <= 6 ? 32 : size); dx86->sysenter.eip = get_be_long (fp, version_id <= 6 ? 32 : size); dx86->cr0 = get_be_long (fp, size); dx86->cr2 = get_be_long (fp, size); dx86->cr3 = get_be_long (fp, size); dx86->cr4 = get_be_long (fp, size); for (i = 0; i < 8; i++) dx86->dr[i] = get_be_long (fp, size); dx86->a20_masked = get_be32 (fp) != 0xffffffff; dx86->mxcsr = get_be32 (fp); for (i = 0; i < nregs; i++) get_qemu128 (fp, &dx86->xmm[i]); if (size == 64) { dx86->efer = get_be64 (fp); dx86->star = get_be64 (fp); dx86->lstar = get_be64 (fp); dx86->cstar = get_be64 (fp); dx86->fmask = get_be64 (fp); dx86->kernel_gs_base = get_be64 (fp); } dx86->smbase = get_be32 (fp); dx86->soft_mmu = qemu_hflags & (1 << 2); dx86->smm = qemu_hflags & (1 << 19); if (version_id == 4) return QEMU_FEATURE_CPU; dx86->pat = get_be64 (fp); qemu_hflags2 = get_be32 (fp); dx86->global_if = qemu_hflags2 & (1 << 0); dx86->in_nmi = qemu_hflags2 & (1 << 2); if (version_id < 6) dx86->halted = get_be32 (fp); dx86->svm.hsave = get_be64 (fp); dx86->svm.vmcb = get_be64 (fp); dx86->svm.tsc_offset = get_be64 (fp); dx86->svm.in_vmm = qemu_hflags & (1 << 21); dx86->svm.guest_if_mask = qemu_hflags2 & (1 << 1); dx86->svm.guest_intr_masking = qemu_hflags2 & (1 << 3); dx86->svm.intercept_mask = get_be64 (fp); dx86->svm.cr_read_mask = get_be16 (fp); dx86->svm.cr_write_mask = get_be16 (fp); dx86->svm.dr_read_mask = get_be16 (fp); dx86->svm.dr_write_mask = get_be16 (fp); dx86->svm.exception_intercept_mask = get_be32 (fp); dx86->cr8 = getc (fp); if (version_id >= 8) { for (i = 0; i < 12; i++) dx86->fixed_mtrr[i] = get_be64 (fp); dx86->deftype_mtrr = get_be64 (fp); for (i = 0; i < 8; i++) { dx86->variable_mtrr[i].base = get_be64 (fp); dx86->variable_mtrr[i].mask = get_be64 (fp); } } /* This was present only when KVM was enabled up to v8. * Furthermore, it changed format in v9. */ if (version_id >= 9) { int32_t pending_irq = (int32_t) get_be32 (fp); if (pending_irq >= 0) dx86->kvm.int_bitmap[pending_irq / 64] |= (uint64_t)1 << (pending_irq & 63); dx86->kvm.mp_state = get_be32 (fp); dx86->kvm.tsc = get_be64 (fp); } else if (d->list->features & QEMU_FEATURE_KVM) { for (i = 0; i < 4; i++) dx86->kvm.int_bitmap[i] = get_be64 (fp); dx86->kvm.tsc = get_be64 (fp); if (version_id >= 5) dx86->kvm.mp_state = get_be32 (fp); } return QEMU_FEATURE_CPU; } static uint32_t cpu_load_32 (struct qemu_device *d, FILE *fp, enum qemu_save_section sec) { return cpu_load (d, fp, 32); } static struct qemu_device * cpu_init_load_32 (struct qemu_device_list *dl, uint32_t section_id, uint32_t instance_id, uint32_t version_id, bool live, FILE *fp) { struct qemu_device_x86 *dx86; static struct qemu_device_vtbl cpu = { "cpu", cpu_load_32, NULL }; assert (!live); assert (version_id >= 4 && version_id <= 9); dx86 = (struct qemu_device_x86 *) device_alloc (dl, sizeof (struct qemu_device_x86), &cpu, section_id, instance_id); dx86->version_id = version_id; return (struct qemu_device *) dx86; } static uint32_t cpu_load_64 (struct qemu_device *d, FILE *fp, enum qemu_save_section sec) { return cpu_load (d, fp, 64); } static struct qemu_device * cpu_init_load_64 (struct qemu_device_list *dl, uint32_t section_id, uint32_t instance_id, uint32_t version_id, bool live, FILE *fp) { struct qemu_device_x86 *dx86; static struct qemu_device_vtbl cpu = { "cpu", cpu_load_64, NULL }; assert (!live); assert (version_id >= 4 && version_id <= 9); dx86 = (struct qemu_device_x86 *) device_alloc (dl, sizeof (struct qemu_device_x86), &cpu, section_id, instance_id); dx86->version_id = version_id; return (struct qemu_device *) dx86; } /* timer loader. */ static uint32_t timer_load (struct qemu_device *d, FILE *fp, enum qemu_save_section sec) { fseek (fp, 24, SEEK_CUR); return QEMU_FEATURE_TIMER; } static struct qemu_device * timer_init_load (struct qemu_device_list *dl, uint32_t section_id, uint32_t instance_id, uint32_t version_id, bool live, FILE *fp) { static struct qemu_device_vtbl timer = { "timer", timer_load, NULL }; assert (!live); return device_alloc (dl, sizeof (struct qemu_device), &timer, section_id, instance_id); } /* kvm-tpr-opt loader. */ static uint32_t kvm_tpr_opt_load (struct qemu_device *d, FILE *fp, enum qemu_save_section sec) { fseek (fp, 144, SEEK_CUR); return QEMU_FEATURE_KVM; } static struct qemu_device * kvm_tpr_opt_init_load (struct qemu_device_list *dl, uint32_t section_id, uint32_t instance_id, uint32_t version_id, bool live, FILE *fp) { static struct qemu_device_vtbl kvm_tpr_opt = { "kvm-tpr-opt", kvm_tpr_opt_load, NULL }; assert (!live); return device_alloc (dl, sizeof (struct qemu_device), &kvm_tpr_opt, section_id, instance_id); } /* Putting it together. */ const struct qemu_device_loader devices_x86_64[] = { { "cpu_common", cpu_common_init_load }, { "kvm-tpr-opt", kvm_tpr_opt_init_load }, { "cpu", cpu_init_load_64 }, { "ram", ram_init_load }, { "timer", timer_init_load }, { NULL } }; const struct qemu_device_loader devices_x86_32[] = { { "cpu_common", cpu_common_init_load }, { "kvm-tpr-opt", kvm_tpr_opt_init_load }, { "cpu", cpu_init_load_32 }, { "ram", ram_init_load }, { "timer", timer_init_load }, { NULL } }; #define QEMU_VM_FILE_MAGIC 0x5145564D #define LIBVIRT_QEMU_VM_FILE_MAGIC 0x4c696276 struct libvirt_header { char magic[16]; uint32_t version; uint32_t xml_length; uint32_t was_running; uint32_t padding[16]; }; static struct qemu_device * device_get (const struct qemu_device_loader *devices, struct qemu_device_list *dl, enum qemu_save_section sec, FILE *fp) { char name[257]; uint32_t section_id, instance_id, version_id; // bool live; size_t items; int sz; section_id = get_be32 (fp); if (sec != QEMU_VM_SECTION_START && sec != QEMU_VM_SECTION_FULL) return device_find (dl, section_id); sz = getc (fp); if (sz == EOF) return NULL; items = fread (name, sz, 1, fp); name[sz] = 0; instance_id = get_be32 (fp); version_id = get_be32 (fp); while (devices->name && strcmp (devices->name, name)) devices++; if (!devices->name) return NULL; return devices->init_load (dl, section_id, instance_id, version_id, sec == QEMU_VM_SECTION_START, fp); } struct qemu_device_list * qemu_load (const struct qemu_device_loader *devices, uint32_t required_features, FILE *fp) { struct qemu_device_list *result = NULL; size_t items; switch (get_be32 (fp)) { case QEMU_VM_FILE_MAGIC: break; case LIBVIRT_QEMU_VM_FILE_MAGIC: { struct libvirt_header header; memcpy (header.magic, "Libv", 4); items = fread (&header.magic[4], sizeof (header) - 4, 1, fp); if (memcmp ("LibvirtQemudSave", header.magic, 16)) goto fail; fseek (fp, header.xml_length, SEEK_CUR); if (get_be32 (fp) != QEMU_VM_FILE_MAGIC) goto fail; break; } default: goto fail; } if (get_be32 (fp) != 3) return NULL; result = calloc (1, sizeof (struct qemu_device_list)); for (;;) { struct qemu_device *d; uint32_t features; enum qemu_save_section sec = getc (fp); if (feof (fp) || ferror (fp)) break; if (sec == QEMU_VM_EOF) break; d = device_get (devices, result, sec, fp); if (!d) break; dprintf("qemu_load: \"%s\"\n", d->vtbl->name); features = d->vtbl->load (d, fp, sec); if (feof (fp) || ferror (fp)) break; if (sec == QEMU_VM_SECTION_END || sec == QEMU_VM_SECTION_FULL) result->features |= features; } if (ferror (fp) || (result->features & required_features) != required_features) goto fail; return result; fail: device_list_free (result); free (result); return NULL; } /* * crash utility adaptation. */ #include "defs.h" int is_qemu_vm_file(char *filename) { struct libvirt_header header; int retval; size_t items; char *xml; if ((kvm->vmp = fopen(filename, "r")) == NULL) { error(INFO, "%s: %s\n", filename, strerror(errno)); return FALSE; } retval = FALSE; xml = NULL; switch (get_be32(kvm->vmp)) { case QEMU_VM_FILE_MAGIC: retval = TRUE; break; case LIBVIRT_QEMU_VM_FILE_MAGIC: rewind(kvm->vmp); items = fread(&header.magic[0], sizeof(header), 1, kvm->vmp); if (STRNEQ(header.magic, "LibvirtQemudSave")) { if ((xml = (char *)malloc(header.xml_length))) { items = fread(xml, header.xml_length, 1, kvm->vmp); /* * Parse here if necessary or desirable. */ } else fseek(kvm->vmp, header.xml_length, SEEK_CUR); if (get_be32(kvm->vmp) == QEMU_VM_FILE_MAGIC) retval = TRUE; } break; default: retval = FALSE; } if (xml) free(xml); if (retval == FALSE) fclose(kvm->vmp); return retval; } void dump_qemu_header(FILE *out) { int i; struct libvirt_header header; char magic[4]; uint8_t c; size_t items; rewind(kvm->vmp); if (get_be32(kvm->vmp) == QEMU_VM_FILE_MAGIC) { fprintf(out, "%s: QEMU_VM_FILE_MAGIC\n", pc->dumpfile); return; } rewind(kvm->vmp); items = fread(&header, sizeof(header), 1, kvm->vmp); fprintf(out, "%s: libvirt_header:\n\n", pc->dumpfile); fprintf(out, " magic: "); for (i = 0; i < 16; i++) fprintf(out, "%c", header.magic[i]); fprintf(out, "\n"); fprintf(out, " version: %d\n", header.version); fprintf(out, " xml_length: %d\n", header.xml_length); fprintf(out, " was_running: %d\n", header.was_running); fprintf(out, " padding: (not shown)\n\n"); for (i = 0; i < header.xml_length; i++) { c = getc(kvm->vmp); if (c) fprintf(out, "%c", c); } fprintf(out, "\n"); items = fread(&magic, sizeof(char), 4, kvm->vmp); for (i = 0; i < 4; i++) fprintf(out, "%c", magic[i]); fprintf(out, "\n"); }