dump queue

Hi
 
 The "dump" queue, with:
 - [PATCH v3/v4 0/9] dump: Cleanup and consolidation
 - [PATCH v4 0/4] dump: add 32-bit guest Windows support
 -----BEGIN PGP SIGNATURE-----
 
 iQJQBAABCAA6FiEEh6m9kz+HxgbSdvYt2ujhCXWWnOUFAmNY9gMcHG1hcmNhbmRy
 ZS5sdXJlYXVAcmVkaGF0LmNvbQAKCRDa6OEJdZac5ZUtD/kByfamsq/8hnS6N/ok
 xs9kXO+HZA1A1Kng19RjYWbTka1LpEAf6y6tPtV27l5rWJZxCgqFp3Q2VKQyzAxl
 Bcf4gvEhUDJI87jHrZ8WBJ0JvPL8pKNjPn4JUPOQO+6kX8A/3XTwAyvH/T3uxlTo
 I+4HLwY0EkJ6NU6Cokud5Uo36Zj7JghKrBxTDrd3NC0qSy8xOoIsB5Pbp2PVKuX2
 F5Zfll3F+NUDsj9zmMR6agP4PBUJUB680TtvMpMZXb2BXumKDLngthCLRtGrgsDh
 ChjYr6xkRS9qlXn0PWIYsUyDucDuRFfqTz/Pa9OcGhQuQfIfQiGOM2IFQUE3UcuN
 OphJEFi44za3E7xEZziAGIFmro+k8zX2fjgN3+mApxpBjUAF/uzoW1VzIIdx65Gh
 H/IguECFu7AwMxPucRUI7PkwexgIcqpufeTRqep2nCFsAwS6bS+obzrAzIMd9kj1
 ApLhj36lkub0Tn77B8bkf1TYJnpBcYbGZpmPCILtOxpBZGlXm++KD1DKAYt6rbnR
 8rQugZNRzEB92aSRTkLJ6QKsqudnbR9ssGbOdEJP+v1fgVtFzYbgygx5QMezGkRw
 vRLWrNbDLog+uYpI2Kb30ItU7+bsDrads9n/gqiGvTP887T3alCtRdIq+Fb28oor
 tSBhBMqMOtccMy3k+EoXBXX5gw==
 =BUEY
 -----END PGP SIGNATURE-----

Merge tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu into staging

dump queue

Hi

The "dump" queue, with:
- [PATCH v3/v4 0/9] dump: Cleanup and consolidation
- [PATCH v4 0/4] dump: add 32-bit guest Windows support

# -----BEGIN PGP SIGNATURE-----
#
# iQJQBAABCAA6FiEEh6m9kz+HxgbSdvYt2ujhCXWWnOUFAmNY9gMcHG1hcmNhbmRy
# ZS5sdXJlYXVAcmVkaGF0LmNvbQAKCRDa6OEJdZac5ZUtD/kByfamsq/8hnS6N/ok
# xs9kXO+HZA1A1Kng19RjYWbTka1LpEAf6y6tPtV27l5rWJZxCgqFp3Q2VKQyzAxl
# Bcf4gvEhUDJI87jHrZ8WBJ0JvPL8pKNjPn4JUPOQO+6kX8A/3XTwAyvH/T3uxlTo
# I+4HLwY0EkJ6NU6Cokud5Uo36Zj7JghKrBxTDrd3NC0qSy8xOoIsB5Pbp2PVKuX2
# F5Zfll3F+NUDsj9zmMR6agP4PBUJUB680TtvMpMZXb2BXumKDLngthCLRtGrgsDh
# ChjYr6xkRS9qlXn0PWIYsUyDucDuRFfqTz/Pa9OcGhQuQfIfQiGOM2IFQUE3UcuN
# OphJEFi44za3E7xEZziAGIFmro+k8zX2fjgN3+mApxpBjUAF/uzoW1VzIIdx65Gh
# H/IguECFu7AwMxPucRUI7PkwexgIcqpufeTRqep2nCFsAwS6bS+obzrAzIMd9kj1
# ApLhj36lkub0Tn77B8bkf1TYJnpBcYbGZpmPCILtOxpBZGlXm++KD1DKAYt6rbnR
# 8rQugZNRzEB92aSRTkLJ6QKsqudnbR9ssGbOdEJP+v1fgVtFzYbgygx5QMezGkRw
# vRLWrNbDLog+uYpI2Kb30ItU7+bsDrads9n/gqiGvTP887T3alCtRdIq+Fb28oor
# tSBhBMqMOtccMy3k+EoXBXX5gw==
# =BUEY
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 26 Oct 2022 04:55:31 EDT
# gpg:                using RSA key 87A9BD933F87C606D276F62DDAE8E10975969CE5
# gpg:                issuer "marcandre.lureau@redhat.com"
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>" [full]
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>" [full]
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu:
  dump/win_dump: limit number of processed PRCBs
  s390x: pv: Add dump support
  s390x: Add KVM PV dump interface
  include/elf.h: add s390x note types
  s390x: Introduce PV query interface
  s390x: Add protected dump cap
  dump: Add architecture section and section string table support
  dump: Reintroduce memory_offset and section_offset
  dump: Reorder struct DumpState
  dump: Write ELF section headers right after ELF header
  dump: Use a buffer for ELF section data and headers

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2022-10-26 10:53:48 -04:00
commit 344744e148
13 changed files with 650 additions and 99 deletions

View file

@ -103,6 +103,7 @@ static int dump_cleanup(DumpState *s)
memory_mapping_list_free(&s->list);
close(s->fd);
g_free(s->guest_note);
g_array_unref(s->string_table_buf);
s->guest_note = NULL;
if (s->resume) {
if (s->detached) {
@ -152,11 +153,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header)
elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
elf_header->e_phnum = cpu_to_dump16(s, phnum);
if (s->shdr_num) {
elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
}
elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
}
static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
@ -180,11 +180,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
elf_header->e_phnum = cpu_to_dump16(s, phnum);
if (s->shdr_num) {
elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
}
elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
}
static void write_elf_header(DumpState *s, Error **errp)
@ -195,6 +194,8 @@ static void write_elf_header(DumpState *s, Error **errp)
void *header_ptr;
int ret;
/* The NULL header and the shstrtab are always defined */
assert(s->shdr_num >= 2);
if (dump_is_64bit(s)) {
prepare_elf64_header(s, &elf64_header);
header_size = sizeof(elf64_header);
@ -380,30 +381,136 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
}
}
static void write_elf_section(DumpState *s, int type, Error **errp)
static void prepare_elf_section_hdr_zero(DumpState *s)
{
Elf32_Shdr shdr32;
Elf64_Shdr shdr64;
if (dump_is_64bit(s)) {
Elf64_Shdr *shdr64 = s->elf_section_hdrs;
shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
} else {
Elf32_Shdr *shdr32 = s->elf_section_hdrs;
shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
}
}
static void prepare_elf_section_hdr_string(DumpState *s, void *buff)
{
uint64_t index = s->string_table_buf->len;
const char strtab[] = ".shstrtab";
Elf32_Shdr shdr32 = {};
Elf64_Shdr shdr64 = {};
int shdr_size;
void *shdr;
int ret;
if (type == 0) {
shdr_size = sizeof(Elf32_Shdr);
memset(&shdr32, 0, shdr_size);
shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
shdr = &shdr32;
} else {
g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab));
if (dump_is_64bit(s)) {
shdr_size = sizeof(Elf64_Shdr);
memset(&shdr64, 0, shdr_size);
shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
shdr64.sh_type = SHT_STRTAB;
shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
shdr64.sh_name = index;
shdr64.sh_size = s->string_table_buf->len;
shdr = &shdr64;
} else {
shdr_size = sizeof(Elf32_Shdr);
shdr32.sh_type = SHT_STRTAB;
shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
shdr32.sh_name = index;
shdr32.sh_size = s->string_table_buf->len;
shdr = &shdr32;
}
memcpy(buff, shdr, shdr_size);
}
static bool prepare_elf_section_hdrs(DumpState *s, Error **errp)
{
size_t len, sizeof_shdr;
void *buff_hdr;
/*
* Section ordering:
* - HDR zero
* - Arch section hdrs
* - String table hdr
*/
sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
len = sizeof_shdr * s->shdr_num;
s->elf_section_hdrs = g_malloc0(len);
buff_hdr = s->elf_section_hdrs;
/*
* The first section header is ALWAYS a special initial section
* header.
*
* The header should be 0 with one exception being that if
* phdr_num is PN_XNUM then the sh_info field contains the real
* number of segment entries.
*
* As we zero allocate the buffer we will only need to modify
* sh_info for the PN_XNUM case.
*/
if (s->phdr_num >= PN_XNUM) {
prepare_elf_section_hdr_zero(s);
}
buff_hdr += sizeof_shdr;
/* Add architecture defined section headers */
if (s->dump_info.arch_sections_write_hdr_fn
&& s->shdr_num > 2) {
buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr);
if (s->shdr_num >= SHN_LORESERVE) {
error_setg_errno(errp, EINVAL,
"dump: too many architecture defined sections");
return false;
}
}
ret = fd_write_vmcore(shdr, shdr_size, s);
/*
* String table is the last section since strings are added via
* arch_sections_write_hdr().
*/
prepare_elf_section_hdr_string(s, buff_hdr);
return true;
}
static void write_elf_section_headers(DumpState *s, Error **errp)
{
size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
int ret;
if (!prepare_elf_section_hdrs(s, errp)) {
return;
}
ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
if (ret < 0) {
error_setg_errno(errp, -ret,
"dump: failed to write section header table");
error_setg_errno(errp, -ret, "dump: failed to write section headers");
}
g_free(s->elf_section_hdrs);
}
/*
 * Write the section payload to the vmcore: first the architecture
 * section data buffer (skipped when its size is zero), then the
 * section header string table.
 *
 * A failed write sets @errp; if the architecture data cannot be
 * written the string table is not attempted.
 */
static void write_elf_sections(DumpState *s, Error **errp)
{
    int rc;

    /* Architecture section data is optional */
    if (s->elf_section_data_size != 0) {
        rc = fd_write_vmcore(s->elf_section_data,
                             s->elf_section_data_size, s);
        if (rc < 0) {
            error_setg_errno(errp, -rc,
                             "dump: failed to write architecture section data");
            return;
        }
    }

    /* The string table is written unconditionally */
    rc = fd_write_vmcore(s->string_table_buf->data,
                         s->string_table_buf->len, s);
    if (rc >= 0) {
        return;
    }

    error_setg_errno(errp, -rc, "dump: failed to write string table data");
}
@ -554,6 +661,8 @@ static void dump_begin(DumpState *s, Error **errp)
* --------------
* | elf header |
* --------------
* | sctn_hdr |
* --------------
* | PT_NOTE |
* --------------
* | PT_LOAD |
@ -562,8 +671,6 @@ static void dump_begin(DumpState *s, Error **errp)
* --------------
* | PT_LOAD |
* --------------
* | sec_hdr |
* --------------
* | elf note |
* --------------
* | memory |
@ -579,6 +686,12 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
/* write section headers to vmcore */
write_elf_section_headers(s, errp);
if (*errp) {
return;
}
/* write PT_NOTE to vmcore */
write_elf_phdr_note(s, errp);
if (*errp) {
@ -591,21 +704,13 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
/* write section to vmcore */
if (s->shdr_num) {
write_elf_section(s, 1, errp);
if (*errp) {
return;
}
}
/* write notes to vmcore */
write_elf_notes(s, errp);
}
static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
int64_t filter_area_start,
int64_t filter_area_length)
int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
int64_t filter_area_start,
int64_t filter_area_length)
{
int64_t size, left, right;
@ -623,9 +728,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
return size;
}
static int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
int64_t filter_area_start,
int64_t filter_area_length)
int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
int64_t filter_area_start,
int64_t filter_area_length)
{
if (filter_area_length) {
/* return -1 if the block is not within filter area */
@ -665,6 +770,31 @@ static void dump_iterate(DumpState *s, Error **errp)
}
}
/*
 * Last step of writing the ELF vmcore: gather the architecture defined
 * section data (if any) and write it, followed by the section string
 * table, to the dump file.
 *
 * The section data buffer is allocated here and only consumed by
 * write_elf_sections(), so it is released again on every exit path and
 * the pointer is reset to avoid leaving a dangling reference in @s.
 *
 * @s:    dump state; elf_section_data_size must already be final
 * @errp: set when the architecture callback or a write fails
 */
static void dump_end(DumpState *s, Error **errp)
{
    int rc;
    ERRP_GUARD();

    if (s->elf_section_data_size) {
        s->elf_section_data = g_malloc0(s->elf_section_data_size);
    }

    /* Adds the architecture defined section data to s->elf_section_data */
    if (s->dump_info.arch_sections_write_fn &&
        s->elf_section_data_size) {
        rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data);
        if (rc) {
            /*
             * The callback reports failures as negative errno values,
             * while error_setg_errno() expects a positive errno.
             * Accept either sign so the message never degrades to
             * "Unknown error -N".
             */
            error_setg_errno(errp, rc < 0 ? -rc : rc,
                             "dump: failed to get arch section data");
            goto out;
        }
    }

    /* write sections to vmcore */
    write_elf_sections(s, errp);

out:
    /* Only release the buffer that was allocated above */
    if (s->elf_section_data_size) {
        g_free(s->elf_section_data);
        s->elf_section_data = NULL;
    }
}
static void create_vmcore(DumpState *s, Error **errp)
{
ERRP_GUARD();
@ -674,7 +804,14 @@ static void create_vmcore(DumpState *s, Error **errp)
return;
}
/* Iterate over memory and dump it to file */
dump_iterate(s, errp);
if (*errp) {
return;
}
/* Write the section data */
dump_end(s, errp);
}
static int write_start_flat_header(int fd)
@ -1684,6 +1821,14 @@ static void dump_init(DumpState *s, int fd, bool has_format,
s->filter_area_begin = begin;
s->filter_area_length = length;
/* First index is 0, it's the special null name */
s->string_table_buf = g_array_new(FALSE, TRUE, 1);
/*
* Allocate the null name, due to the clearing option set to true
* it will be 0.
*/
g_array_set_size(s->string_table_buf, 1);
memory_mapping_list_init(&s->list);
guest_phys_blocks_init(&s->guest_phys_blocks);
@ -1820,38 +1965,53 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
/*
* calculate phdr_num
*
* the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
* The first section header is always a special one in which most
* fields are 0. The section header string table is also always
* set.
*/
s->phdr_num = 1; /* PT_NOTE */
if (s->list.num < UINT16_MAX - 2) {
s->shdr_num = 0;
s->shdr_num = 2;
/*
* Adds the number of architecture sections to shdr_num and sets
* elf_section_data_size so we know the offsets and sizes of all
* parts.
*/
if (s->dump_info.arch_sections_add_fn) {
s->dump_info.arch_sections_add_fn(s);
}
/*
* calculate shdr_num so we know the offsets and sizes of all
* parts.
* Calculate phdr_num
*
* The absolute maximum amount of phdrs is UINT32_MAX - 1 as
* sh_info is 32 bit. There's special handling once we go over
* UINT16_MAX - 1 but that is handled in the ehdr and section
* code.
*/
s->phdr_num = 1; /* Reserve PT_NOTE */
if (s->list.num <= UINT32_MAX - 1) {
s->phdr_num += s->list.num;
} else {
/* sh_info of section 0 holds the real number of phdrs */
s->shdr_num = 1;
/* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
if (s->list.num <= UINT32_MAX - 1) {
s->phdr_num += s->list.num;
} else {
s->phdr_num = UINT32_MAX;
}
s->phdr_num = UINT32_MAX;
}
/*
* Now that the number of section and program headers is known we
* can calculate the offsets of the headers and data.
*/
if (dump_is_64bit(s)) {
s->phdr_offset = sizeof(Elf64_Ehdr);
s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
s->memory_offset = s->note_offset + s->note_size;
s->shdr_offset = sizeof(Elf64_Ehdr);
s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
} else {
s->phdr_offset = sizeof(Elf32_Ehdr);
s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
s->memory_offset = s->note_offset + s->note_size;
s->shdr_offset = sizeof(Elf32_Ehdr);
s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
}
s->memory_offset = s->note_offset + s->note_size;
s->section_offset = s->memory_offset + s->total_size;
return;

View file

@ -273,6 +273,13 @@ static void patch_and_save_context(WinDumpHeader *h, bool x64,
uint64_t Context;
WinContext ctx;
if (i >= WIN_DUMP_FIELD(NumberProcessors)) {
warn_report("win-dump: number of QEMU CPUs is bigger than"
" NumberProcessors (%u) in guest Windows",
WIN_DUMP_FIELD(NumberProcessors));
return;
}
if (cpu_read_ptr(x64, first_cpu,
KiProcessorBlock + i * win_dump_ptr_size(x64),
&Prcb)) {

View file

@ -20,6 +20,11 @@
#include "exec/confidential-guest-support.h"
#include "hw/s390x/ipl.h"
#include "hw/s390x/pv.h"
#include "target/s390x/kvm/kvm_s390x.h"
static bool info_valid;
static struct kvm_s390_pv_info_vm info_vm;
static struct kvm_s390_pv_info_dump info_dump;
static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
{
@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
} \
}
/*
 * Query the PV "VM" and "dump" info blocks via KVM_PV_INFO and cache
 * them in info_vm / info_dump.
 *
 * Returns 0 without querying anything when KVM does not report the
 * protected dump capability, 0 after both queries succeeded (and
 * info_valid has been set), or the failing command's return code.
 */
int s390_pv_query_info(void)
{
    struct kvm_s390_pv_info info = {
        .header.id = KVM_PV_INFO_VM,
        .header.len_max = sizeof(info.header) + sizeof(info.vm),
    };
    int ret;

    /* Info API's first user is dump so they are bundled */
    if (!kvm_s390_get_protected_dump()) {
        return 0;
    }

    /* First query: VM information */
    ret = s390_pv_cmd(KVM_PV_INFO, &info);
    if (ret != 0) {
        error_report("KVM PV INFO cmd %x failed: %s",
                     info.header.id, strerror(-ret));
        return ret;
    }
    memcpy(&info_vm, &info.vm, sizeof(info.vm));

    /* Second query: dump information, reusing the same request buffer */
    info.header.id = KVM_PV_INFO_DUMP;
    info.header.len_max = sizeof(info.header) + sizeof(info.dump);
    ret = s390_pv_cmd(KVM_PV_INFO, &info);
    if (ret != 0) {
        error_report("KVM PV INFO cmd %x failed: %s",
                     info.header.id, strerror(-ret));
        return ret;
    }
    memcpy(&info_dump, &info.dump, sizeof(info.dump));

    /* Both blocks are cached now */
    info_valid = true;
    return 0;
}
int s390_pv_vm_enable(void)
{
return s390_pv_cmd(KVM_PV_ENABLE, NULL);
@ -114,6 +155,77 @@ void s390_pv_inject_reset_error(CPUState *cs)
env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
}
/*
 * Returns the cached dump_cpu_buffer_len value of info_dump. info_dump is
 * a file-scope static filled in by s390_pv_query_info(), so the value is 0
 * until that query has succeeded.
 */
uint64_t kvm_s390_pv_dmp_get_size_cpu(void)
{
return info_dump.dump_cpu_buffer_len;
}
/*
 * Returns the cached dump_config_finalize_len value of info_dump, i.e. the
 * buffer length that kvm_s390_dump_completion_data() passes along with the
 * KVM_PV_DUMP_COMPLETE subcommand.
 */
uint64_t kvm_s390_pv_dmp_get_size_completion_data(void)
{
return info_dump.dump_config_finalize_len;
}
/*
 * Returns the cached dump_config_mem_buffer_per_1m value of info_dump.
 * NOTE(review): going by the field name this is the memory state buffer
 * size needed per 1 MiB of guest memory - confirm against the KVM API.
 */
uint64_t kvm_s390_pv_dmp_get_size_mem_state(void)
{
return info_dump.dump_config_mem_buffer_per_1m;
}
/*
 * Returns true once s390_pv_query_info() has successfully cached both the
 * VM and the dump info blocks (that is the only place info_valid is set).
 */
bool kvm_s390_pv_info_basic_valid(void)
{
return info_valid;
}
/*
 * Issue a KVM_PV_DUMP command with the given dump subcommand.
 *
 * @subcmd: dump subcommand placed in the request
 * @uaddr:  buffer address handed to KVM (buff_addr)
 * @gaddr:  guest address the subcommand refers to
 * @len:    length of the buffer at @uaddr (buff_len)
 *
 * Returns the result of the PV command; a non-zero result is also
 * reported via error_report().
 */
static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr,
                            uint64_t len)
{
    struct kvm_s390_pv_dmp dmp = {
        .subcmd = subcmd,
        .buff_addr = uaddr,
        .buff_len = len,
        .gaddr = gaddr,
    };
    int ret;

    ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp);
    if (ret) {
        /*
         * subcmd is an unsigned 64-bit value; print it with a matching
         * unsigned conversion instead of "%ld", whose argument type
         * (signed long) differs in signedness and, on hosts with a
         * 32-bit long, in width.
         */
        error_report("KVM DUMP command %llu failed",
                     (unsigned long long)subcmd);
    }

    return ret;
}
/*
 * Request the PV dump data of one vCPU.
 *
 * Builds a KVM_PV_DUMP_CPU dump request pointing at @buff, using the
 * cached dump_cpu_buffer_len as buffer length, and submits it through
 * the per-vCPU KVM_S390_PV_CPU_COMMAND ioctl on @cpu.
 *
 * Returns the ioctl result.
 */
int kvm_s390_dump_cpu(S390CPU *cpu, void *buff)
{
    struct kvm_s390_pv_dmp dump_req = {
        .subcmd = KVM_PV_DUMP_CPU,
        .gaddr = 0,
        .buff_addr = (uint64_t)buff,
        .buff_len = info_dump.dump_cpu_buffer_len,
    };
    struct kvm_pv_cmd pv_req = {
        .cmd = KVM_PV_DUMP,
        .data = (uint64_t)&dump_req,
    };
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv_req);
    return ret;
}
/*
 * Issues the KVM_PV_DUMP_INIT dump subcommand with no buffer and no guest
 * address. Returns the result of s390_pv_dump_cmd() (0 on success).
 */
int kvm_s390_dump_init(void)
{
return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0);
}
/*
 * Issues the KVM_PV_DUMP_CONFIG_STOR_STATE dump subcommand.
 *
 * @gaddr: guest address passed to the subcommand
 * @len:   length of the buffer at @dest
 * @dest:  buffer whose address is handed to KVM as the output buffer
 *
 * Returns the result of s390_pv_dump_cmd().
 */
int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest)
{
return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest,
gaddr, len);
}
/*
 * Issues the KVM_PV_DUMP_COMPLETE dump subcommand with @buff as buffer; the
 * buffer length passed along is the cached dump_config_finalize_len.
 * Returns the result of s390_pv_dump_cmd().
 */
int kvm_s390_dump_completion_data(void *buff)
{
return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0,
info_dump.dump_config_finalize_len);
}
#define TYPE_S390_PV_GUEST "s390-pv-guest"
OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)

View file

@ -366,6 +366,12 @@ static int s390_machine_protect(S390CcwMachineState *ms)
ms->pv = true;
/* Will return 0 if API is not available since it's not vital */
rc = s390_pv_query_info();
if (rc) {
goto out_err;
}
/* Set SE header and unpack */
rc = s390_ipl_prepare_pv_header();
if (rc) {

View file

@ -1650,6 +1650,8 @@ typedef struct elf64_shdr {
#define NT_TASKSTRUCT 4
#define NT_AUXV 6
#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */

View file

@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
return ccw->pv;
}
int s390_pv_query_info(void);
int s390_pv_vm_enable(void);
void s390_pv_vm_disable(void);
int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
@ -46,8 +47,17 @@ void s390_pv_prep_reset(void);
int s390_pv_verify(void);
void s390_pv_unshare(void);
void s390_pv_inject_reset_error(CPUState *cs);
uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
uint64_t kvm_s390_pv_dmp_get_size_mem_state(void);
uint64_t kvm_s390_pv_dmp_get_size_completion_data(void);
bool kvm_s390_pv_info_basic_valid(void);
int kvm_s390_dump_init(void);
int kvm_s390_dump_cpu(S390CPU *cpu, void *buff);
int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest);
int kvm_s390_dump_completion_data(void *buff);
#else /* CONFIG_KVM */
static inline bool s390_is_pv(void) { return false; }
static inline int s390_pv_query_info(void) { return 0; }
static inline int s390_pv_vm_enable(void) { return 0; }
static inline void s390_pv_vm_disable(void) {}
static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
@ -56,6 +66,15 @@ static inline void s390_pv_prep_reset(void) {}
static inline int s390_pv_verify(void) { return 0; }
static inline void s390_pv_unshare(void) {}
static inline void s390_pv_inject_reset_error(CPUState *cs) {};
static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; }
static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; }
static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
static inline int kvm_s390_dump_init(void) { return 0; }
static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; }
static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len,
void *dest) { return 0; }
static inline int kvm_s390_dump_completion_data(void *buff) { return 0; }
#endif /* CONFIG_KVM */
int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);

View file

@ -21,6 +21,9 @@ typedef struct ArchDumpInfo {
uint32_t page_size; /* The target's page size. If it's variable and
* unknown, then this should be the maximum. */
uint64_t phys_base; /* The target's physmem base. */
void (*arch_sections_add_fn)(DumpState *s);
uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
} ArchDumpInfo;
struct GuestPhysBlockList; /* memory_mapping.h */

View file

@ -154,15 +154,8 @@ typedef struct DumpState {
GuestPhysBlockList guest_phys_blocks;
ArchDumpInfo dump_info;
MemoryMappingList list;
uint32_t phdr_num;
uint32_t shdr_num;
bool resume;
bool detached;
ssize_t note_size;
hwaddr shdr_offset;
hwaddr phdr_offset;
hwaddr section_offset;
hwaddr note_offset;
hwaddr memory_offset;
int fd;
@ -177,6 +170,20 @@ typedef struct DumpState {
int64_t filter_area_begin; /* Start address of partial guest memory area */
int64_t filter_area_length; /* Length of partial guest memory area */
/* Elf dump related data */
uint32_t phdr_num;
uint32_t shdr_num;
ssize_t note_size;
hwaddr shdr_offset;
hwaddr phdr_offset;
hwaddr section_offset;
hwaddr note_offset;
void *elf_section_hdrs; /* Pointer to section header buffer */
void *elf_section_data; /* Pointer to section data buffer */
uint64_t elf_section_data_size; /* Size of section data */
GArray *string_table_buf; /* String table data buffer */
uint8_t *note_buf; /* buffer for notes */
size_t note_buf_offset; /* the writing place in note_buf */
uint32_t nr_cpus; /* number of guest's cpu */
@ -208,4 +215,9 @@ typedef struct DumpState {
uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
int64_t filter_area_length);
int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
int64_t filter_area_length);
#endif

View file

@ -12,11 +12,13 @@
*/
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "elf.h"
#include "sysemu/dump.h"
#include "hw/s390x/pv.h"
#include "kvm/kvm_s390x.h"
struct S390xUserRegsStruct {
uint64_t psw[2];
@ -76,9 +78,16 @@ typedef struct noteStruct {
uint64_t todcmp;
uint32_t todpreg;
uint64_t ctrs[16];
uint8_t dynamic[1]; /*
* Would be a flexible array member, if
* that was legal inside a union. Real
* size comes from PV info interface.
*/
} contents;
} QEMU_PACKED Note;
static bool pv_dump_initialized;
static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id)
{
int i;
@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id)
note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa));
}
/*
 * Fill in a NT_S390_PV_CPU_DATA note for @cpu.
 *
 * The note type is always set. The dynamic contents are only fetched
 * from KVM when the PV dump has been initialized; otherwise they are
 * left untouched. @id is unused.
 */
static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id)
{
    note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA);

    if (pv_dump_initialized) {
        kvm_s390_dump_cpu(cpu, &note->contents.dynamic);
    }
}
/*
 * Describes one ELF note: how large its contents are and which function
 * fills them in. Tables of these entries end with an entry whose
 * note_contents_func is NULL.
 */
typedef struct NoteFuncDescStruct {
/* Fixed size of the note contents; used when note_size_func is NULL */
int contents_size;
uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */
/* Fills in the note type and contents for the given CPU */
void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
/* When true, the note is skipped unless s390_is_pv() is true */
bool pvonly;
} NoteFuncDesc;
static const NoteFuncDesc note_core[] = {
{sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
{sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
{ 0, NULL}
{sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false},
{sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false},
{ 0, NULL, NULL, false}
};
static const NoteFuncDesc note_linux[] = {
{sizeof_field(Note, contents.prefix), s390x_write_elf64_prefix},
{sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs},
{sizeof_field(Note, contents.timer), s390x_write_elf64_timer},
{sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp},
{sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg},
{sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo},
{sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi},
{sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb},
{ 0, NULL}
{sizeof_field(Note, contents.prefix), NULL, s390x_write_elf64_prefix, false},
{sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs, false},
{sizeof_field(Note, contents.timer), NULL, s390x_write_elf64_timer, false},
{sizeof_field(Note, contents.todcmp), NULL, s390x_write_elf64_todcmp, false},
{sizeof_field(Note, contents.todpreg), NULL, s390x_write_elf64_todpreg, false},
{sizeof_field(Note, contents.vregslo), NULL, s390x_write_elf64_vregslo, false},
{sizeof_field(Note, contents.vregshi), NULL, s390x_write_elf64_vregshi, false},
{sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb, false},
{0, kvm_s390_pv_dmp_get_size_cpu, s390x_write_elf64_pv, true},
{ 0, NULL, NULL, false}
};
static int s390x_write_elf64_notes(const char *note_name,
@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name,
DumpState *s,
const NoteFuncDesc *funcs)
{
Note note;
Note note, *notep;
const NoteFuncDesc *nf;
int note_size;
int note_size, content_size;
int ret = -1;
assert(strlen(note_name) < sizeof(note.name));
for (nf = funcs; nf->note_contents_func; nf++) {
memset(&note, 0, sizeof(note));
note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
g_strlcpy(note.name, note_name, sizeof(note.name));
(*nf->note_contents_func)(&note, cpu, id);
notep = &note;
if (nf->pvonly && !s390_is_pv()) {
continue;
}
note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
ret = f(&note, note_size, s);
content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size;
note_size = sizeof(note) - sizeof(notep->contents) + content_size;
/* Notes with dynamic sizes need to allocate a note */
if (nf->note_size_func) {
notep = g_malloc(note_size);
}
memset(notep, 0, sizeof(note));
/* Setup note header data */
notep->hdr.n_descsz = cpu_to_be32(content_size);
notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
g_strlcpy(notep->name, note_name, sizeof(notep->name));
/* Get contents and write them out */
(*nf->note_contents_func)(notep, cpu, id);
ret = f(notep, note_size, s);
if (nf->note_size_func) {
g_free(notep);
}
if (ret < 0) {
return -1;
@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux);
}
/* PV dump section size functions */
/*
 * Size of the PV memory state data covering @len bytes: the number of
 * whole MiB in @len (any remainder is dropped by the integer division)
 * times the per-MiB buffer size reported by KVM.
 */
static uint64_t get_mem_state_size_from_len(uint64_t len)
{
    uint64_t whole_mib = len / (MiB);

    return whole_mib * kvm_s390_pv_dmp_get_size_mem_state();
}
/*
 * Section size callback for the memory state section: the memory state
 * size for s->total_size bytes.
 */
static uint64_t get_size_mem_state(DumpState *s)
{
return get_mem_state_size_from_len(s->total_size);
}
/*
 * Section size callback for the completion data section. @s is unused; the
 * size comes from the cached PV dump info.
 */
static uint64_t get_size_completion_data(DumpState *s)
{
return kvm_s390_pv_dmp_get_size_completion_data();
}
/* PV dump section data functions */
/*
 * Section contents callback: fetch the PV dump completion data into
 * @buff.
 *
 * Does nothing and returns 0 when no PV dump is in progress. On
 * success the PV dump is marked as no longer initialized; on failure
 * the command's return code is passed back and the flag is left set.
 * @s is unused.
 */
static int get_data_completion(DumpState *s, uint8_t *buff)
{
    int err = 0;

    if (pv_dump_initialized) {
        err = kvm_s390_dump_completion_data(buff);
        if (err == 0) {
            pv_dump_initialized = false;
        }
    }

    return err;
}
/*
 * Section contents callback: fetch the PV memory state data for every
 * guest physical block that falls inside the dump filter area.
 *
 * For each such block the data is stored in @buff at the offset that
 * corresponds to the block's target_start, and the requested length is
 * derived from the block's filtered size. Blocks outside the filter
 * area are skipped.
 *
 * Returns 0 on success or the first non-zero result of
 * kvm_s390_dump_mem_state().
 */
static int get_mem_state(DumpState *s, uint8_t *buff)
{
    GuestPhysBlock *blk;

    QTAILQ_FOREACH(blk, &s->guest_phys_blocks.head, next) {
        int64_t filtered_start, filtered_size;
        uint64_t buff_off;
        int err;

        filtered_start = dump_filtered_memblock_start(blk,
                                                      s->filter_area_begin,
                                                      s->filter_area_length);
        if (filtered_start == -1) {
            /* Block is not part of the filter area */
            continue;
        }

        filtered_size = dump_filtered_memblock_size(blk,
                                                    s->filter_area_begin,
                                                    s->filter_area_length);

        buff_off = get_mem_state_size_from_len(blk->target_start);

        err = kvm_s390_dump_mem_state(blk->target_start,
                                      get_mem_state_size_from_len(filtered_size),
                                      buff + buff_off);
        if (err) {
            return err;
        }
    }

    return 0;
}
/*
 * Table of the architecture defined dump sections. Each entry provides a
 * callback returning the section's size, a callback filling in its
 * contents and the section name. The table ends with an entry whose
 * sections_size_func is NULL; the loops over this table stop there.
 */
static struct sections {
/* Returns the size of the section's data */
uint64_t (*sections_size_func)(DumpState *s);
/* Writes the section's data to buff, returns 0 on success */
int (*sections_contents_func)(DumpState *s, uint8_t *buff);
/* Section name; 12 bytes fit "pv_mem_meta" plus its NUL terminator */
char sctn_str[12];
} sections[] = {
{ get_size_mem_state, get_mem_state, "pv_mem_meta"},
{ get_size_completion_data, get_data_completion, "pv_compl"},
{NULL , NULL, ""}
};
/*
 * Write one Elf64 section header per entry of the sections table into
 * @buff and append each section's name to the string table.
 *
 * The first section starts at s->section_offset and every following
 * one directly after its predecessor. sh_name is the string table
 * length before the name is appended, i.e. the index at which the name
 * starts.
 *
 * Returns the number of bytes written to @buff, or 0 without touching
 * anything when no PV dump has been initialized.
 */
static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff)
{
    Elf64_Shdr *hdr = (void *)buff;
    uint64_t data_off = s->section_offset;
    struct sections *entry;

    if (!pv_dump_initialized) {
        return 0;
    }

    for (entry = sections; entry->sections_size_func; entry++) {
        memset(hdr, 0, sizeof(*hdr));
        hdr->sh_type = SHT_PROGBITS;
        hdr->sh_offset = data_off;
        hdr->sh_size = entry->sections_size_func(s);
        hdr->sh_name = s->string_table_buf->len;
        g_array_append_vals(s->string_table_buf, entry->sctn_str, sizeof(entry->sctn_str));

        /* The next section's data follows this one */
        data_off += hdr->sh_size;
        hdr++;
    }

    return (uintptr_t)hdr - (uintptr_t)buff;
}
/* Add arch specific number of sections and their respective sizes */
/*
 * Start the PV dump and account for its sections in the dump state:
 * every entry of the sections table adds one section header to
 * s->shdr_num and its size to s->elf_section_data_size.
 *
 * Nothing is changed when the guest is not PV, KVM lacks the dump
 * capability, no valid dump info is available, or the dump
 * initialization fails.
 */
static void arch_sections_add(DumpState *s)
{
    struct sections *entry;

    /*
     * We only do a PV dump if we are running a PV guest, KVM supports
     * the dump API and we got valid dump length information.
     */
    if (!(s390_is_pv() && kvm_s390_get_protected_dump() &&
          kvm_s390_pv_info_basic_valid())) {
        return;
    }

    /*
     * Start the UV dump process by doing the initialize dump call via
     * KVM as the proxy.
     */
    if (kvm_s390_dump_init()) {
        /*
         * Dump init failed, maybe the guest owner disabled dumping.
         * We'll continue the non-PV dump process since this is no
         * reason to crash qemu.
         */
        return;
    }
    pv_dump_initialized = true;

    for (entry = sections; entry->sections_size_func; entry++) {
        s->shdr_num += 1;
        s->elf_section_data_size += entry->sections_size_func(s);
    }
}
/*
* After the PV dump has been initialized, the CPU data has been
* fetched and memory has been dumped, we need to grab the tweak data
* and the completion data.
*/
/*
 * Fill @buff with the data of all architecture sections, in table
 * order, each one placed directly after the previous section's data.
 *
 * Returns -EINVAL when no PV dump has been initialized, the first
 * non-zero result of a contents callback, or 0 on success.
 */
static int arch_sections_write(DumpState *s, uint8_t *buff)
{
    struct sections *entry;
    uint8_t *dst = buff;

    if (!pv_dump_initialized) {
        return -EINVAL;
    }

    for (entry = sections; entry->sections_size_func; entry++) {
        int err = entry->sections_contents_func(s, dst);

        /* Advance past this section's slot before checking the result */
        dst += entry->sections_size_func(s);
        if (err) {
            return err;
        }
    }

    return 0;
}
/*
 * Describe the s390x dump format: EM_S390, big endian, 64-bit ELF.
 *
 * The architecture section callbacks are installed only when the
 * guest is PV, KVM supports the protected dump API and valid dump info
 * has been cached; otherwise they are cleared. @guest_phys_blocks is
 * unused. Always returns 0.
 */
int cpu_get_dump_info(ArchDumpInfo *info,
                      const struct GuestPhysBlockList *guest_phys_blocks)
{
    bool pv_dump;

    info->d_machine = EM_S390;
    info->d_endian = ELFDATA2MSB;
    info->d_class = ELFCLASS64;

    /*
     * This is evaluated for each dump so we can freely switch
     * between PV and non-PV.
     */
    pv_dump = s390_is_pv() && kvm_s390_get_protected_dump() &&
              kvm_s390_pv_info_basic_valid();

    info->arch_sections_add_fn = pv_dump ? arch_sections_add : NULL;
    info->arch_sections_write_hdr_fn = pv_dump ? arch_sections_write_hdr : NULL;
    info->arch_sections_write_fn = pv_dump ? arch_sections_write : NULL;

    return 0;
}
@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
{
int name_size = 8; /* "LINUX" or "CORE" + pad */
size_t elf_note_size = 0;
int note_head_size;
int note_head_size, content_size;
const NoteFuncDesc *nf;
assert(class == ELFCLASS64);
@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
note_head_size = sizeof(Elf64_Nhdr);
for (nf = note_core; nf->note_contents_func; nf++) {
elf_note_size = elf_note_size + note_head_size + name_size +
nf->contents_size;
elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size;
}
for (nf = note_linux; nf->note_contents_func; nf++) {
if (nf->pvonly && !s390_is_pv()) {
continue;
}
content_size = nf->contents_size ? nf->contents_size : nf->note_size_func();
elf_note_size = elf_note_size + note_head_size + name_size +
nf->contents_size;
content_size;
}
return (elf_note_size) * nr_cpus;

View file

@ -158,6 +158,7 @@ static int cap_hpage_1m;
static int cap_vcpu_resets;
static int cap_protected;
static int cap_zpci_op;
static int cap_protected_dump;
static bool mem_op_storage_key_support;
@ -364,6 +365,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED);
cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP);
cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP);
kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
@ -2045,6 +2047,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
}
/*
 * Returns cap_protected_dump, which kvm_arch_init() sets to the result of
 * checking the KVM_CAP_S390_PROTECTED_DUMP extension.
 */
int kvm_s390_get_protected_dump(void)
{
return cap_protected_dump;
}
int kvm_s390_get_ri(void)
{
return cap_ri;

View file

@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state);
void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
int kvm_s390_get_hpage_1m(void);
int kvm_s390_get_protected_dump(void);
int kvm_s390_get_ri(void);
int kvm_s390_get_zpci_op(void);
int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);

View file

@ -1,6 +1,8 @@
s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
'kvm.c'
), if_false: files(
'stubs.c'
))
# Newer kernels on s390 check for an S390_PGSTE program header and

12
target/s390x/kvm/stubs.c Normal file
View file

@ -0,0 +1,12 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "kvm_s390x.h"
/* Stub variant: always reports the protected dump capability as absent. */
int kvm_s390_get_protected_dump(void)
{
    return 0;
}