dump queue

Hi The "dump" queue, with: - [PATCH v3/v4 0/9] dump: Cleanup and consolidation - [PATCH v4 0/4] dump: add 32-bit guest Windows support -----BEGIN PGP SIGNATURE----- iQJQBAABCAA6FiEEh6m9kz+HxgbSdvYt2ujhCXWWnOUFAmNY9gMcHG1hcmNhbmRy ZS5sdXJlYXVAcmVkaGF0LmNvbQAKCRDa6OEJdZac5ZUtD/kByfamsq/8hnS6N/ok xs9kXO+HZA1A1Kng19RjYWbTka1LpEAf6y6tPtV27l5rWJZxCgqFp3Q2VKQyzAxl Bcf4gvEhUDJI87jHrZ8WBJ0JvPL8pKNjPn4JUPOQO+6kX8A/3XTwAyvH/T3uxlTo I+4HLwY0EkJ6NU6Cokud5Uo36Zj7JghKrBxTDrd3NC0qSy8xOoIsB5Pbp2PVKuX2 F5Zfll3F+NUDsj9zmMR6agP4PBUJUB680TtvMpMZXb2BXumKDLngthCLRtGrgsDh ChjYr6xkRS9qlXn0PWIYsUyDucDuRFfqTz/Pa9OcGhQuQfIfQiGOM2IFQUE3UcuN OphJEFi44za3E7xEZziAGIFmro+k8zX2fjgN3+mApxpBjUAF/uzoW1VzIIdx65Gh H/IguECFu7AwMxPucRUI7PkwexgIcqpufeTRqep2nCFsAwS6bS+obzrAzIMd9kj1 ApLhj36lkub0Tn77B8bkf1TYJnpBcYbGZpmPCILtOxpBZGlXm++KD1DKAYt6rbnR 8rQugZNRzEB92aSRTkLJ6QKsqudnbR9ssGbOdEJP+v1fgVtFzYbgygx5QMezGkRw vRLWrNbDLog+uYpI2Kb30ItU7+bsDrads9n/gqiGvTP887T3alCtRdIq+Fb28oor tSBhBMqMOtccMy3k+EoXBXX5gw== =BUEY -----END PGP SIGNATURE----- Merge tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu into staging dump queue Hi The "dump" queue, with: - [PATCH v3/v4 0/9] dump: Cleanup and consolidation - [PATCH v4 0/4] dump: add 32-bit guest Windows support # -----BEGIN PGP SIGNATURE----- # # iQJQBAABCAA6FiEEh6m9kz+HxgbSdvYt2ujhCXWWnOUFAmNY9gMcHG1hcmNhbmRy # ZS5sdXJlYXVAcmVkaGF0LmNvbQAKCRDa6OEJdZac5ZUtD/kByfamsq/8hnS6N/ok # xs9kXO+HZA1A1Kng19RjYWbTka1LpEAf6y6tPtV27l5rWJZxCgqFp3Q2VKQyzAxl # Bcf4gvEhUDJI87jHrZ8WBJ0JvPL8pKNjPn4JUPOQO+6kX8A/3XTwAyvH/T3uxlTo # I+4HLwY0EkJ6NU6Cokud5Uo36Zj7JghKrBxTDrd3NC0qSy8xOoIsB5Pbp2PVKuX2 # F5Zfll3F+NUDsj9zmMR6agP4PBUJUB680TtvMpMZXb2BXumKDLngthCLRtGrgsDh # ChjYr6xkRS9qlXn0PWIYsUyDucDuRFfqTz/Pa9OcGhQuQfIfQiGOM2IFQUE3UcuN # OphJEFi44za3E7xEZziAGIFmro+k8zX2fjgN3+mApxpBjUAF/uzoW1VzIIdx65Gh # H/IguECFu7AwMxPucRUI7PkwexgIcqpufeTRqep2nCFsAwS6bS+obzrAzIMd9kj1 # ApLhj36lkub0Tn77B8bkf1TYJnpBcYbGZpmPCILtOxpBZGlXm++KD1DKAYt6rbnR # 8rQugZNRzEB92aSRTkLJ6QKsqudnbR9ssGbOdEJP+v1fgVtFzYbgygx5QMezGkRw # vRLWrNbDLog+uYpI2Kb30ItU7+bsDrads9n/gqiGvTP887T3alCtRdIq+Fb28oor # tSBhBMqMOtccMy3k+EoXBXX5gw== # =BUEY # -----END PGP SIGNATURE----- # gpg: Signature made Wed 26 Oct 2022 04:55:31 EDT # gpg: using RSA key 87A9BD933F87C606D276F62DDAE8E10975969CE5 # gpg: issuer "marcandre.lureau@redhat.com" # gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>" [full] # gpg: aka "Marc-André Lureau <marcandre.lureau@gmail.com>" [full] # Primary key fingerprint: 87A9 BD93 3F87 C606 D276 F62D DAE8 E109 7596 9CE5 * tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu: dump/win_dump: limit number of processed PRCBs s390x: pv: Add dump support s390x: Add KVM PV dump interface include/elf.h: add s390x note types s390x: Introduce PV query interface s390x: Add protected dump cap dump: Add architecture section and section string table support dump: Reintroduce memory_offset and section_offset dump: Reorder struct DumpState dump: Write ELF section headers right after ELF header dump: Use a buffer for ELF section data and headers Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2024-05-20 13:48:07 -04:00 · 2022-10-26 10:53:48 -04:00 · 2022-10-26 10:53:48 -04:00 · 344744e148
parent 08a5d04606 e38c24cb58
commit 344744e148
13 changed files with 650 additions and 99 deletions
--- a/dump/dump.c
+++ b/dump/dump.c
@ -103,6 +103,7 @@ static int dump_cleanup(DumpState *s)
    memory_mapping_list_free(&s->list);
    close(s->fd);
    g_free(s->guest_note);
+    g_array_unref(s->string_table_buf);
    s->guest_note = NULL;
    if (s->resume) {
        if (s->detached) {
@ -152,11 +153,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header)
    elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
    elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
    elf_header->e_phnum = cpu_to_dump16(s, phnum);
-    if (s->shdr_num) {
-        elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
-        elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
-        elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
-    }
+    elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
+    elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
+    elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
+    elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
 }

 static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
@ -180,11 +180,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
    elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
    elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
    elf_header->e_phnum = cpu_to_dump16(s, phnum);
-    if (s->shdr_num) {
-        elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
-        elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
-        elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
-    }
+    elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
+    elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
+    elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
+    elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
 }

 static void write_elf_header(DumpState *s, Error **errp)
@ -195,6 +194,8 @@ static void write_elf_header(DumpState *s, Error **errp)
    void *header_ptr;
    int ret;

+    /* The NULL header and the shstrtab are always defined */
+    assert(s->shdr_num >= 2);
    if (dump_is_64bit(s)) {
        prepare_elf64_header(s, &elf64_header);
        header_size = sizeof(elf64_header);
@ -380,30 +381,136 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
    }
 }

-static void write_elf_section(DumpState *s, int type, Error **errp)
+static void prepare_elf_section_hdr_zero(DumpState *s)
 {
-    Elf32_Shdr shdr32;
-    Elf64_Shdr shdr64;
+    if (dump_is_64bit(s)) {
+        Elf64_Shdr *shdr64 = s->elf_section_hdrs;
+
+        shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
+    } else {
+        Elf32_Shdr *shdr32 = s->elf_section_hdrs;
+
+        shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
+    }
+}
+
+static void prepare_elf_section_hdr_string(DumpState *s, void *buff)
+{
+    uint64_t index = s->string_table_buf->len;
+    const char strtab[] = ".shstrtab";
+    Elf32_Shdr shdr32 = {};
+    Elf64_Shdr shdr64 = {};
    int shdr_size;
    void *shdr;
-    int ret;

-    if (type == 0) {
-        shdr_size = sizeof(Elf32_Shdr);
-        memset(&shdr32, 0, shdr_size);
-        shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
-        shdr = &shdr32;
-    } else {
+    g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab));
+    if (dump_is_64bit(s)) {
        shdr_size = sizeof(Elf64_Shdr);
-        memset(&shdr64, 0, shdr_size);
-        shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
+        shdr64.sh_type = SHT_STRTAB;
+        shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
+        shdr64.sh_name = index;
+        shdr64.sh_size = s->string_table_buf->len;
        shdr = &shdr64;
+    } else {
+        shdr_size = sizeof(Elf32_Shdr);
+        shdr32.sh_type = SHT_STRTAB;
+        shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
+        shdr32.sh_name = index;
+        shdr32.sh_size = s->string_table_buf->len;
+        shdr = &shdr32;
+    }
+    memcpy(buff, shdr, shdr_size);
+}
+
+static bool prepare_elf_section_hdrs(DumpState *s, Error **errp)
+{
+    size_t len, sizeof_shdr;
+    void *buff_hdr;
+
+    /*
+     * Section ordering:
+     * - HDR zero
+     * - Arch section hdrs
+     * - String table hdr
+     */
+    sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+    len = sizeof_shdr * s->shdr_num;
+    s->elf_section_hdrs = g_malloc0(len);
+    buff_hdr = s->elf_section_hdrs;
+
+    /*
+     * The first section header is ALWAYS a special initial section
+     * header.
+     *
+     * The header should be 0 with one exception being that if
+     * phdr_num is PN_XNUM then the sh_info field contains the real
+     * number of segment entries.
+     *
+     * As we zero allocate the buffer we will only need to modify
+     * sh_info for the PN_XNUM case.
+     */
+    if (s->phdr_num >= PN_XNUM) {
+        prepare_elf_section_hdr_zero(s);
+    }
+    buff_hdr += sizeof_shdr;
+
+    /* Add architecture defined section headers */
+    if (s->dump_info.arch_sections_write_hdr_fn
+        && s->shdr_num > 2) {
+        buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr);
+
+        if (s->shdr_num >= SHN_LORESERVE) {
+            error_setg_errno(errp, EINVAL,
+                             "dump: too many architecture defined sections");
+            return false;
+        }
    }

-    ret = fd_write_vmcore(shdr, shdr_size, s);
+    /*
+     * String table is the last section since strings are added via
+     * arch_sections_write_hdr().
+     */
+    prepare_elf_section_hdr_string(s, buff_hdr);
+    return true;
+}
+
+static void write_elf_section_headers(DumpState *s, Error **errp)
+{
+    size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+    int ret;
+
+    if (!prepare_elf_section_hdrs(s, errp)) {
+        return;
+    }
+
+    ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "dump: failed to write section header table");
+        error_setg_errno(errp, -ret, "dump: failed to write section headers");
+    }
+
+    g_free(s->elf_section_hdrs);
+}
+
+static void write_elf_sections(DumpState *s, Error **errp)
+{
+    int ret;
+
+    if (s->elf_section_data_size) {
+        /* Write architecture section data */
+        ret = fd_write_vmcore(s->elf_section_data,
+                              s->elf_section_data_size, s);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                             "dump: failed to write architecture section data");
+            return;
+        }
+    }
+
+    /* Write string table */
+    ret = fd_write_vmcore(s->string_table_buf->data,
+                          s->string_table_buf->len, s);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "dump: failed to write string table data");
    }
 }

@ -554,6 +661,8 @@ static void dump_begin(DumpState *s, Error **errp)
     *   --------------
     *   |  elf header |
     *   --------------
+     *   |  sctn_hdr   |
+     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
@ -562,8 +671,6 @@ static void dump_begin(DumpState *s, Error **errp)
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
-     *   |  sec_hdr    |
-     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
@ -579,6 +686,12 @@ static void dump_begin(DumpState *s, Error **errp)
        return;
    }

+    /* write section headers to vmcore */
+    write_elf_section_headers(s, errp);
+    if (*errp) {
+        return;
+    }
+
    /* write PT_NOTE to vmcore */
    write_elf_phdr_note(s, errp);
    if (*errp) {
@ -591,21 +704,13 @@ static void dump_begin(DumpState *s, Error **errp)
        return;
    }

-    /* write section to vmcore */
-    if (s->shdr_num) {
-        write_elf_section(s, 1, errp);
-        if (*errp) {
-            return;
-        }
-    }
-
    /* write notes to vmcore */
    write_elf_notes(s, errp);
 }

-static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
-                                           int64_t filter_area_start,
-                                           int64_t filter_area_length)
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
+                                    int64_t filter_area_start,
+                                    int64_t filter_area_length)
 {
    int64_t size, left, right;

@ -623,9 +728,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
    return size;
 }

-static int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
-                                            int64_t filter_area_start,
-                                            int64_t filter_area_length)
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
+                                     int64_t filter_area_start,
+                                     int64_t filter_area_length)
 {
    if (filter_area_length) {
        /* return -1 if the block is not within filter area */
@ -665,6 +770,31 @@ static void dump_iterate(DumpState *s, Error **errp)
    }
 }

+static void dump_end(DumpState *s, Error **errp)
+{
+    int rc;
+    ERRP_GUARD();
+
+    if (s->elf_section_data_size) {
+        s->elf_section_data = g_malloc0(s->elf_section_data_size);
+    }
+
+    /* Adds the architecture defined section data to s->elf_section_data  */
+    if (s->dump_info.arch_sections_write_fn &&
+        s->elf_section_data_size) {
+        rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data);
+        if (rc) {
+            error_setg_errno(errp, rc,
+                             "dump: failed to get arch section data");
+            g_free(s->elf_section_data);
+            return;
+        }
+    }
+
+    /* write sections to vmcore */
+    write_elf_sections(s, errp);
+}
+
 static void create_vmcore(DumpState *s, Error **errp)
 {
    ERRP_GUARD();
@ -674,7 +804,14 @@ static void create_vmcore(DumpState *s, Error **errp)
        return;
    }

+    /* Iterate over memory and dump it to file */
    dump_iterate(s, errp);
+    if (*errp) {
+        return;
+    }
+
+    /* Write the section data */
+    dump_end(s, errp);
 }

 static int write_start_flat_header(int fd)
@ -1684,6 +1821,14 @@ static void dump_init(DumpState *s, int fd, bool has_format,
    s->filter_area_begin = begin;
    s->filter_area_length = length;

+    /* First index is 0, it's the special null name */
+    s->string_table_buf = g_array_new(FALSE, TRUE, 1);
+    /*
+     * Allocate the null name, due to the clearing option set to true
+     * it will be 0.
+     */
+    g_array_set_size(s->string_table_buf, 1);
+
    memory_mapping_list_init(&s->list);

    guest_phys_blocks_init(&s->guest_phys_blocks);
@ -1820,38 +1965,53 @@ static void dump_init(DumpState *s, int fd, bool has_format,
    }

    /*
-     * calculate phdr_num
-     *
-     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
+     * The first section header is always a special one in which most
+     * fields are 0. The section header string table is also always
+     * set.
     */
-    s->phdr_num = 1; /* PT_NOTE */
-    if (s->list.num < UINT16_MAX - 2) {
-        s->shdr_num = 0;
+    s->shdr_num = 2;
+
+    /*
+     * Adds the number of architecture sections to shdr_num and sets
+     * elf_section_data_size so we know the offsets and sizes of all
+     * parts.
+     */
+    if (s->dump_info.arch_sections_add_fn) {
+        s->dump_info.arch_sections_add_fn(s);
+    }
+
+    /*
+     * calculate shdr_num so we know the offsets and sizes of all
+     * parts.
+     * Calculate phdr_num
+     *
+     * The absolute maximum amount of phdrs is UINT32_MAX - 1 as
+     * sh_info is 32 bit. There's special handling once we go over
+     * UINT16_MAX - 1 but that is handled in the ehdr and section
+     * code.
+     */
+    s->phdr_num = 1; /* Reserve PT_NOTE */
+    if (s->list.num <= UINT32_MAX - 1) {
        s->phdr_num += s->list.num;
    } else {
-        /* sh_info of section 0 holds the real number of phdrs */
-        s->shdr_num = 1;
-
-        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
-        if (s->list.num <= UINT32_MAX - 1) {
-            s->phdr_num += s->list.num;
-        } else {
-            s->phdr_num = UINT32_MAX;
-        }
+        s->phdr_num = UINT32_MAX;
    }

+    /*
+     * Now that the number of section and program headers is known we
+     * can calculate the offsets of the headers and data.
+     */
    if (dump_is_64bit(s)) {
-        s->phdr_offset = sizeof(Elf64_Ehdr);
-        s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
-        s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
-        s->memory_offset = s->note_offset + s->note_size;
+        s->shdr_offset = sizeof(Elf64_Ehdr);
+        s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
+        s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
    } else {
-
-        s->phdr_offset = sizeof(Elf32_Ehdr);
-        s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
-        s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
-        s->memory_offset = s->note_offset + s->note_size;
+        s->shdr_offset = sizeof(Elf32_Ehdr);
+        s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
+        s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
    }
+    s->memory_offset = s->note_offset + s->note_size;
+    s->section_offset = s->memory_offset + s->total_size;

    return;

--- a/dump/win_dump.c
+++ b/dump/win_dump.c
@ -273,6 +273,13 @@ static void patch_and_save_context(WinDumpHeader *h, bool x64,
        uint64_t Context;
        WinContext ctx;

+        if (i >= WIN_DUMP_FIELD(NumberProcessors)) {
+            warn_report("win-dump: number of QEMU CPUs is bigger than"
+                        " NumberProcessors (%u) in guest Windows",
+                        WIN_DUMP_FIELD(NumberProcessors));
+            return;
+        }
+
        if (cpu_read_ptr(x64, first_cpu,
                KiProcessorBlock + i * win_dump_ptr_size(x64),
                &Prcb)) {
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@ -20,6 +20,11 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/s390x/ipl.h"
 #include "hw/s390x/pv.h"
+#include "target/s390x/kvm/kvm_s390x.h"
+
+static bool info_valid;
+static struct kvm_s390_pv_info_vm info_vm;
+static struct kvm_s390_pv_info_dump info_dump;

 static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
 {
@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
    }                                  \
 }

+int s390_pv_query_info(void)
+{
+    struct kvm_s390_pv_info info = {
+        .header.id = KVM_PV_INFO_VM,
+        .header.len_max = sizeof(info.header) + sizeof(info.vm),
+    };
+    int rc;
+
+    /* Info API's first user is dump so they are bundled */
+    if (!kvm_s390_get_protected_dump()) {
+        return 0;
+    }
+
+    rc = s390_pv_cmd(KVM_PV_INFO, &info);
+    if (rc) {
+        error_report("KVM PV INFO cmd %x failed: %s",
+                     info.header.id, strerror(-rc));
+        return rc;
+    }
+    memcpy(&info_vm, &info.vm, sizeof(info.vm));
+
+    info.header.id = KVM_PV_INFO_DUMP;
+    info.header.len_max = sizeof(info.header) + sizeof(info.dump);
+    rc = s390_pv_cmd(KVM_PV_INFO, &info);
+    if (rc) {
+        error_report("KVM PV INFO cmd %x failed: %s",
+                     info.header.id, strerror(-rc));
+        return rc;
+    }
+
+    memcpy(&info_dump, &info.dump, sizeof(info.dump));
+    info_valid = true;
+
+    return rc;
+}
+
 int s390_pv_vm_enable(void)
 {
    return s390_pv_cmd(KVM_PV_ENABLE, NULL);
@ -114,6 +155,77 @@ void s390_pv_inject_reset_error(CPUState *cs)
    env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
 }

+uint64_t kvm_s390_pv_dmp_get_size_cpu(void)
+{
+    return info_dump.dump_cpu_buffer_len;
+}
+
+uint64_t kvm_s390_pv_dmp_get_size_completion_data(void)
+{
+    return info_dump.dump_config_finalize_len;
+}
+
+uint64_t kvm_s390_pv_dmp_get_size_mem_state(void)
+{
+    return info_dump.dump_config_mem_buffer_per_1m;
+}
+
+bool kvm_s390_pv_info_basic_valid(void)
+{
+    return info_valid;
+}
+
+static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr,
+                            uint64_t len)
+{
+    struct kvm_s390_pv_dmp dmp = {
+        .subcmd = subcmd,
+        .buff_addr = uaddr,
+        .buff_len = len,
+        .gaddr = gaddr,
+    };
+    int ret;
+
+    ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp);
+    if (ret) {
+        error_report("KVM DUMP command %ld failed", subcmd);
+    }
+    return ret;
+}
+
+int kvm_s390_dump_cpu(S390CPU *cpu, void *buff)
+{
+    struct kvm_s390_pv_dmp dmp = {
+        .subcmd = KVM_PV_DUMP_CPU,
+        .buff_addr = (uint64_t)buff,
+        .gaddr = 0,
+        .buff_len = info_dump.dump_cpu_buffer_len,
+    };
+    struct kvm_pv_cmd pv = {
+        .cmd = KVM_PV_DUMP,
+        .data = (uint64_t)&dmp,
+    };
+
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv);
+}
+
+int kvm_s390_dump_init(void)
+{
+    return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0);
+}
+
+int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest)
+{
+    return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest,
+                            gaddr, len);
+}
+
+int kvm_s390_dump_completion_data(void *buff)
+{
+    return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0,
+                            info_dump.dump_config_finalize_len);
+}
+
 #define TYPE_S390_PV_GUEST "s390-pv-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)

--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@ -366,6 +366,12 @@ static int s390_machine_protect(S390CcwMachineState *ms)

    ms->pv = true;

+    /* Will return 0 if API is not available since it's not vital */
+    rc = s390_pv_query_info();
+    if (rc) {
+        goto out_err;
+    }
+
    /* Set SE header and unpack */
    rc = s390_ipl_prepare_pv_header();
    if (rc) {
--- a/include/elf.h
+++ b/include/elf.h
@ -1650,6 +1650,8 @@ typedef struct elf64_shdr {
 #define NT_TASKSTRUCT	4
 #define NT_AUXV		6
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
+#define NT_S390_PV_CPU_DATA	0x30e	/* s390 protvirt cpu dump data */
+#define NT_S390_RI_CB	0x30d		/* s390 runtime instrumentation */
 #define NT_S390_GS_CB   0x30b           /* s390 guarded storage registers */
 #define NT_S390_VXRS_HIGH 0x30a         /* s390 vector registers 16-31 */
 #define NT_S390_VXRS_LOW  0x309         /* s390 vector registers 0-15 (lower half) */
--- a/include/hw/s390x/pv.h
+++ b/include/hw/s390x/pv.h
@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
    return ccw->pv;
 }

+int s390_pv_query_info(void);
 int s390_pv_vm_enable(void);
 void s390_pv_vm_disable(void);
 int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
@ -46,8 +47,17 @@ void s390_pv_prep_reset(void);
 int s390_pv_verify(void);
 void s390_pv_unshare(void);
 void s390_pv_inject_reset_error(CPUState *cs);
+uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
+uint64_t kvm_s390_pv_dmp_get_size_mem_state(void);
+uint64_t kvm_s390_pv_dmp_get_size_completion_data(void);
+bool kvm_s390_pv_info_basic_valid(void);
+int kvm_s390_dump_init(void);
+int kvm_s390_dump_cpu(S390CPU *cpu, void *buff);
+int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest);
+int kvm_s390_dump_completion_data(void *buff);
 #else /* CONFIG_KVM */
 static inline bool s390_is_pv(void) { return false; }
+static inline int s390_pv_query_info(void) { return 0; }
 static inline int s390_pv_vm_enable(void) { return 0; }
 static inline void s390_pv_vm_disable(void) {}
 static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
@ -56,6 +66,15 @@ static inline void s390_pv_prep_reset(void) {}
 static inline int s390_pv_verify(void) { return 0; }
 static inline void s390_pv_unshare(void) {}
 static inline void s390_pv_inject_reset_error(CPUState *cs) {};
+static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; }
+static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
+static inline int kvm_s390_dump_init(void) { return 0; }
+static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; }
+static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len,
+                                          void *dest) { return 0; }
+static inline int kvm_s390_dump_completion_data(void *buff) { return 0; }
 #endif /* CONFIG_KVM */

 int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
--- a/include/sysemu/dump-arch.h
+++ b/include/sysemu/dump-arch.h
@ -21,6 +21,9 @@ typedef struct ArchDumpInfo {
    uint32_t page_size;      /* The target's page size. If it's variable and
                              * unknown, then this should be the maximum. */
    uint64_t phys_base;      /* The target's physmem base. */
+    void (*arch_sections_add_fn)(DumpState *s);
+    uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
+    int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
 } ArchDumpInfo;

 struct GuestPhysBlockList; /* memory_mapping.h */
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@ -154,15 +154,8 @@ typedef struct DumpState {
    GuestPhysBlockList guest_phys_blocks;
    ArchDumpInfo dump_info;
    MemoryMappingList list;
-    uint32_t phdr_num;
-    uint32_t shdr_num;
    bool resume;
    bool detached;
-    ssize_t note_size;
-    hwaddr shdr_offset;
-    hwaddr phdr_offset;
-    hwaddr section_offset;
-    hwaddr note_offset;
    hwaddr memory_offset;
    int fd;

@ -177,6 +170,20 @@ typedef struct DumpState {
    int64_t filter_area_begin;  /* Start address of partial guest memory area */
    int64_t filter_area_length; /* Length of partial guest memory area */

+    /* Elf dump related data */
+    uint32_t phdr_num;
+    uint32_t shdr_num;
+    ssize_t note_size;
+    hwaddr shdr_offset;
+    hwaddr phdr_offset;
+    hwaddr section_offset;
+    hwaddr note_offset;
+
+    void *elf_section_hdrs;     /* Pointer to section header buffer */
+    void *elf_section_data;     /* Pointer to section data buffer */
+    uint64_t elf_section_data_size; /* Size of section data */
+    GArray *string_table_buf;   /* String table data buffer */
+
    uint8_t *note_buf;          /* buffer for notes */
    size_t note_buf_offset;     /* the writing place in note_buf */
    uint32_t nr_cpus;           /* number of guest's cpu */
@ -208,4 +215,9 @@ typedef struct DumpState {
 uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
 uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
 uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
+
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
+                                    int64_t filter_area_length);
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
+                                     int64_t filter_area_length);
 #endif
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@ -12,11 +12,13 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "s390x-internal.h"
 #include "elf.h"
 #include "sysemu/dump.h"
-
+#include "hw/s390x/pv.h"
+#include "kvm/kvm_s390x.h"

 struct S390xUserRegsStruct {
    uint64_t psw[2];
@ -76,9 +78,16 @@ typedef struct noteStruct {
        uint64_t todcmp;
        uint32_t todpreg;
        uint64_t ctrs[16];
+        uint8_t dynamic[1];  /*
+                              * Would be a flexible array member, if
+                              * that was legal inside a union. Real
+                              * size comes from PV info interface.
+                              */
    } contents;
 } QEMU_PACKED Note;

+static bool pv_dump_initialized;
+
 static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id)
 {
    int i;
@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id)
    note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa));
 }

+static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id)
+{
+    note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA);
+    if (!pv_dump_initialized) {
+        return;
+    }
+    kvm_s390_dump_cpu(cpu, &note->contents.dynamic);
+}

 typedef struct NoteFuncDescStruct {
    int contents_size;
+    uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */
    void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
+    bool pvonly;
 } NoteFuncDesc;

 static const NoteFuncDesc note_core[] = {
-    {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
-    {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
-    { 0, NULL}
+    {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false},
+    {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false},
+    { 0, NULL, NULL, false}
 };

 static const NoteFuncDesc note_linux[] = {
-    {sizeof_field(Note, contents.prefix),   s390x_write_elf64_prefix},
-    {sizeof_field(Note, contents.ctrs),     s390x_write_elf64_ctrs},
-    {sizeof_field(Note, contents.timer),    s390x_write_elf64_timer},
-    {sizeof_field(Note, contents.todcmp),   s390x_write_elf64_todcmp},
-    {sizeof_field(Note, contents.todpreg),  s390x_write_elf64_todpreg},
-    {sizeof_field(Note, contents.vregslo),  s390x_write_elf64_vregslo},
-    {sizeof_field(Note, contents.vregshi),  s390x_write_elf64_vregshi},
-    {sizeof_field(Note, contents.gscb),     s390x_write_elf64_gscb},
-    { 0, NULL}
+    {sizeof_field(Note, contents.prefix),   NULL, s390x_write_elf64_prefix,  false},
+    {sizeof_field(Note, contents.ctrs),     NULL, s390x_write_elf64_ctrs,    false},
+    {sizeof_field(Note, contents.timer),    NULL, s390x_write_elf64_timer,   false},
+    {sizeof_field(Note, contents.todcmp),   NULL, s390x_write_elf64_todcmp,  false},
+    {sizeof_field(Note, contents.todpreg),  NULL, s390x_write_elf64_todpreg, false},
+    {sizeof_field(Note, contents.vregslo),  NULL, s390x_write_elf64_vregslo, false},
+    {sizeof_field(Note, contents.vregshi),  NULL, s390x_write_elf64_vregshi, false},
+    {sizeof_field(Note, contents.gscb),     NULL, s390x_write_elf64_gscb,    false},
+    {0, kvm_s390_pv_dmp_get_size_cpu,       s390x_write_elf64_pv, true},
+    { 0, NULL, NULL, false}
 };

 static int s390x_write_elf64_notes(const char *note_name,
@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name,
                                       DumpState *s,
                                       const NoteFuncDesc *funcs)
 {
-    Note note;
+    Note note, *notep;
    const NoteFuncDesc *nf;
-    int note_size;
+    int note_size, content_size;
    int ret = -1;

    assert(strlen(note_name) < sizeof(note.name));

    for (nf = funcs; nf->note_contents_func; nf++) {
-        memset(&note, 0, sizeof(note));
-        note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
-        note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
-        g_strlcpy(note.name, note_name, sizeof(note.name));
-        (*nf->note_contents_func)(&note, cpu, id);
+        notep = &note;
+        if (nf->pvonly && !s390_is_pv()) {
+            continue;
+        }

-        note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
-        ret = f(&note, note_size, s);
+        content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size;
+        note_size = sizeof(note) - sizeof(notep->contents) + content_size;
+
+        /* Notes with dynamic sizes need to allocate a note */
+        if (nf->note_size_func) {
+            notep = g_malloc(note_size);
+        }
+
+        memset(notep, 0, sizeof(note));
+
+        /* Setup note header data */
+        notep->hdr.n_descsz = cpu_to_be32(content_size);
+        notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
+        g_strlcpy(notep->name, note_name, sizeof(notep->name));
+
+        /* Get contents and write them out */
+        (*nf->note_contents_func)(notep, cpu, id);
+        ret = f(notep, note_size, s);
+
+        if (nf->note_size_func) {
+            g_free(notep);
+        }

        if (ret < 0) {
            return -1;
@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
    return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux);
 }

+/* PV dump section size functions */
+static uint64_t get_mem_state_size_from_len(uint64_t len)
+{
+    return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state();
+}
+
+static uint64_t get_size_mem_state(DumpState *s)
+{
+    return get_mem_state_size_from_len(s->total_size);
+}
+
+static uint64_t get_size_completion_data(DumpState *s)
+{
+    return kvm_s390_pv_dmp_get_size_completion_data();
+}
+
+/* PV dump section data functions*/
+static int get_data_completion(DumpState *s, uint8_t *buff)
+{
+    int rc;
+
+    if (!pv_dump_initialized) {
+        return 0;
+    }
+    rc = kvm_s390_dump_completion_data(buff);
+    if (!rc) {
+            pv_dump_initialized = false;
+    }
+    return rc;
+}
+
+static int get_mem_state(DumpState *s, uint8_t *buff)
+{
+    int64_t memblock_size, memblock_start;
+    GuestPhysBlock *block;
+    uint64_t off;
+    int rc;
+
+    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+        memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin,
+                                                      s->filter_area_length);
+        if (memblock_start == -1) {
+            continue;
+        }
+
+        memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin,
+                                                    s->filter_area_length);
+
+        off = get_mem_state_size_from_len(block->target_start);
+
+        rc = kvm_s390_dump_mem_state(block->target_start,
+                                     get_mem_state_size_from_len(memblock_size),
+                                     buff + off);
+        if (rc) {
+            return rc;
+        }
+    }
+
+    return 0;
+}
+
+static struct sections {
+    uint64_t (*sections_size_func)(DumpState *s);
+    int (*sections_contents_func)(DumpState *s, uint8_t *buff);
+    char sctn_str[12];
+} sections[] = {
+    { get_size_mem_state, get_mem_state, "pv_mem_meta"},
+    { get_size_completion_data, get_data_completion, "pv_compl"},
+    {NULL , NULL, ""}
+};
+
+static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff)
+{
+    Elf64_Shdr *shdr = (void *)buff;
+    struct sections *sctn = sections;
+    uint64_t off = s->section_offset;
+
+    if (!pv_dump_initialized) {
+        return 0;
+    }
+
+    for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) {
+        memset(shdr, 0, sizeof(*shdr));
+        shdr->sh_type = SHT_PROGBITS;
+        shdr->sh_offset = off;
+        shdr->sh_size = sctn->sections_size_func(s);
+        shdr->sh_name = s->string_table_buf->len;
+        g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str));
+    }
+
+    return (uintptr_t)shdr - (uintptr_t)buff;
+}
+
+
+/* Add arch specific number of sections and their respective sizes */
+static void arch_sections_add(DumpState *s)
+{
+    struct sections *sctn = sections;
+
+    /*
+     * We only do a PV dump if we are running a PV guest, KVM supports
+     * the dump API and we got valid dump length information.
+     */
+    if (!s390_is_pv() || !kvm_s390_get_protected_dump() ||
+        !kvm_s390_pv_info_basic_valid()) {
+        return;
+    }
+
+    /*
+     * Start the UV dump process by doing the initialize dump call via
+     * KVM as the proxy.
+     */
+    if (!kvm_s390_dump_init()) {
+        pv_dump_initialized = true;
+    } else {
+        /*
+         * Dump init failed, maybe the guest owner disabled dumping.
+         * We'll continue the non-PV dump process since this is no
+         * reason to crash qemu.
+         */
+        return;
+    }
+
+    for (; sctn->sections_size_func; sctn++) {
+        s->shdr_num += 1;
+        s->elf_section_data_size += sctn->sections_size_func(s);
+    }
+}
+
+/*
+ * After the PV dump has been initialized, the CPU data has been
+ * fetched and memory has been dumped, we need to grab the tweak data
+ * and the completion data.
+ */
+static int arch_sections_write(DumpState *s, uint8_t *buff)
+{
+    struct sections *sctn = sections;
+    int rc;
+
+    if (!pv_dump_initialized) {
+        return -EINVAL;
+    }
+
+    for (; sctn->sections_size_func; sctn++) {
+        rc = sctn->sections_contents_func(s, buff);
+        buff += sctn->sections_size_func(s);
+        if (rc) {
+            return rc;
+        }
+    }
+    return 0;
+}
+
 int cpu_get_dump_info(ArchDumpInfo *info,
                      const struct GuestPhysBlockList *guest_phys_blocks)
 {
    info->d_machine = EM_S390;
    info->d_endian = ELFDATA2MSB;
    info->d_class = ELFCLASS64;
-
+    /*
+     * This is evaluated for each dump so we can freely switch
+     * between PV and non-PV.
+     */
+    if (s390_is_pv() && kvm_s390_get_protected_dump() &&
+        kvm_s390_pv_info_basic_valid()) {
+        info->arch_sections_add_fn = *arch_sections_add;
+        info->arch_sections_write_hdr_fn = *arch_sections_write_hdr;
+        info->arch_sections_write_fn = *arch_sections_write;
+    } else {
+        info->arch_sections_add_fn = NULL;
+        info->arch_sections_write_hdr_fn = NULL;
+        info->arch_sections_write_fn = NULL;
+    }
    return 0;
 }

@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
 {
    int name_size = 8; /* "LINUX" or "CORE" + pad */
    size_t elf_note_size = 0;
-    int note_head_size;
+    int note_head_size, content_size;
    const NoteFuncDesc *nf;

    assert(class == ELFCLASS64);
@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
    note_head_size = sizeof(Elf64_Nhdr);

    for (nf = note_core; nf->note_contents_func; nf++) {
-        elf_note_size = elf_note_size + note_head_size + name_size +
-                        nf->contents_size;
+        elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size;
    }
    for (nf = note_linux; nf->note_contents_func; nf++) {
+        if (nf->pvonly && !s390_is_pv()) {
+            continue;
+        }
+        content_size = nf->contents_size ? nf->contents_size : nf->note_size_func();
        elf_note_size = elf_note_size + note_head_size + name_size +
-                        nf->contents_size;
+                        content_size;
    }

    return (elf_note_size) * nr_cpus;
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@ -158,6 +158,7 @@ static int cap_hpage_1m;
 static int cap_vcpu_resets;
 static int cap_protected;
 static int cap_zpci_op;
+static int cap_protected_dump;

 static bool mem_op_storage_key_support;

@ -364,6 +365,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
    cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
    cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED);
    cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP);
+    cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP);

    kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
    kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
@ -2045,6 +2047,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
    return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
 }

+int kvm_s390_get_protected_dump(void)
+{
+    return cap_protected_dump;
+}
+
 int kvm_s390_get_ri(void)
 {
    return cap_ri;
--- a/target/s390x/kvm/kvm_s390x.h
+++ b/target/s390x/kvm/kvm_s390x.h
@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state);
 void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
 int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
 int kvm_s390_get_hpage_1m(void);
+int kvm_s390_get_protected_dump(void);
 int kvm_s390_get_ri(void);
 int kvm_s390_get_zpci_op(void);
 int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@ -1,6 +1,8 @@

 s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
  'kvm.c'
+), if_false: files(
+  'stubs.c'
 ))

 # Newer kernels on s390 check for an S390_PGSTE program header and
--- a/target/s390x/kvm/stubs.c
+++ b/target/s390x/kvm/stubs.c
@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "kvm_s390x.h"
+
+int kvm_s390_get_protected_dump(void)
+{
+    return false;
+}