Gets vcpu mmap size, refactors kvm and vm (#856)

Co-authored-by: tompro <tomas.prochazka@apertia.cz>
This commit is contained in:
SuchAFuriousDeath 2024-04-28 19:07:04 +02:00 committed by GitHub
parent 005d99435a
commit 4bbb9d1c8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 604 additions and 85 deletions

View file

@ -1,4 +1,4 @@
use crate::errno::{Errno, EPERM};
use crate::errno::Errno;
use crate::fs::{
make_dev, CharacterDevice, DeviceDriver, DriverFlags, IoCmd, IoLen, IoVec, IoVecMut,
MakeDevError, MakeDevFlags, Mode, OpenFlags,

View file

@ -67,6 +67,11 @@ extern "C" int kvm_set_user_memory_region(
return 0;
}
// Issues KVM_GET_VCPU_MMAP_SIZE on the given KVM descriptor and returns the raw ioctl() result
// untouched, so a negative value means the call failed and errno holds the reason.
extern "C" int kvm_get_vcpu_mmap_size(int kvm)
{
    const auto size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE);

    return size;
}
extern "C" int kvm_create_vcpu(int vm, int id, int *fd)
{
auto vcpu = ioctl(vm, KVM_CREATE_VCPU, id);
@ -79,3 +84,18 @@ extern "C" int kvm_create_vcpu(int vm, int id, int *fd)
return 0;
}
// Issues KVM_RUN on the given vCPU descriptor and returns the raw ioctl() result untouched.
extern "C" int kvm_run(int vcpu)
{
    const auto status = ioctl(vcpu, KVM_RUN);

    return status;
}
// Issues KVM_GET_REGS on the given vCPU descriptor with `regs` as the ioctl argument and returns
// the raw ioctl() result untouched.
extern "C" int kvm_get_regs(int vcpu, kvm_regs *regs)
{
    const auto status = ioctl(vcpu, KVM_GET_REGS, regs);

    return status;
}
// Issues KVM_SET_REGS on the given vCPU descriptor with `regs` as the ioctl argument and returns
// the raw ioctl() result untouched.
extern "C" int kvm_set_regs(int vcpu, kvm_regs *const regs)
{
    const auto status = ioctl(vcpu, KVM_SET_REGS, regs);

    return status;
}

View file

@ -1,74 +1,186 @@
use self::regs::KvmRegs;
use self::run::KvmRun;
use super::HypervisorError;
use libc::{open, O_RDWR};
use std::ffi::{c_int, c_void};
use std::io::Error;
use std::os::fd::{AsRawFd, BorrowedFd, FromRawFd, OwnedFd};
use std::mem::MaybeUninit;
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
use std::ptr::NonNull;
use thiserror::Error;
pub fn open_kvm() -> Result<OwnedFd, HypervisorError> {
// Open KVM.
let fd = unsafe { open(c"/dev/kvm".as_ptr(), O_RDWR) };
mod regs;
mod run;
if fd < 0 {
return Err(HypervisorError::OpenKvmFailed(Error::last_os_error()));
}
/// Owned descriptor for the `/dev/kvm` device; the descriptor is closed when this value is
/// dropped.
pub struct Kvm(OwnedFd);
// Check KVM version.
let fd = unsafe { OwnedFd::from_raw_fd(fd) };
let mut compat = false;
impl Kvm {
pub fn open() -> Result<Self, HypervisorError> {
use libc::{open, O_RDWR};
match unsafe { kvm_check_version(fd.as_raw_fd(), &mut compat) } {
0 => {
if !compat {
let fd = unsafe { open(c"/dev/kvm".as_ptr(), O_RDWR) };
if fd < 0 {
return Err(HypervisorError::OpenKvmFailed(Error::last_os_error()));
}
// Check KVM version.
let fd = unsafe { OwnedFd::from_raw_fd(fd) };
let mut compat = false;
match unsafe { kvm_check_version(fd.as_raw_fd(), &mut compat) } {
0 if !compat => {
return Err(HypervisorError::KvmVersionMismatched);
}
0 => {}
v => {
return Err(HypervisorError::GetKvmVersionFailed(
Error::from_raw_os_error(v),
))
}
}
v => {
return Err(HypervisorError::GetKvmVersionFailed(
Error::from_raw_os_error(v),
))
Ok(Self(fd))
}
pub fn get_vcpu_mmap_size(&self) -> Result<usize, HypervisorError> {
match unsafe { kvm_get_vcpu_mmap_size(self.0.as_raw_fd()) } {
size @ 0.. => Ok(size as usize),
_ => Err(HypervisorError::GetMmapSizeFailed(Error::last_os_error())),
}
}
Ok(fd)
}
pub fn max_vcpus(&self) -> Result<usize, HypervisorError> {
let mut max = 0;
pub fn max_vcpus(kvm: BorrowedFd) -> Result<usize, Error> {
let mut max = 0;
match unsafe { kvm_max_vcpus(self.0.as_raw_fd(), &mut max) } {
0 => Ok(max),
v => Err(HypervisorError::GetMaxCpuFailed(Error::from_raw_os_error(
v,
))),
}
}
match unsafe { kvm_max_vcpus(kvm.as_raw_fd(), &mut max) } {
0 => Ok(max),
v => Err(Error::from_raw_os_error(v)),
pub fn create_vm(&self) -> Result<Vm, HypervisorError> {
let mut vm = -1;
match unsafe { kvm_create_vm(self.0.as_raw_fd(), &mut vm) } {
0 => Ok(Vm(unsafe { OwnedFd::from_raw_fd(vm) })),
v => Err(HypervisorError::CreateVmFailed(Error::from_raw_os_error(v))),
}
}
}
pub fn create_vm(kvm: BorrowedFd) -> Result<OwnedFd, Error> {
let mut vm = -1;
/// Owned descriptor of a VM obtained from `kvm_create_vm`; the descriptor is closed when this
/// value is dropped.
pub struct Vm(OwnedFd);
match unsafe { kvm_create_vm(kvm.as_raw_fd(), &mut vm) } {
0 => Ok(unsafe { OwnedFd::from_raw_fd(vm) }),
v => Err(Error::from_raw_os_error(v)),
impl Vm {
pub fn set_user_memory_region(
&self,
slot: u32,
addr: u64,
len: u64,
mem: *mut c_void,
) -> Result<(), HypervisorError> {
match unsafe { kvm_set_user_memory_region(self.0.as_raw_fd(), slot, addr, len, mem) } {
0 => Ok(()),
v => Err(HypervisorError::MapRamFailed(Error::from_raw_os_error(v))),
}
}
pub fn create_vcpus(&self, mmap_size: usize) -> Result<VCpus, CreateVCpusError> {
let vcpus = [
self.create_vcpu(0, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 0))?,
self.create_vcpu(1, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 1))?,
self.create_vcpu(2, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 2))?,
self.create_vcpu(3, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 3))?,
self.create_vcpu(4, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 4))?,
self.create_vcpu(5, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 5))?,
self.create_vcpu(6, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 6))?,
self.create_vcpu(7, mmap_size)
.map_err(|e| CreateVCpusError::CreateVcpuFailed(e, 7))?,
];
Ok(VCpus(vcpus))
}
fn create_vcpu(&self, id: i32, mmap_size: usize) -> Result<VCpu, CreateVCpuError> {
use libc::{mmap, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
let mut vcpu = -1;
let fd = match unsafe { kvm_create_vcpu(self.0.as_raw_fd(), id, &mut vcpu) } {
0 => Ok(unsafe { OwnedFd::from_raw_fd(vcpu) }),
v => Err(CreateVCpuError::CreateVcpuFailed(Error::from_raw_os_error(
v,
))),
}?;
let kvm_run = unsafe {
mmap(
std::ptr::null_mut(),
mmap_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd.as_raw_fd(),
0,
)
};
if kvm_run == MAP_FAILED {
return Err(CreateVCpuError::MmapFailed(Error::last_os_error()));
}
Ok(VCpu {
fd,
kvm_run: NonNull::new(kvm_run.cast()).unwrap(),
mmap_size,
})
}
}
pub fn set_user_memory_region(
vm: BorrowedFd,
slot: u32,
addr: u64,
len: u64,
mem: *mut c_void,
) -> Result<(), Error> {
match unsafe { kvm_set_user_memory_region(vm.as_raw_fd(), slot, addr, len, mem) } {
0 => Ok(()),
v => Err(Error::from_raw_os_error(v)),
/// The fixed set of eight vCPUs produced by `Vm::create_vcpus`.
#[derive(Debug)]
pub struct VCpus([VCpu; 8]);
/// A single vCPU: its descriptor plus the memory region mapped from that descriptor.
#[derive(Debug)]
struct VCpu {
// Descriptor returned by `kvm_create_vcpu`.
fd: OwnedFd,
// Start of the region mapped from `fd`; unmapped again when the value is dropped.
kvm_run: NonNull<KvmRun>,
// Length passed to `mmap` for `kvm_run`, reused for the matching `munmap`.
mmap_size: usize,
}
impl Drop for VCpu {
    /// Unmaps the `kvm_run` region.
    ///
    /// # Panics
    /// Panics with the last OS error when `munmap` reports a failure.
    fn drop(&mut self) {
        use libc::munmap;

        let status = unsafe { munmap(self.kvm_run.as_ptr().cast(), self.mmap_size) };

        if status < 0 {
            panic!("failed to munmap KVM_RUN: {}", Error::last_os_error());
        }
    }
}
pub fn create_vcpu(vm: BorrowedFd, id: i32) -> Result<OwnedFd, Error> {
let mut vcpu = -1;
impl VCpu {
pub fn get_regs(&self) -> Result<KvmRegs, Error> {
let mut regs = MaybeUninit::uninit();
match unsafe { kvm_create_vcpu(vm.as_raw_fd(), id, &mut vcpu) } {
0 => Ok(unsafe { OwnedFd::from_raw_fd(vcpu) }),
v => Err(Error::from_raw_os_error(v)),
match unsafe { kvm_get_regs(self.fd.as_raw_fd(), regs.as_mut_ptr()) } {
0 => Ok(unsafe { regs.assume_init() }),
_ => Err(Error::last_os_error()),
}
}
/// Passes `regs` to the vCPU through `kvm_set_regs`.
///
/// # Errors
/// Returns the last OS error when the native call returns a non-zero value.
pub fn set_regs(&self, regs: KvmRegs) -> Result<(), Error> {
    let status = unsafe { kvm_set_regs(self.fd.as_raw_fd(), &regs) };

    if status == 0 {
        Ok(())
    } else {
        Err(Error::last_os_error())
    }
}
}
@ -76,6 +188,8 @@ extern "C" {
fn kvm_check_version(kvm: c_int, compat: *mut bool) -> c_int;
fn kvm_max_vcpus(kvm: c_int, max: *mut usize) -> c_int;
fn kvm_create_vm(kvm: c_int, fd: *mut c_int) -> c_int;
fn kvm_get_vcpu_mmap_size(kvm: c_int) -> c_int;
fn kvm_set_user_memory_region(
vm: c_int,
slot: u32,
@ -84,4 +198,23 @@ extern "C" {
mem: *mut c_void,
) -> c_int;
fn kvm_create_vcpu(vm: c_int, id: c_int, fd: *mut c_int) -> c_int;
fn kvm_run(vcpu: c_int) -> c_int;
fn kvm_get_regs(vcpu: c_int, regs: *mut KvmRegs) -> c_int;
fn kvm_set_regs(vcpu: c_int, regs: *const KvmRegs) -> c_int;
}
/// Error returned by `Vm::create_vcpus`.
#[derive(Debug, Error)]
pub enum CreateVCpusError {
/// Creating one vCPU failed; the second field is the id of that vCPU.
#[error("failed to create vcpu #{1}")]
CreateVcpuFailed(#[source] CreateVCpuError, u8),
}
/// Error returned when a single vCPU cannot be set up.
#[derive(Debug, Error)]
pub enum CreateVCpuError {
/// `kvm_create_vcpu` returned a non-zero value.
#[error("failed to create vcpu")]
CreateVcpuFailed(#[source] Error),
/// Mapping the vCPU descriptor with `mmap` failed.
#[error("failed to mmap KVM_RUN")]
MmapFailed(#[source] Error),
}

View file

@ -0,0 +1,75 @@
/// Register block exchanged with the native `kvm_get_regs` / `kvm_set_regs` wrappers.
///
/// `#[repr(C)]` keeps the fields in declaration order, so the order below is part of the FFI
/// contract and must not be changed.
#[repr(C)]
pub struct KvmRegs {
rax: u64,
rbx: u64,
rcx: u64,
rdx: u64,
rsi: u64,
rdi: u64,
rsp: u64,
rbp: u64,
r8: u64,
r9: u64,
r10: u64,
r11: u64,
r12: u64,
r13: u64,
r14: u64,
r15: u64,
rip: u64,
rflags: u64,
}
/// Segment registers, descriptor tables and control registers of a vCPU.
///
/// NOTE(review): presumably mirrors the kernel's `kvm_sregs`; nothing visible here passes it
/// across FFI yet, so confirm the layout against the kernel header before using it.
#[repr(C)]
pub struct KvmSpecialRegs {
cs: KvmSegment,
ds: KvmSegment,
es: KvmSegment,
fs: KvmSegment,
gs: KvmSegment,
ss: KvmSegment,
tr: KvmSegment,
ldt: KvmSegment,
gdt: KvmDTable,
idt: KvmDTable,
cr0: u64,
cr2: u64,
cr3: u64,
cr4: u64,
cr8: u64,
efer: u64,
apic_base: u64,
interrupt_bitmap: [u64; 4],
}
/// One segment register entry of [`KvmSpecialRegs`].
#[repr(C)]
struct KvmSegment {
base: u64,
limit: u32,
selector: u16,
// Named `ty` because `type` is a reserved word in Rust.
ty: u8,
present: u8,
dpl: u8,
db: u8,
s: u8,
l: u8,
g: u8,
avl: u8,
unusable: u8,
padding: u8,
}
/// Descriptor table entry (`gdt` / `idt`) of [`KvmSpecialRegs`].
#[repr(C)]
struct KvmDTable {
base: u64,
limit: u16,
// Explicit padding that rounds the struct up to 16 bytes.
padding: [u16; 3],
}

View file

@ -0,0 +1,307 @@
use std::mem::ManuallyDrop;
/// Layout of the memory region that `Vm::create_vcpu` maps from a vCPU descriptor.
///
/// NOTE(review): presumably mirrors the leading part of the kernel's `kvm_run`; confirm against
/// the kernel header that nothing after `exit` needs to be accessed.
#[repr(C)]
pub struct KvmRun {
request_interrupt_window: u8,
immediate_exit: u8,
padding1: [u8; 6],
exit_reason: u32,
ready_for_interrupt_injection: u8,
if_flag: u8,
flags: u16,
cr8: u64,
apic_base: u64,
// Exit-specific payload; which member is meaningful presumably depends on `exit_reason`.
exit: Exit,
}
/// Exit-specific payload stored in [`KvmRun::exit`].
///
/// Union fields must be `Copy` or wrapped in `ManuallyDrop`, hence the wrapper around every
/// struct member. The `padding` member makes the union at least 256 bytes large.
#[repr(C)]
union Exit {
hw: ManuallyDrop<Hw>,
fail_entry: ManuallyDrop<FailEntry>,
ex: ManuallyDrop<Ex>,
io: ManuallyDrop<Io>,
debug: ManuallyDrop<Debug>,
mmio: ManuallyDrop<Mmio>,
iocsr_io: ManuallyDrop<IocsrIo>,
hypercall: ManuallyDrop<Hypercall>,
tpr_access: ManuallyDrop<TprAccess>,
s390_sieic: ManuallyDrop<S390Sieic>,
s390_reset_flags: u64,
s390_ucontrol: ManuallyDrop<S390Ucontrol>,
dcr: ManuallyDrop<Dcr>,
internal: ManuallyDrop<Internal>,
emulation_failure: ManuallyDrop<EmulationFailure>,
osi: ManuallyDrop<Osi>,
papr_hcall: ManuallyDrop<PaprHcall>,
s390_tsch: ManuallyDrop<S390Tsch>,
epr: ManuallyDrop<Epr>,
system_event: ManuallyDrop<SystemEvent>,
s390_stsi: ManuallyDrop<S390Stsi>,
eoi: ManuallyDrop<Eoi>,
hyperv: ManuallyDrop<KvmHypervExit>,
arm_nisv: ManuallyDrop<ArmNisv>,
msr: ManuallyDrop<Msr>,
xen: ManuallyDrop<KvmXenExit>,
riscv_sbi: ManuallyDrop<RiscvSbi>,
riscv_csr: ManuallyDrop<RiscvCsr>,
notify: ManuallyDrop<Notify>,
padding: [u8; 256],
}
/// Payload of the `hw` member of [`Exit`].
#[repr(C)]
struct Hw {
hardware_exit_reason: u64,
}
/// Payload of the `fail_entry` member of [`Exit`].
#[repr(C)]
struct FailEntry {
hardware_entry_failure_reason: u64,
cpu: u32,
}
/// Payload of the `ex` member of [`Exit`].
#[repr(C)]
struct Ex {
exception: u32,
error_code: u32,
}
/// Payload of the `io` member of [`Exit`].
#[repr(C)]
struct Io {
direction: u8,
size: u8,
port: u16,
count: u32,
// NOTE(review): presumably an offset relative to the start of the mapped `KvmRun` — confirm.
data_offset: u64,
}
/// Payload of the `debug` member of [`Exit`]; a thin wrapper around [`KvmDebugExitArch`].
#[repr(C)]
struct Debug {
arch: KvmDebugExitArch,
}
/// Debug exit details carried by [`Debug`].
#[repr(C)]
struct KvmDebugExitArch {
exception: u32,
pad: u32,
pc: u64,
dr6: u64,
dr7: u64,
}
/// Payload of the `mmio` member of [`Exit`].
#[repr(C)]
struct Mmio {
phys_addr: u64,
data: [u8; 8],
len: u32,
is_write: u8,
}
/// Payload of the `iocsr_io` member of [`Exit`]; it has the same fields as [`Mmio`].
#[repr(C)]
struct IocsrIo {
phys_addr: u64,
data: [u8; 8],
len: u32,
is_write: u8,
}
/// Payload of the `hypercall` member of [`Exit`].
#[repr(C)]
struct Hypercall {
nr: u64,
args: [u64; 6],
ret: u64,
inner: HypercallInner,
}
/// Trailing union of [`Hypercall`]. Rust has no anonymous unions, so it needs a name of its own.
#[repr(C)]
union HypercallInner {
longmode: u32,
flags: u64,
}
/// Payload of the `tpr_access` member of [`Exit`].
#[repr(C)]
struct TprAccess {
rip: u64,
is_write: u32,
pad: u32,
}
/// Payload of the `s390_sieic` member of [`Exit`].
///
/// Mirrors the `s390_sieic` member of the kernel's `kvm_run` union. The interception code is a
/// single byte there, which places `ipa` at offset 2 and `ipb` at offset 4 (8 bytes in total).
#[repr(C)]
struct S390Sieic {
    icptcode: u8,
    ipa: u16,
    ipb: u32,
}
/// Payload of the `s390_ucontrol` member of [`Exit`].
#[repr(C)]
struct S390Ucontrol {
trans_exc_code: u64,
pgm_code: u32,
}
/// Payload of the `dcr` member of [`Exit`].
#[repr(C)]
struct Dcr {
dcrn: u32,
data: u32,
is_write: u8,
}
/// Payload of the `internal` member of [`Exit`].
#[repr(C)]
struct Internal {
suberror: u32,
// NOTE(review): presumably the number of valid entries in `data` — confirm.
ndata: u32,
data: [u64; 16],
}
/// Payload of the `emulation_failure` member of [`Exit`].
#[repr(C)]
struct EmulationFailure {
suberror: u32,
ndata: u32,
flags: u64,
insn_size: u8,
insn_bytes: [u8; 15],
}
/// Payload of the `osi` member of [`Exit`].
#[repr(C)]
struct Osi {
gprs: [u64; 32],
}
/// Payload of the `papr_hcall` member of [`Exit`].
#[repr(C)]
struct PaprHcall {
nr: u64,
ret: u64,
args: [u64; 9],
}
/// Payload of the `s390_tsch` member of [`Exit`].
///
/// Mirrors the `s390_tsch` member of the kernel's `kvm_run` union, which carries an `ipb` word
/// between `io_int_word` and `dequeued`; without it `dequeued` would be read from the wrong
/// offset.
#[repr(C)]
struct S390Tsch {
    subchannel_id: u16,
    subchannel_nr: u16,
    io_int_parm: u32,
    io_int_word: u32,
    ipb: u32,
    dequeued: u8,
}
/// Payload of the `epr` member of [`Exit`].
#[repr(C)]
struct Epr {
epr: u32,
}
/// Payload of the `system_event` member of [`Exit`].
#[repr(C)]
struct SystemEvent {
// Named `ty` because `type` is a reserved word in Rust.
ty: u32,
ndata: u32,
inner: SystemEventInner,
}
/// Trailing union of [`SystemEvent`]. Rust has no anonymous unions, so it needs a name of its
/// own.
#[repr(C)]
union SystemEventInner {
flags: u64,
data: [u64; 16],
}
/// Payload of the `s390_stsi` member of [`Exit`].
///
/// Mirrors the `s390_stsi` member of the kernel's `kvm_run` union, where `sel2` is a 16-bit
/// value.
#[repr(C)]
struct S390Stsi {
    addr: u64,
    ar: u8,
    reserved: u8,
    fc: u8,
    sel1: u8,
    sel2: u16,
}
/// Payload of the `eoi` member of [`Exit`].
#[repr(C)]
struct Eoi {
vector: u8,
}
/// Payload of the `hyperv` member of [`Exit`].
#[repr(C)]
struct KvmHypervExit {
// Named `ty` because `type` is a reserved word in Rust.
ty: u32,
pad1: u32,
// NOTE(review): presumably `ty` selects which member of `u` is valid — confirm.
u: KvmHypervExitInner,
}
/// Union stored in [`KvmHypervExit::u`]; every member is wrapped in `ManuallyDrop` because the
/// member structs are not `Copy`.
#[repr(C)]
union KvmHypervExitInner {
synic: ManuallyDrop<Synic>,
hcall: ManuallyDrop<Hcall>,
debug: ManuallyDrop<Syndbg>,
}
/// `synic` member of [`KvmHypervExitInner`].
#[repr(C)]
struct Synic {
msr: u32,
pad2: u32,
control: u64,
evt_page: u64,
msg_page: u64,
}
/// `hcall` member of [`KvmHypervExitInner`].
#[repr(C)]
struct Hcall {
input: u64,
result: u64,
params: [u64; 2],
}
/// `debug` member of [`KvmHypervExitInner`].
#[repr(C)]
struct Syndbg {
msr: u32,
pad2: u32,
control: u64,
status: u64,
send_page: u64,
recv_page: u64,
pending_page: u64,
}
/// Payload of the `arm_nisv` member of [`Exit`].
#[repr(C)]
struct ArmNisv {
esr_iss: u64,
fault_ipa: u64,
}
/// Payload of the `msr` member of [`Exit`].
#[repr(C)]
struct Msr {
error: u8,
// Explicit padding so that `reason` starts at offset 8.
pad: [u8; 7],
reason: u32,
index: u32,
data: u64,
}
/// Payload of the `xen` member of [`Exit`].
///
/// Mirrors the kernel's `kvm_xen_exit`: a 32-bit type followed by a union. The union contains
/// 64-bit members, so it is 8-byte aligned and starts at offset 8; its fields must therefore
/// not be flattened directly behind `ty`.
#[repr(C)]
struct KvmXenExit {
    // Named `ty` because `type` is a reserved word in Rust.
    ty: u32,
    u: KvmXenExitInner,
}

/// Union stored in [`KvmXenExit::u`]. Rust has no anonymous unions, so it needs a name of its
/// own.
#[repr(C)]
union KvmXenExitInner {
    hcall: ManuallyDrop<KvmXenHcall>,
}

/// `hcall` member of [`KvmXenExitInner`].
#[repr(C)]
struct KvmXenHcall {
    longmode: u32,
    cpl: u32,
    input: u64,
    result: u64,
    params: [u64; 6],
}
/// Payload of the `riscv_sbi` member of [`Exit`].
#[repr(C)]
struct RiscvSbi {
extension_id: u64,
function_id: u64,
args: [u64; 6],
ret: [u64; 2],
}
/// Payload of the `riscv_csr` member of [`Exit`].
#[repr(C)]
struct RiscvCsr {
csr_num: u64,
new_value: u64,
write_mask: u64,
ret_value: u64,
}
/// Payload of the `notify` member of [`Exit`].
#[repr(C)]
struct Notify {
flags: u32,
}

View file

@ -14,11 +14,11 @@ mod win32;
/// Do not create more than one Hypervisor because it will not work on macOS.
pub struct Hypervisor {
#[cfg(target_os = "linux")]
vcpus: [std::os::fd::OwnedFd; 8], // Drop before VM.
vcpus: self::linux::VCpus, // Drop before VM.
#[cfg(target_os = "linux")]
vm: std::os::fd::OwnedFd, // Drop before KVM.
vm: self::linux::Vm, // Drop before KVM.
#[cfg(target_os = "linux")]
kvm: std::os::fd::OwnedFd,
kvm: self::linux::Kvm,
#[cfg(target_os = "windows")]
part: self::win32::Partition,
@ -46,45 +46,27 @@ impl Hypervisor {
#[cfg(target_os = "linux")]
fn new_linux(ram: Ram) -> Result<Self, HypervisorError> {
use std::os::fd::AsFd;
// Open KVM device.
let kvm = self::linux::open_kvm()?;
let kvm = self::linux::Kvm::open()?;
if self::linux::max_vcpus(kvm.as_fd()).map_err(HypervisorError::GetMaxCpuFailed)? < 8 {
if kvm.max_vcpus()? < 8 {
return Err(HypervisorError::MaxCpuTooLow);
}
// Create a new VM.
let vm = self::linux::create_vm(kvm.as_fd()).map_err(HypervisorError::CreateVmFailed)?;
let vm = kvm.create_vm()?;
self::linux::set_user_memory_region(
vm.as_fd(),
vm.set_user_memory_region(
0,
ram.vm_addr().try_into().unwrap(),
ram.len().try_into().unwrap(),
ram.host_addr().cast(),
)
.map_err(HypervisorError::MapRamFailed)?;
)?;
let vcpus = [
self::linux::create_vcpu(vm.as_fd(), 0)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 0))?,
self::linux::create_vcpu(vm.as_fd(), 1)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 1))?,
self::linux::create_vcpu(vm.as_fd(), 2)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 2))?,
self::linux::create_vcpu(vm.as_fd(), 3)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 3))?,
self::linux::create_vcpu(vm.as_fd(), 4)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 4))?,
self::linux::create_vcpu(vm.as_fd(), 5)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 5))?,
self::linux::create_vcpu(vm.as_fd(), 6)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 6))?,
self::linux::create_vcpu(vm.as_fd(), 7)
.map_err(|e| HypervisorError::CreateVcpuFailed(e, 7))?,
];
let mmap_size = kvm.get_vcpu_mmap_size()?;
let vcpus = vm
.create_vcpus(mmap_size)
.map_err(HypervisorError::CreateVCpusError)?;
Ok(Self {
vcpus,
@ -155,6 +137,10 @@ pub enum HypervisorError {
#[error("couldn't create a RAM")]
CreateRamFailed(#[source] std::io::Error),
#[cfg(target_os = "linux")]
#[error("couldn't get maximum number of CPU for a VM")]
GetMaxCpuFailed(#[source] std::io::Error),
#[error("your OS does not support 8 vCPU on a VM")]
MaxCpuTooLow,
@ -170,10 +156,6 @@ pub enum HypervisorError {
#[error("unexpected KVM version")]
KvmVersionMismatched,
#[cfg(target_os = "linux")]
#[error("couldn't get maximum number of CPU for a VM")]
GetMaxCpuFailed(#[source] std::io::Error),
#[cfg(target_os = "linux")]
#[error("couldn't create a VM")]
CreateVmFailed(#[source] std::io::Error),
@ -183,8 +165,12 @@ pub enum HypervisorError {
MapRamFailed(#[source] std::io::Error),
#[cfg(target_os = "linux")]
#[error("couldn't create vcpu #{1}")]
CreateVcpuFailed(#[source] std::io::Error, u8),
#[error("couldn't get the size of vCPU mmap")]
GetMmapSizeFailed(#[source] std::io::Error),
#[cfg(target_os = "linux")]
#[error("couldn't create vCPUs")]
CreateVCpusError(#[source] self::linux::CreateVCpusError),
#[cfg(target_os = "windows")]
#[error("couldn't create WHP partition object ({0:#x})")]

View file

@ -16,7 +16,6 @@ impl Ram {
pub const SIZE: usize = 1024 * 1024 * 1024 * 8; // 8GB
pub fn new(addr: usize) -> Result<Self, Error> {
// Reserve a memory range on *nix.
#[cfg(unix)]
let mem = {
use libc::{mmap, MAP_ANON, MAP_FAILED, MAP_PRIVATE, PROT_NONE};
@ -40,7 +39,6 @@ impl Ram {
mem.cast()
};
// Reserve a memory range on Windows.
#[cfg(windows)]
let mem = {
use std::ptr::null;

View file

@ -150,7 +150,7 @@ impl FileBackend for SocketFileBackend {
IoCmd::SIOCSPGRP(_) => todo!("socket ioctl with SIOCSPGRP"),
IoCmd::SIOCGPGRP(_) => todo!("socket ioctl with SIOCGPGRP"),
IoCmd::SIOCATMARK(_) => todo!("socket ioctl with SIOCATMARK"),
_ => self.0.backend.control(todo!(), cmd, td),
_ => self.0.backend.control(&self.0, cmd, td),
}
}