Skip to content

Commit

Permalink
server/lib: add generic hypervisor enlightenment interface (#846)
Browse files Browse the repository at this point in the history
Implement some basic infrastructure to support hypervisor
enlightenments. The general idea is as follows:

- propolis-lib defines `Enlightenment` and `EnlightenmentDevice` traits
  that are implemented by components that provide a particular
  guest-hypervisor interface.
- Library users instantiate their chosen enlightenment stack and pass
  the resulting `Arc<dyn EnlightenmentDevice>` to the lib's
  `machine::Builder`. The resulting `Machine` contains vCPU objects that
  hold a reference to the selected `Enlightenment`. This allows them to
  forward RDMSR/WRMSR exits to the enlightenment stack for (possible)
  processing.
- Library users can call `Enlightenment::add_cpuid` to allow hypervisor
  interfaces to inject CPUID identifiers into the `CpuidSet`s that are
  used to configure the VM's vCPUs.
- Finally, library users can upcast an `EnlightenmentDevice` to an
  `Arc<dyn Lifecycle>` and use that reference to dispatch lifecycle and
  migration notifications to the active enlightenment stack.

Implement a `BhyveGuestInterface` enlightenment stack that implements
these interfaces but does nothing beyond exposing its vendor string in
CPUID leaf 0x4000_0000. This matches the hypervisor-related CPUID and
MSR behavior guests would have gotten before this change. More
interesting hypervisor interfaces are left for future PRs.

Add an instance spec field to allow propolis-server clients to select an
enlightenment stack. If not specified this defaults to the bhyve
interface; this provides compatibility when migrating to/from servers
built without this code.
  • Loading branch information
gjcolombo authored Feb 3, 2025
1 parent 33a0154 commit c0bb4cd
Show file tree
Hide file tree
Showing 17 changed files with 505 additions and 30 deletions.
1 change: 1 addition & 0 deletions bin/propolis-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ impl VmConfig {
cpuid: None,
cpus: self.vcpus,
memory_mb: self.memory,
guest_hv_interface: None,
},
components: Default::default(),
};
Expand Down
27 changes: 18 additions & 9 deletions bin/propolis-server/src/lib/initializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ use crate::vm::{
BlockBackendMap, CrucibleBackendMap, DeviceMap, NetworkInterfaceIds,
};
use anyhow::Context;
use cpuid_utils::bits::{HYPERVISOR_BASE_LEAF, HYPERVISOR_BHYVE_VALUES};
use cpuid_utils::CpuidValues;
use crucible_client_types::VolumeConstructionRequest;
pub use nexus_client::Client as NexusClient;
Expand All @@ -28,6 +27,7 @@ use oximeter_instruments::kstat::KstatSampler;
use propolis::block;
use propolis::chardev::{self, BlockingSource, Source};
use propolis::common::{Lifecycle, GB, MB, PAGE_SIZE};
use propolis::enlightenment::Enlightenment;
use propolis::firmware::smbios;
use propolis::hw::bhyve::BhyveHpet;
use propolis::hw::chipset::{i440fx, Chipset};
Expand Down Expand Up @@ -117,6 +117,7 @@ pub fn build_instance(
name: &str,
spec: &Spec,
use_reservoir: bool,
guest_hv_interface: Arc<dyn Enlightenment>,
_log: slog::Logger,
) -> Result<Machine, MachineInitError> {
let (lowmem, highmem) = get_spec_guest_ram_limits(spec);
Expand All @@ -125,10 +126,12 @@ pub fn build_instance(
use_reservoir,
track_dirty: true,
};

let mut builder = Builder::new(name, create_opts)
.context("failed to create kernel vmm builder")?
.max_cpus(spec.board.cpus)
.context("failed to set max cpus")?
.guest_hypervisor_interface(guest_hv_interface)
.add_mem_region(0, lowmem, "lowmem")
.context("failed to add low memory region")?
.add_rom_region(0x1_0000_0000 - MAX_ROM_SIZE, MAX_ROM_SIZE, "bootrom")
Expand Down Expand Up @@ -1231,20 +1234,16 @@ impl MachineInitializer<'_> {
/// them into the device map, and then, if a kstat sampler is provided,
/// tracking their kstats.
pub async fn initialize_cpus(&mut self) -> Result<(), MachineInitError> {
let hv_interface = self.machine.guest_hv_interface.as_ref();
for vcpu in self.machine.vcpus.iter() {
// Report that the guest is running on bhyve.
//
// The CPUID set in the spec is not allowed to contain any leaves in
// the hypervisor leaf region (enforced at spec generation time).
let mut set = self.spec.cpuid.clone();
assert!(
set.insert(
CpuidIdent::leaf(HYPERVISOR_BASE_LEAF),
HYPERVISOR_BHYVE_VALUES
)
.expect("no hypervisor subleaves")
.is_none(),
"CPUID set should have no hypervisor leaves"
hv_interface.add_cpuid(&mut set).expect(
"propolis_server::spec construction should deny direct \
requests to set hypervisor leaves",
);

let specialized = propolis::cpuid::Specializer::new()
Expand Down Expand Up @@ -1282,4 +1281,14 @@ impl MachineInitializer<'_> {
}
Ok(())
}

/// Records the guest-hypervisor interface in this initializer's device
/// collection under the fixed name `"guest-hv-interface"`.
///
/// The interface is supplied as an `Arc<dyn Lifecycle>`, i.e. already
/// upcast from whatever concrete enlightenment type the caller created.
pub fn register_guest_hv_interface(
    &mut self,
    guest_hv_interface: Arc<dyn Lifecycle>,
) {
    // The key is a constant name rather than a spec-derived identifier.
    let key = SpecKey::Name(String::from("guest-hv-interface"));
    self.devices.insert(key, guest_hv_interface);
}
}
1 change: 1 addition & 0 deletions bin/propolis-server/src/lib/spec/api_spec_v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ impl From<Spec> for InstanceSpecV0 {
cpus: board.cpus,
memory_mb: board.memory_mb,
chipset: board.chipset,
guest_hv_interface: board.guest_hv_interface,
cpuid: Some(cpuid.into_instance_spec_cpuid()),
};
let mut spec = InstanceSpecV0 { board, components: Default::default() };
Expand Down
4 changes: 3 additions & 1 deletion bin/propolis-server/src/lib/spec/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ impl SpecBuilder {
cpus: board.cpus,
memory_mb: board.memory_mb,
chipset: board.chipset,
guest_hv_interface: board.guest_hv_interface,
},
cpuid,
..Default::default()
Expand Down Expand Up @@ -364,7 +365,7 @@ impl SpecBuilder {
mod test {
use propolis_api_types::instance_spec::components::{
backends::{BlobStorageBackend, VirtioNetworkBackend},
board::{Chipset, I440Fx},
board::{Chipset, GuestHypervisorInterface, I440Fx},
devices::{VirtioDisk, VirtioNic},
};
use uuid::Uuid;
Expand All @@ -378,6 +379,7 @@ mod test {
cpus: 4,
memory_mb: 512,
chipset: Chipset::I440Fx(I440Fx { enable_pcie: false }),
guest_hv_interface: GuestHypervisorInterface::Bhyve,
};

SpecBuilder {
Expand Down
6 changes: 4 additions & 2 deletions bin/propolis-server/src/lib/spec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use propolis_api_types::instance_spec::{
BlobStorageBackend, CrucibleStorageBackend, FileStorageBackend,
VirtioNetworkBackend,
},
board::{Chipset, I440Fx},
board::{Chipset, GuestHypervisorInterface, I440Fx},
devices::{
NvmeDisk, PciPciBridge, QemuPvpanic as QemuPvpanicDesc,
SerialPortNumber, VirtioDisk, VirtioNic,
Expand Down Expand Up @@ -87,11 +87,12 @@ pub(crate) struct Spec {
/// before being included in an internal spec.
///
/// [instance spec `Board`]: propolis_api_types::instance_spec::components::board::Board
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Debug)]
pub(crate) struct Board {
    /// The number of vCPUs; this value is passed to the machine builder as
    /// its maximum CPU count.
    pub cpus: u8,
    /// The amount of guest memory (presumably in MiB, per the field name —
    /// TODO confirm against the API type's documentation).
    pub memory_mb: u64,
    /// The chipset to expose to guest software.
    pub chipset: Chipset,
    /// The guest-hypervisor interface (enlightenment stack) selected for this
    /// VM.
    pub guest_hv_interface: GuestHypervisorInterface,
}

impl Default for Board {
Expand All @@ -100,6 +101,7 @@ impl Default for Board {
cpus: 0,
memory_mb: 0,
chipset: Chipset::I440Fx(I440Fx { enable_pcie: false }),
guest_hv_interface: GuestHypervisorInterface::Bhyve,
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions bin/propolis-server/src/lib/vm/ensure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use std::sync::Arc;

use oximeter::types::ProducerRegistry;
use oximeter_instruments::kstat::KstatSampler;
use propolis::enlightenment::{bhyve::BhyveGuestInterface, Enlightenment};
use propolis_api_types::{
instance_spec::components::board::GuestHypervisorInterface,
InstanceEnsureResponse, InstanceMigrateInitiateResponse,
InstanceProperties, InstanceState,
};
Expand Down Expand Up @@ -389,12 +391,17 @@ async fn initialize_vm_objects(

let vmm_log = log.new(slog::o!("component" => "vmm"));

let guest_hv_interface = match spec.board.guest_hv_interface {
GuestHypervisorInterface::Bhyve => Arc::new(BhyveGuestInterface),
};

// Set up the 'shell' instance into which the rest of this routine will
// add components.
let machine = build_instance(
&properties.vm_name(),
&spec,
options.use_reservoir,
guest_hv_interface.clone(),
vmm_log,
)?;

Expand Down Expand Up @@ -451,6 +458,7 @@ async fn initialize_vm_objects(
let ramfb =
init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?;

init.register_guest_hv_interface(guest_hv_interface.as_lifecycle());
init.initialize_cpus().await?;
let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new(
&machine,
Expand Down
11 changes: 0 additions & 11 deletions crates/cpuid-utils/src/bits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,10 @@
//! Definitions here are taken from the AMD Architecture Programmer's Manual,
//! volume 3, appendix E (Publication 24594, revision 3.36, March 2024).
use propolis_types::CpuidValues;

/// The lowest-numbered leaf in the standard CPUID leaf range.
pub const STANDARD_BASE_LEAF: u32 = 0;
/// The lowest-numbered leaf in the hypervisor CPUID leaf range.
pub const HYPERVISOR_BASE_LEAF: u32 = 0x4000_0000;
/// The lowest-numbered leaf in the extended CPUID leaf range.
pub const EXTENDED_BASE_LEAF: u32 = 0x8000_0000;

/// The bhyve default hypervisor identifier ("bhyve bhyve " in ebx/ecx/edx with
/// no additional hypervisor CPUID leaves reported in eax).
pub const HYPERVISOR_BHYVE_VALUES: CpuidValues = CpuidValues {
eax: HYPERVISOR_BASE_LEAF,
ebx: 0x76796862,
ecx: 0x68622065,
edx: 0x20657679,
};

bitflags::bitflags! {
/// Leaf 1 ecx: instruction feature identifiers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
Expand Down
27 changes: 27 additions & 0 deletions crates/propolis-api-types/src/instance_spec/components/board.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,22 @@ pub struct CpuidEntry {
pub edx: u32,
}

/// A hypervisor interface to expose to the guest.
//
// Serialized with serde's adjacent tagging: the variant name is stored under
// the `type` key and any variant payload under the `value` key.
#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema, Default)]
#[serde(deny_unknown_fields, tag = "type", content = "value")]
pub enum GuestHypervisorInterface {
/// Expose a bhyve-like interface ("bhyve bhyve " as the hypervisor ID in
/// leaf 0x4000_0000 and no additional leaves or features).
//
// `#[default]` makes this the variant produced by `Default::default()`.
#[default]
Bhyve,
}

impl GuestHypervisorInterface {
    // Reports whether this value is the `Bhyve` interface. The match is
    // written out exhaustively over the enum's variants.
    fn is_default(&self) -> bool {
        match self {
            Self::Bhyve => true,
        }
    }
}

/// A VM's mainboard.
#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)]
#[serde(deny_unknown_fields)]
Expand All @@ -105,6 +121,17 @@ pub struct Board {
/// The chipset to expose to guest software.
pub chipset: Chipset,

/// The hypervisor platform to expose to the guest. The default is a
/// bhyve-compatible interface with no additional features.
///
/// For compatibility with older versions of Propolis, this field is only
/// serialized if it specifies a non-default interface.
#[serde(
default,
skip_serializing_if = "GuestHypervisorInterface::is_default"
)]
pub guest_hv_interface: GuestHypervisorInterface,

/// The CPUID values to expose to the guest. If `None`, bhyve will derive
/// default values from the host's CPUID values.
#[serde(default, skip_serializing_if = "Option::is_none")]
Expand Down
46 changes: 46 additions & 0 deletions lib/propolis/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,52 @@ use std::sync::atomic::{AtomicBool, Ordering};

use crate::vmm::SubMapping;

/// A vCPU number.
///
/// This is a thin wrapper around a `u32`. It derives the equality, ordering,
/// and hashing traits so that identifiers can be compared, sorted, and used as
/// keys in maps and sets.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct VcpuId(u32);

impl From<u32> for VcpuId {
fn from(value: u32) -> Self {
Self(value)
}
}

impl From<i32> for VcpuId {
    /// Converts a signed 32-bit value into a CPU identifier.
    ///
    /// # Panics
    ///
    /// Panics if `value` cannot be converted into a `u32` (i.e. it is
    /// negative). This should generally not be possible because bhyve uses
    /// non-negative (though signed) CPU identifiers. The panic message
    /// includes the offending value.
    fn from(value: i32) -> Self {
        // `expect` takes a plain `&str` and performs no formatting, so a
        // `{value}` placeholder in its argument is printed literally. Build
        // the message with `panic!` so the bad value actually appears.
        let id = u32::try_from(value).unwrap_or_else(|_| {
            panic!("vCPU number {value} should fit in a u32")
        });
        Self(id)
    }
}

impl Into<u32> for VcpuId {
fn into(self) -> u32 {
self.0
}
}

impl Into<i32> for VcpuId {
/// Converts a CPU identifier into a signed 32-bit value.
///
/// # Panics
///
/// Panics if the inner value cannot be converted to an `i32`. This should
/// generally not be possible because Propolis limits the maximum number of
/// CPUs a VM can have to a number well below `i32::MAX`.
fn into(self) -> i32 {
i32::try_from(self.0)
.expect("vCPU number {self.0} should fit in an i32")
}
}

/// Controls whether items wrapped in a [`GuestData`] are displayed or redacted
/// when the wrappers are printed via their `Display` or `Debug` impls.
//
Expand Down
63 changes: 63 additions & 0 deletions lib/propolis/src/enlightenment/bhyve.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Provides a bhyve-compatible guest-hypervisor interface.
//!
//! This interface supplies no special enlightenments; it merely identifies
//! itself as a bhyve hypervisor in CPUID leaf 0x4000_0000.
use cpuid_utils::{
bits::HYPERVISOR_BASE_LEAF, CpuidIdent, CpuidSet, CpuidValues,
};

use crate::{
accessors::MemAccessor,
common::{Lifecycle, VcpuId},
enlightenment::{AddCpuidError, Enlightenment},
msr::{MsrId, RdmsrOutcome, WrmsrOutcome},
};

/// An implementation of the bhyve guest-hypervisor interface. This interface
/// exposes no special enlightenments; its only purpose is to inject the
/// appropriate hypervisor ID into CPUID leaf 0x4000_0000, since this leaf will
/// not otherwise appear in a propolis-server instance specification's CPUID
/// settings.
///
/// The type carries no state. It derives `Debug` so that it can appear in
/// diagnostic output like other public types.
#[derive(Debug)]
pub struct BhyveGuestInterface;

impl Lifecycle for BhyveGuestInterface {
    /// Returns the fixed name that identifies this component type.
    fn type_name(&self) -> &'static str {
        const TYPE_NAME: &str = "bhyve-guest-interface";
        TYPE_NAME
    }
}

impl Enlightenment for BhyveGuestInterface {
    /// Adds the bhyve hypervisor identification leaf to `cpuid`.
    ///
    /// A one-entry `CpuidSet` is built for leaf `HYPERVISOR_BASE_LEAF` and
    /// handed to `super::add_cpuid` together with the caller's set; whatever
    /// that helper returns is returned unchanged.
    fn add_cpuid(&self, cpuid: &mut CpuidSet) -> Result<(), AddCpuidError> {
        // Start from an empty set that shares the caller's vendor.
        let mut to_add = CpuidSet::new(cpuid.vendor());

        let hv_leaf = CpuidIdent::leaf(HYPERVISOR_BASE_LEAF);

        // eax reports the maximum hypervisor leaf, which is the base leaf
        // itself. ebx/ecx/edx carry the vendor ID "bhyve bhyve ".
        let hv_values = CpuidValues {
            eax: HYPERVISOR_BASE_LEAF,
            ebx: 0x7679_6862,
            ecx: 0x6862_2065,
            edx: 0x2065_7679,
        };

        to_add
            .insert(hv_leaf, hv_values)
            .expect("the map was previously empty");

        super::add_cpuid(cpuid, to_add)
    }

    /// This interface handles no MSR reads; every read is reported as
    /// `NotHandled`.
    fn rdmsr(&self, _vcpu: VcpuId, _msr: MsrId) -> RdmsrOutcome {
        RdmsrOutcome::NotHandled
    }

    /// This interface handles no MSR writes; every write is reported as
    /// `NotHandled`.
    fn wrmsr(&self, _vcpu: VcpuId, _msr: MsrId, _value: u64) -> WrmsrOutcome {
        WrmsrOutcome::NotHandled
    }

    /// No-op: this interface does not use the supplied memory accessor.
    fn attach(&self, _parent: &MemAccessor) {}
}
Loading

0 comments on commit c0bb4cd

Please sign in to comment.