Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for TD SMP #598

Merged
merged 3 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions bootlib/src/kernel_launch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ use crate::platform::SvsmPlatformType;
use core::mem::size_of;
use zerocopy::{Immutable, IntoBytes};

// Guest physical address of the SIPI stub.
// The SIPI stub is placed immediately below the stage 2 heap:
// 0xF000 + 0x1000 equals STAGE2_HEAP_START (0x10000).
pub const SIPI_STUB_GPA: u32 = 0xF000;

// End address (0xA0000) of the first 640 KB of RAM (low memory).
pub const LOWMEM_END: u32 = 0xA0000;

// The stage 2 heap spans [STAGE2_HEAP_START, STAGE2_HEAP_END), i.e. from
// 64 KB up to the end of low memory.
pub const STAGE2_HEAP_START: u32 = 0x10000; // 64 KB
pub const STAGE2_HEAP_END: u32 = LOWMEM_END; // 640 KB
pub const STAGE2_BASE: u32 = 0x800000; // Start of stage2 area excluding heap
pub const STAGE2_STACK_END: u32 = 0x805000;
// Size in bytes of the Stage2LaunchInfo structure.
pub const STAGE2_INFO_SZ: u32 = size_of::<Stage2LaunchInfo>() as u32;
// STAGE2_STACK_END plus 0x1000, minus the size of Stage2LaunchInfo.
// NOTE(review): presumably the launch info occupies the top of the stack
// page and the stack starts just below it -- confirm against the loader.
pub const STAGE2_STACK: u32 = STAGE2_STACK_END + 0x1000 - STAGE2_INFO_SZ;
Expand Down Expand Up @@ -95,6 +104,3 @@ pub struct ApStartContext {
pub transition_cr3: u32,
pub context_size: u32,
}

// The SIPI stub is placed immediately below the stage 2 heap area.
pub const SIPI_STUB_GPA: u32 = 0xF000;
6 changes: 4 additions & 2 deletions igvmbuilder/src/gpa_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
use std::error::Error;
use std::fs::metadata;

use bootlib::kernel_launch::{CPUID_PAGE, SECRETS_PAGE, STAGE2_STACK_END, STAGE2_START};
use bootlib::kernel_launch::{
CPUID_PAGE, SECRETS_PAGE, STAGE2_BASE, STAGE2_STACK_END, STAGE2_START,
};
use igvm_defs::PAGE_SIZE_4K;

use crate::cmd_options::{CmdOptions, Hypervisor};
Expand Down Expand Up @@ -158,7 +160,7 @@ impl GpaMap {
};

let gpa_map = Self {
base_addr: 0x800000,
base_addr: STAGE2_BASE.into(),
stage1_image,
stage2_stack: GpaRange::new_page(STAGE2_STACK_END.into())?,
stage2_image,
Expand Down
9 changes: 5 additions & 4 deletions igvmbuilder/src/igvm_firmware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::error::Error;
use std::fs;

use bootlib::igvm_params::{IgvmGuestContext, IgvmParamBlockFwInfo};
use bootlib::kernel_launch::{LOWMEM_END, STAGE2_HEAP_END};
use igvm::snp_defs::SevVmsa;
use igvm::{IgvmDirectiveHeader, IgvmFile};
use igvm_defs::{
Expand Down Expand Up @@ -92,7 +93,7 @@ impl IgvmFirmware {
igvm_fw.fw_info.start = igvm_fw.lowest_gpa.try_into()?;
igvm_fw.fw_info.size = (igvm_fw.highest_gpa - igvm_fw.lowest_gpa).try_into()?;
igvm_fw.fw_info.in_low_memory = 1;
if igvm_fw.fw_info.start < 0xA0000 {
if igvm_fw.fw_info.start < LOWMEM_END {
return Err("IGVM firmware base is lower than 640K".into());
}

Expand All @@ -106,11 +107,11 @@ impl IgvmFirmware {
return Err("IGVM firmware does not contain guest context".into());
}

// Mark the range between the top of the stage 2 area and the base
// Mark the range between the top of the stage 2 heap and the base
// of memory as a range that needs to be validated.
igvm_fw.fw_info.prevalidated_count = 1;
igvm_fw.fw_info.prevalidated[0].base = 0xA0000;
igvm_fw.fw_info.prevalidated[0].size = igvm_fw.fw_info.start - 0xA0000;
igvm_fw.fw_info.prevalidated[0].base = STAGE2_HEAP_END;
igvm_fw.fw_info.prevalidated[0].size = igvm_fw.fw_info.start - STAGE2_HEAP_END;

Ok(Box::new(igvm_fw))
}
Expand Down
65 changes: 46 additions & 19 deletions kernel/src/boot_stage2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,26 @@
// Author: Joerg Roedel <jroedel@suse.de>

use core::arch::global_asm;
use core::mem::{offset_of, size_of};

use bootlib::kernel_launch::{ApStartContext, STAGE2_HEAP_START};
use svsm::{
cpu::{
efer::EFERFlags,
msr::{EFER, SEV_STATUS},
},
mm::PGTABLE_LVL3_IDX_PTE_SELFMAP,
platform::tdp::TdMailbox,
types::PAGE_SIZE,
};

// Address of the TD mailbox page; identical to the SIPI stub address.
const MAILBOX_ADDR: u32 = bootlib::kernel_launch::SIPI_STUB_GPA;
// Address of the AP start context, which occupies the last
// size_of::<ApStartContext>() bytes of the mailbox page.
const AP_CTXT_ADDR: u32 = MAILBOX_ADDR + PAGE_SIZE as u32 - size_of::<ApStartContext>() as u32;

// The mailbox page must be within the first 640 KB of memory so it can be
// validated by the BSP in stage2. It also can't be in stage2's heap area.
// The compile-time check below enforces that the page ends at or below
// STAGE2_HEAP_START.
const _: () = assert!(MAILBOX_ADDR + (PAGE_SIZE as u32) <= STAGE2_HEAP_START);

global_asm!(
r#"
.text
Expand All @@ -26,6 +37,7 @@ global_asm!(

/* Save pointer to startup structure in EBP */
movl %esp, %ebp

/*
* Load a GDT. Despite the naming, it contains valid
* entries for both, "legacy" 32bit and long mode each.
Expand Down Expand Up @@ -98,19 +110,16 @@ global_asm!(
orl %edx, %eax
movl %eax, 0xF6C(%edi)

/* Signal APs */
movl $setup_flag, %edi
movl $1, (%edi)
jmp 2f

.Lskip_paging_setup:
movl $setup_flag, %edi
.Lap_wait:
movl (%edi), %eax
test %eax, %eax
jz .Lap_wait
.Lskip_paging_setup:
movl $ap_flag, %edi
peterfang marked this conversation as resolved.
Show resolved Hide resolved
.Lap_wait_for_env:
cmpl $0, (%edi)
pause
je .Lap_wait_for_env

2:
2:
/* Enable 64bit PTEs, CR4.PAE. */
movl %cr4, %eax
bts $5, %eax
Expand All @@ -122,10 +131,9 @@ global_asm!(
movl %eax, %ebx
orl $({LME} | {NXE}), %eax
cmp %eax, %ebx
jz 2f
jz 3f
wrmsr
2:

3:
/* Load the static page table root. */
movl $pgtable, %eax
movl %eax, %cr3
Expand Down Expand Up @@ -241,9 +249,23 @@ global_asm!(
test %esi, %esi
jz .Lbsp_main

.Lcheck_command:
/* TODO */
jmp .Lcheck_command
movl ${MAILBOX_VPIDX_ADDR}, %eax

/*
* The following wait-for-signal code must be written in asm because
* APs run stacklessly here.
*/
.Lap_wait_for_signal:
/* vCPU index */
cmpl %esi, (%eax)
pause
jne .Lap_wait_for_signal
peterfang marked this conversation as resolved.
Show resolved Hide resolved

/* See smp.rs for prerequisites. */
movl ${AP_CTXT_ADDR}, %edi
movl ${AP_STARTUP_RIP_ADDR}, %eax
movq (%rax), %rax
jmp *%rax

.Lbsp_main:
/* Clear out .bss and transfer control to the main stage2 code. */
Expand All @@ -268,7 +290,8 @@ global_asm!(
.data

.align 4
setup_flag:
.globl ap_flag
ap_flag:
.long 0

idt32:
Expand Down Expand Up @@ -304,15 +327,19 @@ global_asm!(
.word gdt64_end - gdt64 - 1
.quad gdt64

.align 4096
.align {PAGE_SIZE}
.globl pgtable
pgtable:
.fill 7 * 4096, 1, 0
.fill 7*{PAGE_SIZE}, 1, 0
pgtable_end:"#,
PAGE_SIZE = const PAGE_SIZE,
PGTABLE_LVL3_IDX_PTE_SELFMAP = const PGTABLE_LVL3_IDX_PTE_SELFMAP,
EFER = const EFER,
LME = const EFERFlags::LME.bits(),
NXE = const EFERFlags::NXE.bits(),
SEV_STATUS = const SEV_STATUS,
MAILBOX_VPIDX_ADDR = const MAILBOX_ADDR + offset_of!(TdMailbox, vcpu_index) as u32,
AP_CTXT_ADDR = const AP_CTXT_ADDR,
AP_STARTUP_RIP_ADDR = const AP_CTXT_ADDR + offset_of!(ApStartContext, initial_rip) as u32,
options(att_syntax)
);
3 changes: 2 additions & 1 deletion kernel/src/fw_cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::address::{Address, PhysAddr};
use crate::error::SvsmError;
use crate::mm::pagetable::max_phys_addr;
use crate::utils::MemoryRegion;
use bootlib::kernel_launch::{STAGE2_MAXLEN, STAGE2_START};

use super::io::IOPort;
use alloc::string::String;
Expand Down Expand Up @@ -214,7 +215,7 @@ impl<'a> FwCfg<'a> {
.or_else(|_| self.find_kernel_region_e820())?;

// Make sure that the kernel region doesn't overlap with the loader.
if kernel_region.start() < PhysAddr::from(640 * 1024u64) {
if kernel_region.start() < PhysAddr::from(u64::from(STAGE2_START + STAGE2_MAXLEN)) {
return Err(SvsmError::FwCfg(FwCfgError::KernelRegion));
}

Expand Down
10 changes: 6 additions & 4 deletions kernel/src/igvm_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@ use alloc::vec::Vec;
use cpuarch::vmsa::VMSA;

use bootlib::igvm_params::{IgvmGuestContext, IgvmParamBlock, IgvmParamPage};
use bootlib::kernel_launch::LOWMEM_END;
use core::mem::size_of;
use igvm_defs::{IgvmEnvironmentInfo, MemoryMapEntryType, IGVM_VHS_MEMORY_MAP_ENTRY};

const IGVM_MEMORY_ENTRIES_PER_PAGE: usize = PAGE_SIZE / size_of::<IGVM_VHS_MEMORY_MAP_ENTRY>();

const STAGE2_END_ADDR: usize = 0xA0000;

#[derive(Clone, Debug)]
#[repr(C, align(64))]
pub struct IgvmMemoryMap {
Expand Down Expand Up @@ -323,9 +322,12 @@ impl IgvmParams<'_> {
let mut regions = Vec::new();

if self.igvm_param_block.firmware.in_low_memory != 0 {
// Add the stage 2 region to the firmware region list so
// Add the lowmem region to the firmware region list so
// permissions can be granted to the guest VMPL for that range.
regions.push(MemoryRegion::new(PhysAddr::new(0), STAGE2_END_ADDR));
regions.push(MemoryRegion::from_addresses(
PhysAddr::from(0u64),
PhysAddr::from(u64::from(LOWMEM_END)),
));
}

regions.push(MemoryRegion::new(
Expand Down
4 changes: 2 additions & 2 deletions kernel/src/mm/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::error::SvsmError;
use crate::locking::RWLock;
use crate::utils::MemoryRegion;
use alloc::vec::Vec;
use bootlib::kernel_launch::KernelLaunchInfo;
use bootlib::kernel_launch::{KernelLaunchInfo, LOWMEM_END};

use super::pagetable::LAUNCH_VMSA_ADDR;

Expand Down Expand Up @@ -115,7 +115,7 @@ pub fn valid_phys_address(paddr: PhysAddr) -> bool {
}

/// The starting address of the ISA range.
const ISA_RANGE_START: PhysAddr = PhysAddr::new(0xa0000);
const ISA_RANGE_START: PhysAddr = PhysAddr::new(LOWMEM_END as usize);

/// The ending address of the ISA range.
const ISA_RANGE_END: PhysAddr = PhysAddr::new(0x100000);
Expand Down
49 changes: 46 additions & 3 deletions kernel/src/platform/tdp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ use crate::console::init_svsm_console;
use crate::cpu::cpuid::CpuidResult;
use crate::cpu::msr::read_msr;
use crate::cpu::percpu::PerCpu;
use crate::cpu::smp::create_ap_start_context;
use crate::cpu::x86::apic::{apic_register_bit, APIC_MSR_ISR};
use crate::error::SvsmError;
use crate::hyperv;
use crate::io::IOPort;
use crate::mm::PerCPUPageMappingGuard;
use crate::platform::{PageEncryptionMasks, PageStateChangeOp, PageValidateOp, SvsmPlatform};
use crate::tdx::tdcall::{
td_accept_physical_memory, td_accept_virtual_memory, tdvmcall_halt, tdvmcall_io_read,
Expand All @@ -22,13 +24,33 @@ use crate::tdx::TdxError;
use crate::types::{PageSize, PAGE_SIZE};
use crate::utils::immut_after_init::ImmutAfterInitCell;
use crate::utils::{is_aligned, MemoryRegion};
use bootlib::kernel_launch::{ApStartContext, SIPI_STUB_GPA};
use core::{mem, ptr};

#[cfg(test)]
use bootlib::platform::SvsmPlatformType;

static GHCI_IO_DRIVER: GHCIIOPort = GHCIIOPort::new();
static VTOM: ImmutAfterInitCell<usize> = ImmutAfterInitCell::uninit();

/// Mailbox used to signal TD application processors (APs).
///
/// `wakeup_ap` stores the index of the CPU to start in `vcpu_index`. The
/// mailbox is accessed at the start of the page mapped from `SIPI_STUB_GPA`.
#[derive(Debug)]
#[repr(C, packed)]
peterfang marked this conversation as resolved.
Show resolved Hide resolved
pub struct TdMailbox {
/// Index of the vCPU being signalled.
pub vcpu_index: u32,
}

// Both structures must fit in a page: `TdMailbox` is accessed at the start of
// the mailbox page and `ApStartContext` is copied to the end of the same page.
const _: () = assert!(mem::size_of::<TdMailbox>() + mem::size_of::<ApStartContext>() <= PAGE_SIZE);

/// Publishes `index` in the mailbox's `vcpu_index` field.
///
/// # Safety
///
/// The caller must ensure `mailbox` points to a valid memory address that is
/// not aliased.
///
/// # Panics
///
/// Panics with "CPU index too large" if `index` does not fit in a `u32`.
unsafe fn wakeup_ap(mailbox: *mut TdMailbox, index: usize) {
    // PerCpu's CPU index has a direct mapping to TD vCPU index, so the value
    // only needs to be narrowed to the width of the mailbox field.
    let vcpu_index: u32 = u32::try_from(index).expect("CPU index too large");

    // SAFETY: the caller guarantees that `mailbox` is valid and not aliased.
    unsafe {
        (*mailbox).vcpu_index = vcpu_index;
    }
}

/// Field-less type on which the TDP implementation of `SvsmPlatform` is
/// provided.
#[derive(Clone, Copy, Debug)]
pub struct TdpPlatform {}

Expand Down Expand Up @@ -182,10 +204,31 @@ impl SvsmPlatform for TdpPlatform {

fn start_cpu(
&self,
_cpu: &PerCpu,
_context: &hyperv::HvInitialVpContext,
cpu: &PerCpu,
context: &hyperv::HvInitialVpContext,
) -> Result<(), SvsmError> {
todo!();
// Translate this context into an AP start context and place it in the
// AP startup transition page.
//
// transition_cr3 is not needed since all TD APs are using the stage2
// page table set up by the BSP.
let ap_context = create_ap_start_context(context, 0);

// The mailbox page was already accepted by the BSP in stage2 and
// therefore it's been initialized as a zero page.
let context_pa = PhysAddr::new(SIPI_STUB_GPA as usize);
let context_mapping = PerCPUPageMappingGuard::create_4k(context_pa)?;

// SAFETY: the address of the mailbox page was made valid when the
// `PerCPUPageMappingGuard` was created.
unsafe {
let mbx_va = context_mapping.virt_addr();
let size = mem::size_of::<ApStartContext>();
let context_ptr = (mbx_va + PAGE_SIZE - size).as_mut_ptr::<ApStartContext>();
ptr::copy_nonoverlapping(&ap_context, context_ptr, 1);
wakeup_ap(mbx_va.as_mut_ptr::<TdMailbox>(), cpu.shared().cpu_index());
}
Ok(())
}
}

Expand Down
Loading
Loading