diff --git a/vmm/sandbox/src/client.rs b/vmm/sandbox/src/client.rs index de1a5411..fb76c67f 100644 --- a/vmm/sandbox/src/client.rs +++ b/vmm/sandbox/src/client.rs @@ -15,7 +15,11 @@ limitations under the License. */ use std::{ - os::unix::io::{IntoRawFd, RawFd}, + io::{BufRead, BufReader, Write}, + os::unix::{ + io::{IntoRawFd, RawFd}, + net::UnixStream, + }, time::Duration, }; @@ -30,17 +34,12 @@ use nix::{ time::{clock_gettime, ClockId}, unistd::close, }; -use tokio::{ - io::{AsyncReadExt, AsyncWriteExt}, - net::UnixStream, - time::timeout, -}; +use tokio::time::timeout; use ttrpc::{context::with_timeout, r#async::Client}; use vmm_common::api::{sandbox::*, sandbox_ttrpc::SandboxServiceClient}; use crate::network::{NetworkInterface, Route}; -const HVSOCK_RETRY_TIMEOUT_IN_MS: u64 = 10; const TIME_SYNC_PERIOD: u64 = 60; const TIME_DIFF_TOLERANCE_IN_MS: u64 = 10; @@ -68,7 +67,11 @@ async fn new_ttrpc_client(address: &str) -> Result { let client = timeout(Duration::from_secs(ctx_timeout), fut) .await - .map_err(|_| anyhow!("{}s timeout connecting socket: {}", ctx_timeout, last_err))?; + .map_err(|_| { + let e = anyhow!("{}s timeout connecting socket: {}", ctx_timeout, last_err); + error!("{}", e); + e + })?; Ok(client) } @@ -162,41 +165,28 @@ async fn connect_to_hvsocket(address: &str) -> Result { if v.len() < 2 { return Err(anyhow!("hvsock address {} should not less than 2", address).into()); } - (v[0], v[1]) + (v[0].to_string(), v[1].to_string()) }; - let fut = async { - let mut stream = UnixStream::connect(addr).await?; - - match stream.write(format!("CONNECT {}\n", port).as_bytes()).await { - Ok(_) => { - let mut buf = [0; 4096]; - match stream.read(&mut buf).await { - Ok(0) => Err(anyhow!("stream closed")), - Ok(n) => { - if String::from_utf8(buf[..n].to_vec()) - .unwrap_or_default() - .contains("OK") - { - return Ok(stream.into_std()?.into_raw_fd()); - } - Err(anyhow!("failed to connect")) - } - Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { - Err(anyhow!("{}", e)) - } - Err(e) => Err(anyhow!("failed to read from hvsock: {}", e)), - } - } - Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => Err(anyhow!("{}", e)), - Err(e) => Err(anyhow!("failed to write CONNECT to hvsock: {}", e)), + tokio::task::spawn_blocking(move || { + let mut stream = + UnixStream::connect(&addr).map_err(|e| anyhow!("failed to connect hvsock: {}", e))?; + stream + .write_all(format!("CONNECT {}\n", port).as_bytes()) + .map_err(|e| anyhow!("hvsock connected but failed to write CONNECT: {}", e))?; + + let mut response = String::new(); + BufReader::new(&stream) + .read_line(&mut response) + .map_err(|e| anyhow!("CONNECT sent but failed to get response: {}", e))?; + if response.starts_with("OK") { + Ok(stream.into_raw_fd()) + } else { + Err(anyhow!("CONNECT sent but response is not OK: {}", response).into()) } - .map_err(Error::Other) - }; - - timeout(Duration::from_millis(HVSOCK_RETRY_TIMEOUT_IN_MS), fut) - .await - .map_err(|_| anyhow!("hvsock retry {}ms timeout", HVSOCK_RETRY_TIMEOUT_IN_MS))? + }) + .await + .map_err(|e| anyhow!("failed to spawn blocking task: {}", e))? } pub fn unix_sock(r#abstract: bool, socket_path: &str) -> Result { diff --git a/vmm/sandbox/src/cloud_hypervisor/client.rs b/vmm/sandbox/src/cloud_hypervisor/client.rs index b00ab91a..4771ca94 100644 --- a/vmm/sandbox/src/cloud_hypervisor/client.rs +++ b/vmm/sandbox/src/cloud_hypervisor/client.rs @@ -16,7 +16,6 @@ limitations under the License. use std::{ os::unix::net::UnixStream, - path::Path, thread::sleep, time::{Duration, SystemTime}, }; @@ -24,7 +23,7 @@ use std::{ use anyhow::anyhow; use api_client::{simple_api_command, simple_api_full_command_with_fds_and_response}; use containerd_sandbox::error::Result; -use log::{error, trace}; +use log::error; use crate::{ cloud_hypervisor::devices::{block::DiskConfig, AddDeviceResponse, RemoveDeviceRequest}, @@ -38,25 +37,26 @@ pub struct ChClient { } impl ChClient { - pub fn new>(socket_path: P) -> Result { + pub async fn new(socket_path: String) -> Result { let start_time = SystemTime::now(); - loop { + tokio::task::spawn_blocking(move || loop { match UnixStream::connect(&socket_path) { Ok(socket) => { return Ok(Self { socket }); } Err(e) => { - trace!("failed to create client: {:?}", e); if start_time.elapsed().unwrap().as_secs() > CLOUD_HYPERVISOR_START_TIMEOUT_IN_SEC { - error!("failed to create client: {:?}", e); + error!("failed to connect api server: {:?}", e); return Err(anyhow!("timeout connect client, {}", e).into()); } sleep(Duration::from_millis(10)); } } - } + }) + .await + .map_err(|e| anyhow!("failed to spawn a task {}", e))? } pub fn hot_attach(&mut self, device_info: DeviceInfo) -> Result { diff --git a/vmm/sandbox/src/cloud_hypervisor/devices/mod.rs b/vmm/sandbox/src/cloud_hypervisor/devices/mod.rs index c6180caa..3d1f2de7 100644 --- a/vmm/sandbox/src/cloud_hypervisor/devices/mod.rs +++ b/vmm/sandbox/src/cloud_hypervisor/devices/mod.rs @@ -24,6 +24,7 @@ pub mod device; pub mod fs; pub mod pmem; pub mod rng; +pub mod vfio; pub mod virtio_net; pub mod vsock; diff --git a/vmm/sandbox/src/cloud_hypervisor/devices/vfio.rs b/vmm/sandbox/src/cloud_hypervisor/devices/vfio.rs new file mode 100644 index 00000000..7d977185 --- /dev/null +++ b/vmm/sandbox/src/cloud_hypervisor/devices/vfio.rs @@ -0,0 +1,54 @@ +/* +Copyright 2022 The Kuasar Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use sandbox_derive::CmdLineParams; + +const VFIO_DEVICE_SYSFS_PATH: &str = "/sys/bus/pci/devices"; + +#[derive(CmdLineParams, Debug, Clone)] +#[params("device")] +pub struct VfioDevice { + #[property(ignore)] + pub id: String, + pub(crate) path: String, +} + +impl_device_no_bus!(VfioDevice); + +impl VfioDevice { + pub fn new(id: &str, bdf: &str) -> Self { + Self { + id: id.to_string(), + path: format!("{}/{}", VFIO_DEVICE_SYSFS_PATH, bdf), + } + } +} + +#[cfg(test)] +mod tests { + use crate::{cloud_hypervisor::devices::vfio::VfioDevice, param::ToParams}; + + #[test] + fn test_attr() { + let device = VfioDevice::new("", "0000:b4:05.1"); + let params = device.to_params(); + let property = params.get(0).unwrap(); + assert_eq!( + property.get("path").unwrap(), + "/sys/bus/pci/devices/0000:b4:05.1" + ); + } +} diff --git a/vmm/sandbox/src/cloud_hypervisor/mod.rs b/vmm/sandbox/src/cloud_hypervisor/mod.rs index 2a8648fd..684a4515 100644 --- a/vmm/sandbox/src/cloud_hypervisor/mod.rs +++ b/vmm/sandbox/src/cloud_hypervisor/mod.rs @@ -35,7 +35,9 @@ use crate::{ cloud_hypervisor::{ client::ChClient, config::{CloudHypervisorConfig, CloudHypervisorVMConfig, VirtiofsdConfig}, - devices::{block::Disk, virtio_net::VirtioNetDevice, CloudHypervisorDevice}, + devices::{ + block::Disk, vfio::VfioDevice, virtio_net::VirtioNetDevice, CloudHypervisorDevice, + }, }, device::{BusType, DeviceInfo}, impl_recoverable, load_config, @@ -110,7 +112,7 @@ impl CloudHypervisorVM { } async fn create_client(&self) -> Result { - ChClient::new(&self.config.api_socket) + ChClient::new(self.config.api_socket.to_string()).await } fn get_client(&mut self) -> Result<&mut ChClient> { @@ -125,7 +127,7 @@ impl CloudHypervisorVM { let mut cmd = tokio::process::Command::new(&self.virtiofsd_config.path); cmd.args(params.as_slice()); debug!("start virtiofsd with cmdline: {:?}", cmd); - set_cmd_netns(&mut cmd, &self.netns)?; + set_cmd_netns(&mut cmd, self.netns.to_string())?; cmd.stderr(Stdio::piped()); cmd.stdout(Stdio::piped()); let child = cmd @@ -164,7 +166,7 @@ impl VM for CloudHypervisorVM { cmd.args(params.as_slice()); set_cmd_fd(&mut cmd, self.fds.to_vec())?; - set_cmd_netns(&mut cmd, &self.netns)?; + set_cmd_netns(&mut cmd, self.netns.to_string())?; cmd.stdout(Stdio::piped()); cmd.stderr(Stdio::piped()); debug!("start cloud hypervisor with cmdline: {:?}", cmd); @@ -221,8 +223,9 @@ impl VM for CloudHypervisorVM { ); self.add_device(device); } - DeviceInfo::Physical(_vfio_info) => { - todo!() + DeviceInfo::Physical(vfio_info) => { + let device = VfioDevice::new(&vfio_info.id, &vfio_info.bdf); + self.add_device(device); } DeviceInfo::VhostUser(_vhost_user_info) => { todo!() diff --git a/vmm/sandbox/src/network/link.rs b/vmm/sandbox/src/network/link.rs index 46a3e830..2addab4d 100644 --- a/vmm/sandbox/src/network/link.rs +++ b/vmm/sandbox/src/network/link.rs @@ -215,7 +215,7 @@ pub struct NetworkInterface { impl NetworkInterface { pub async fn parse_from_message( msg: LinkMessage, - _netns: &str, + netns: &str, queue: u32, handle: &Handle, ) -> Result { @@ -266,6 +266,20 @@ impl NetworkInterface { } } } + // find the pci device for unknown type interface, maybe it is a physical interface. + if let LinkType::Unkonwn = intf.r#type { + // only search those with ip addresses + if !intf.ip_addresses.is_empty() { + let if_name = intf.name.to_string(); + let bdf = if !netns.is_empty() { + run_in_new_netns(netns, move || get_bdf_for_eth(&if_name)).await?? + } else { + get_bdf_for_eth(&if_name)? + }; + let driver = get_pci_driver(&bdf).await?; + intf.r#type = LinkType::Physical(bdf, driver); + } + } Ok(intf) } @@ -411,7 +425,6 @@ impl NetworkInterface { } } -#[allow(dead_code)] fn get_bdf_for_eth(if_name: &str) -> Result { if if_name.len() > 16 { return Err(anyhow!("the interface name length is larger than 16").into()); @@ -544,10 +557,11 @@ fn create_tap_device(tap_name: &str, mut queue: u32) -> Result> { Ok(fds) } -#[allow(dead_code)] async fn get_pci_driver(bdf: &str) -> Result { let driver_path = format!("/sys/bus/pci/devices/{}/driver", bdf); - let driver_dest = tokio::fs::read_link(driver_path).await?; + let driver_dest = tokio::fs::read_link(&driver_path) + .await + .map_err(|e| anyhow!("fail to readlink of {} : {}", driver_path, e))?; let file_name = driver_dest.file_name().ok_or(anyhow!( "failed to get file name from driver path {:?}", driver_dest @@ -560,14 +574,21 @@ async fn get_pci_driver(bdf: &str) -> Result { } async fn bind_device_to_driver(driver: &str, bdf: &str) -> Result<()> { + // 1. Switch the device driver let driver_override_path = format!("/sys/bus/pci/devices/{}/driver_override", bdf); write_file_async(&driver_override_path, driver).await?; + + // 2. Unbind the device from its native driver let unbind_path = format!("/sys/bus/pci/devices/{}/driver/unbind", bdf); if Path::new(&*unbind_path).exists() { write_file_async(&unbind_path, bdf).await?; } + + // 3. Probe driver for device let probe_path = "/sys/bus/pci/drivers_probe"; write_file_async(probe_path, bdf).await?; + + // 4. Check the result let driver_link = format!("/sys/bus/pci/devices/{}/driver", bdf); let driver_path = tokio::fs::read_link(&*driver_link).await?; diff --git a/vmm/sandbox/src/network/mod.rs b/vmm/sandbox/src/network/mod.rs index f3fcaa07..d3e76f22 100644 --- a/vmm/sandbox/src/network/mod.rs +++ b/vmm/sandbox/src/network/mod.rs @@ -19,7 +19,7 @@ use std::{fmt::Debug, os::unix::prelude::AsRawFd, path::Path}; use anyhow::anyhow; use containerd_sandbox::error::Result; use futures_util::TryStreamExt; -use log::{debug, error}; +use log::{debug, error, warn}; use nix::{ fcntl::OFlag, sched::{setns, CloneFlags}, @@ -64,9 +64,20 @@ impl Network { let mut links = handle.link().get().execute(); let mut intfs = vec![]; while let Some(msg) = links.try_next().await.map_err(|e| anyhow!(e))? { - let network_interface = - NetworkInterface::parse_from_message(msg, &config.netns, config.queue, &handle) - .await?; + let network_interface = match NetworkInterface::parse_from_message( + msg, + &config.netns, + config.queue, + &handle + ) + .await + { + Ok(interface) => interface, + Err(e) => { + warn!("failed to parse network interface: {}, paas it", e); + continue; + } + }; if let LinkType::Loopback = network_interface.r#type { continue; } diff --git a/vmm/sandbox/src/sandbox.rs b/vmm/sandbox/src/sandbox.rs index b4209080..66f4a039 100644 --- a/vmm/sandbox/src/sandbox.rs +++ b/vmm/sandbox/src/sandbox.rs @@ -466,6 +466,9 @@ where async fn stop(&mut self, force: bool) -> Result<()> { match self.status { + SandboxStatus::Created => { + return Ok(()); + } SandboxStatus::Running(_) => {} SandboxStatus::Stopped(_, _) => { return Ok(()); diff --git a/vmm/sandbox/src/utils.rs b/vmm/sandbox/src/utils.rs index 14c2d94d..08d561ac 100644 --- a/vmm/sandbox/src/utils.rs +++ b/vmm/sandbox/src/utils.rs @@ -33,7 +33,7 @@ use containerd_sandbox::{ use log::{debug, error}; use nix::{ fcntl::{open, OFlag}, - libc::{dup2, exit, fcntl, kill, setns, FD_CLOEXEC, F_GETFD, F_SETFD}, + libc::{dup2, fcntl, kill, setns, FD_CLOEXEC, F_GETFD, F_SETFD}, sched::CloneFlags, sys::stat::Mode, }; @@ -428,16 +428,15 @@ pub fn safe_open_file( Ok(unsafe { OwnedFd::from_raw_fd(fd) }) } -pub fn set_cmd_netns(cmd: &mut Command, netns: &str) -> Result<()> { +pub fn set_cmd_netns(cmd: &mut Command, netns: String) -> Result<()> { if !netns.is_empty() { - let netns_fd = safe_open_file(netns, OFlag::O_CLOEXEC, Mode::empty()) - .map_err(|e| anyhow!("failed to open netns {}", e))?; unsafe { cmd.pre_exec(move || { - let setns_result = setns(netns_fd.as_raw_fd(), CloneFlags::CLONE_NEWNET.bits()); - if setns_result != 0 { - eprintln!("failed to set netns: {}", setns_result); - exit(127); + let netns_fd = safe_open_file(Path::new(&netns), OFlag::O_CLOEXEC, Mode::empty())?; + if setns(netns_fd.as_raw_fd(), CloneFlags::CLONE_NEWNET.bits()) != 0 { + let e = std::io::Error::last_os_error(); + eprintln!("failed to set netns: {}, fd: {}", e, netns_fd.as_raw_fd()); + return Err(e); } Ok(()) }) @@ -456,12 +455,14 @@ pub fn set_cmd_fd(cmd: &mut Command, fds: Vec) -> Result<()> { if src_fd == dest_fd { let flags = fcntl(src_fd, F_GETFD); if flags < 0 || fcntl(src_fd, F_SETFD, flags & !FD_CLOEXEC) < 0 { - eprintln!("failed to call fnctl"); - exit(127); + let e = std::io::Error::last_os_error(); + eprintln!("failed to call fnctl: {}", e); + return Err(e); } } else if dup2(src_fd, dest_fd) < 0 { - eprintln!("failed to call dup2"); - exit(127); + let e = std::io::Error::last_os_error(); + eprintln!("failed to call dup2: {}", e); + return Err(e); } } Ok(())