diff --git a/README.adoc b/README.adoc index 931cef79..6625595c 100644 --- a/README.adoc +++ b/README.adoc @@ -28,11 +28,6 @@ uses the opte crate to implement the Oxide VPC network. opte-ioctl:: The userland library used for interacting with OPTE. -opte-drv (obsolete):: A kernel module which hooks into the network -path between viona (bhyve) and mac, feeding all packets into opte-core -for processing. This module has been rendered obsolete by `xde`, but -it still remains until all features are transitioned over. - opteadm:: The command line utility used to configure and inspect OPTE. This is meant to be used by an operator, not by a client program. diff --git a/opte-drv/.gitignore b/opte-drv/.gitignore deleted file mode 100644 index 4dd04a80..00000000 --- a/opte-drv/.gitignore +++ /dev/null @@ -1 +0,0 @@ -opte diff --git a/opte-drv/Cargo.lock b/opte-drv/Cargo.lock deleted file mode 100644 index 0dd5e26a..00000000 --- a/opte-drv/Cargo.lock +++ /dev/null @@ -1,474 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "ahash" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29661b60bec623f0586702976ff4d0c9942dcb6723161c2df0eea78455cfedfb" -dependencies = [ - "const-random", -] - -[[package]] -name = "anymap" -version = "0.12.1" -source = "git+https://github.com/michaelmelanson/anymap?branch=no_std#2957b71eef770e50222c1398d390324697e0c928" -dependencies = [ - "hashbrown", -] - -[[package]] -name = "atomic-polyfill" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a93ba5d6053837dbb76fd0ae26fd4f0c1859a008a783b0ce072b797c07f0f27" -dependencies = [ - "cortex-m", -] - -[[package]] -name = "autocfg" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" - -[[package]] -name = "bare-metal" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5deb64efa5bd81e31fcd1938615a6d98c82eafcbcd787162b6f63b91d6bac5b3" -dependencies = [ - "rustc_version", -] - -[[package]] -name = "bitfield" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46afbd2983a5d5a7bd740ccb198caf5b82f45c40c09c0eed36052d91cb92e719" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "const-random" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" -dependencies = [ - "getrandom", - "lazy_static", - "proc-macro-hack", - "tiny-keccak", -] - -[[package]] -name = "cortex-m" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ac919ef424449ec8c08d515590ce15d9262c0ca5f0da5b0c901e971a3b783b3" -dependencies = [ - "bare-metal", - "bitfield", - "embedded-hal", - "volatile-register", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "cstr_core" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "917ba9efe9e1e736671d5a03f006afc4e7e3f32503e2077e0bcaf519c0c8c1d3" -dependencies = [ - "cty", - "memchr", -] - -[[package]] -name = "cty" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7313c0d620d0cb4dbd9d019e461a4beb501071ff46ec0ab933efb4daa76d73e3" - -[[package]] -name = "embedded-hal" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36cfb62ff156596c892272f3015ef952fe1525e85261fa3a7f327bd6b384ab9" -dependencies = [ - "nb 0.1.3", - "void", -] - -[[package]] -name = "getrandom" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" -dependencies = [ - "cfg-if 1.0.0", - "libc", - "wasi", -] - -[[package]] -name = "hash32" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" -dependencies = [ - "byteorder", -] - -[[package]] -name = "hashbrown" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6073d0ca812575946eb5f35ff68dbe519907b25c42530389ff946dc84c6ead" -dependencies = [ - "ahash", - "autocfg", -] - -[[package]] -name = "heapless" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e476c64197665c3725621f0ac3f9e5209aa5e889e02a08b1daf5f16dc5fd952" -dependencies = [ - "atomic-polyfill", - "hash32", - "serde", - "spin", - "stable_deref_trait", -] - -[[package]] -name = "illumos-ddi-dki" -version = "0.1.0" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119" - -[[package]] -name = "lock_api" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "managed" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca88d725a0a943b096803bd34e73a4437208b6077654cc4ecb2947a5f91618d" - -[[package]] -name = "memchr" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" - 
-[[package]] -name = "nb" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "801d31da0513b6ec5214e9bf433a77966320625a37860f910be265be6e18d06f" -dependencies = [ - "nb 1.0.0", -] - -[[package]] -name = "nb" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "546c37ac5d9e56f55e73b677106873d9d9f5190605e41a856503623648488cae" - -[[package]] -name = "opte-core" -version = "0.1.0" -dependencies = [ - "anymap", - "cfg-if 0.1.10", - "cstr_core", - "heapless", - "illumos-ddi-dki", - "postcard", - "serde", - "smoltcp", - "zerocopy 0.6.1", -] - -[[package]] -name = "opte-drv" -version = "0.1.0" -dependencies = [ - "illumos-ddi-dki", - "opte-core", - "postcard", - "serde", - "zerocopy 0.5.0", -] - -[[package]] -name = "postcard" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8863e251332eb18520388099b8b0acc4810ed6e602e3b6f674e8a46ba20e15c" -dependencies = [ - "heapless", - "postcard-cobs", - "serde", -] - -[[package]] -name = "postcard-cobs" -version = "0.1.5-pre" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c68cb38ed13fd7bc9dd5db8f165b7c8d9c1a315104083a2b10f11354c2af97f" - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rustc_version" -version = "0.2.3" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - -[[package]] -name = "serde" -version = "1.0.127" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.127" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "smoltcp" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2308a1657c8db1f5b4993bab4e620bdbe5623bd81f254cf60326767bb243237" -dependencies = [ - "bitflags", - "byteorder", - "managed", -] - -[[package]] -name = "spin" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "511254be0c5bcf062b019a6c89c01a664aa359ded62f78aa72c6fc137c0590e5" -dependencies = [ - "lock_api", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "syn" -version = "1.0.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "synstructure" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "474aaa926faa1603c40b7885a9eaea29b444d1cb2850cb7c0e37bb1a4182f4fa" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "unicode-xid", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "vcell" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002" - -[[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" - -[[package]] -name = "volatile-register" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d67cb4616d99b940db1d6bd28844ff97108b498a6ca850e5b6191a532063286" -dependencies = [ - "vcell", -] - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - -[[package]] -name = "zerocopy" -version = "0.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e59ec1d2457bd6c0dd89b50e7d9d6b0b647809bf3f0a59ac85557046950b7b2" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "332f188cc1bcf1fe1064b8c58d150f497e697f49774aa846f2dc949d9a25f236" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0fbc82b82efe24da867ee52e015e58178684bd9dd64c34e66bdf21da2582a9f" -dependencies = [ - "proc-macro2", - "syn", - "synstructure", -] diff --git a/opte-drv/Cargo.toml b/opte-drv/Cargo.toml deleted file mode 100644 index f3c500c1..00000000 --- a/opte-drv/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -[package] -name = "opte-drv" -version = "0.1.0" -authors = ["Ryan Zezeski "] -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -illumos-ddi-dki = { path = "../illumos-ddi-dki" } -opte-core = { path = "../opte-core", default-features = false } -postcard = { version = "0.7.0", features = ["alloc"] } -zerocopy = "0.5.0" - -[dependencies.serde] -version = "1.0" -default-features = false -features = ["alloc", "derive"] - -[lib] -crate-type = ["staticlib"] -name = "opte" - -# Some of these values are the same as the defaults, but it helps to -# be explicit sometimes. 
-[profile.dev] -codegen-units = 1 -debug = 2 -incremental = false -opt-level = 0 -panic = 'abort' - -[profile.release] -codegen-units = 1 -debug = 2 -incremental = false -panic = 'abort' diff --git a/opte-drv/README.adoc b/opte-drv/README.adoc deleted file mode 100644 index 91b03ee3..00000000 --- a/opte-drv/README.adoc +++ /dev/null @@ -1,259 +0,0 @@ -This crate provides the OPTE kernel module, aka `opte` -footnote:opte[I tend to use lowercase when referring to the kernel -module]. The opte module provides two main functions. - -1. It hooks into the guest's networking datapath so that *all* packets -are intercepted footnote:temporary[opte currently requires a modified -viona. This is not how it will work in the future. It will more likely -be its own device like a VNIC, which is created by Propolis at VM -start, and somehow connects into the native routing table to determine -which physical NIC to use for next hop]. It feeds these packets into -OPTE's engine, `opte-core`, for processing. It is there where the -actual packet inspection and modification logic is performed -footnote:half-truth[There is still some logic in opte-drv that should -be in opte-core, but the end goal is to have all packet processing and -decision making done in opte-core]. - -2. It provides an API for programming the opte-core engine via an -ioctl interface. Any Rust program needing to interact with this -interface should use libopte (XXX: make link when libopte is pushed). - -In order to use opte two kernel modules must be built: viona and opte. - -== Building - -=== Building viona - -The viona module is an in-kernel implementation of a virtio networking -device. It provides all network connectivity to bhyve guests. In order -for opte to hook into the guest's network datapath a custom viona must -be built. - -https://github.com/oxidecomputer/illumos-gate/tree/viona-opte-08-31-21 - -This module must be built and then copied over the existing viona -module. 
The easiest way to go about building it is to first do a full -nightly build of the stock gate. - ----- -$ pfexec pkg install illumos-tools -$ pfexec zfs create -o mountpoint=/build rpool/build -$ cd /build -$ git clone https://github.com/oxidecomputer/illumos-gate -$ cd illumos-gate -$ ./usr/src/tools/scripts/nightly /opt/onbld/env/omnios-illumos-gate ----- - -If that's successful, then you can switch over to the custom viona -branch and build just the viona module. - -NOTE: You could also just run the nightly against the viona branch, -but it will report the build as a failure due to pkg linting. However, -the module itself should be there. - ----- -$ git checkout viona-opte-08-31-21 -$ ./usr/src/tools/scripts/bldenv /opt/onbld/env/omnios-illumos-gate -$ cd usr/src/uts/i86pc/viona -$ make install ----- - -The https://omnios.org/dev/gate[build instructions] are taken almost -verbatim from the OmniOS wiki, with the exception of the branch we are -building against. If you are inclined to learn more about building -illumos I also recommend the illumos dev guide -https://illumos.org/books/dev/workflow.html[workflow] page. - -If you already have a previous nightly you could also just switch to -the `viona-opte-08-31-21` branch and build just the viona module under -`usr/src/uts/i86pc/viona/`. - -=== Building opte - -NOTE: You *MUST* build the opte kernel module on an illumos host. This -cannot be cross-compiled. - -The opte kernel module is an illumos kernel module written in Rust. It -uses stable kernel APIs to do its job, and thus it doesn't need to be -built with the illumos-gate infrastructure. Rather, it's more like -building any other Rust program, but with a few small exceptions: - -NOTE: Previously there were -https://github.com/oxidecomputer/opte/issues/1[codegen issues] when -compiling opte-drv with certain toolchains. As of -`nightly-2021-09-03-x86_64-unknown-illumos` the issue seems resolved. 
-However, if your opte driver fails to load, then perform the following -check: `elfdump opte | grep GOTPCREL`. - -1. The opte driver relies on nightly features and thus requires a -nightly toolchain. - -2. The opte driver requires the unstable cargo feature -https://doc.rust-lang.org/cargo/reference/unstable.html#build-std[build-std] -in order to build core and alloc for our custom illumos target. - -3. The opte driver uses a custom -https://doc.rust-lang.org/cargo/commands/cargo-build.html#compilation-options[rustc -target] in order to generate code which can run in the illumos kernel. -(XXX It looks like we could use the `build.target` config value -instead). - -4. We need to run the linker manually as -https://doc.rust-lang.org/reference/linkage.html[staticlib] crates do -not run the linker. - ----- -$ cd ~/opte/opte-drv -$ cargo +nightly -v rustc -Z build-std=core,alloc --target x86_64-unknown-unknown.json --release -$ ld -r -dy -N"drv/mac" -z allextract target/x86_64-unknown-unknown/release/opte.a -o opte ----- - -NOTE: You can also use a non-release build, but stack usage will -increase dramatically and there is the potential for a panic due to a -blown stack (in the form of a double fault). - -== Installing - -=== Pre-install - -You may want to create a dedicated boot environment (BE) before -installing your custom modules. This way you can easily rollback to a -known working environment. 
- ----- -$ pfexec beadm create -a opte-test - -$ beadm list -BE Active Mountpoint Space Policy Created -omnios-r151037 - - 44.70M static 2021-03-24 22:03 -omnios-r151037-backup-1 - - 220K static 2021-03-24 22:51 -omnios-r151037-1 - - 8.26M static 2021-03-24 23:00 -viona-rmc - - 6.08M static 2021-03-25 19:23 -viona-opte - - 89.43G static 2021-04-07 03:34 -omnios-r151039 N / 182.50K static 2021-08-31 18:55 -opte-test R - 3.33G static 2021-09-01 03:47 - -$ pfexec reboot ----- - -=== Installing viona - -Installing viona is a matter of copying the custom-built module -overtop of the system one. - ----- -$ pfexec cp /usr/kernel/drv/amd64/viona{,.orig} -$ pfexec cp /build/illumos-gate/proto/root_i386/usr/kernel/drv/amd64/viona /usr/kernel/drv/amd64/viona ----- - -=== Installing opte - -Installing opte is a bit more involved. It's not a module that comes -packaged with the system. - -1. Copy the module and its conf file. -+ ----- -$ pfexec cp ~/opte/opte-drv/opte /kernel/drv/amd64/ -$ pfexec cp ~/opte/opte-drv/opte.conf /kernel/drv/ ----- -+ -2. Install the driver. -+ ----- -$ pfexec add_drv opte ----- - -On subsequent builds of opte you don't need to repeat all these steps. -It's enough to just copy the kernel module. - ----- -$ pfexec cp opte /kernel/drv/amd64/ ----- - -== Running - -NOTE: XXX These instructions currently assume you are using the bhyve -zone in OmniOS, we probably need to update this to use Propolis soon. -That said, there's no reason opte shouldn't work with Propolis, in -fact, Propolis should have no idea that opte is even on the scene and -vice versa. - -NOTE: This assumes no guests are running and the viona module is not -currently loaded. - -NOTE: Currently opte can only have one guest instance running on a -given host. - -Now that all necessary modules are in place we can actually run a -guest on top of OPTE. - -1. Load the viona module. -+ ----- -$ pfexec modload -p drv/amd64/viona ----- -+ -2. Set `viona_use_opte` to `1`. 
Optionally enable some debug printing -by setting `opte_debug` to `1`. -+ ----- -$ pfexec mdb -kw -Loading modules: [ unix genunix specfs mac cpu.generic uppc apix scsi_vhci zfs sata sd ip hook neti sockfs arp usba xhci mm stmf stmf_sbd lofs random ufs logindmux ptm nfs ] - -> viona_use_opte/W 1 -viona_use_opte: 0 = 0x1 -> opte_debug/W 1 -opte_debug: 0 = 0x1 -> ----- -+ -3. Start the guest. -+ ----- -$ pfexec zoneadm -z guest1 boot ----- -+ -4. Start a server to proxy VNC. -+ ----- -$ pfexec /usr/lib/brand/bhyve/socat /zones/guest1/root/tmp/vm.vnc 5905 ----- -+ -5. Wait for the guest to get to its login screen, then set the IP -config and remove various IP/mac protection. -+ ----- -$ cd ~/opte/opteadm -$ pfexec cargo run set-ip-config private_ip=10.0.0.210 public_ip=10.0.0.99 port_start=1025 port_end=4096 vpc_sub4=10.0.0.0/24 gw_mac=78:23:ae:5d:4f:0d gw_ip=10.0.0.1 -$ pfexec dladm reset-linkprop -p protection guest1 -$ pfexec dladm set-linkprop -p secondary-macs="a8:40:25:00:00:63" guest1 ----- - -|=== -|Field |Description - -a|`private_ip` -|The IPv4 address of the guest. - -a|`public_ip` -|The public IP of the guest. This should be an unused IP in the same - subnet as the guest. OPTE will adopt this IP by responding to any - ARPs for it and then use it as the outbound NAT IP. - -a|`port_start`, `port_end` -a|The start and end of the port range for outbound NAT. This is used in - conjunction with `public_ip`. - -a|`vpc_sub4` -|The VPC subnet of the guest. For most of you playing along at home - this is the same subnet that all your home devices are on: typically - a `10.0.0.0/24` or `192.168.{0,1}.0/24`. - -a|`gw_mac` -|The MAC address of your router/gateway. - -a|`gw_ip` -|The IPv4 address of your router/gateway. 
- -|=== diff --git a/opte-drv/opte.conf b/opte-drv/opte.conf deleted file mode 100644 index dd4ba139..00000000 --- a/opte-drv/opte.conf +++ /dev/null @@ -1,8 +0,0 @@ -name="opte" parent="pseudo" instance=0; - -# -# XXX This is a temporary config to allow OPTE to run directly on an -# IPv4 home/lab network. -# -gateway_mac = "00:01:02:03:04:05"; -gateway_ipv4 = "1.2.3.4"; diff --git a/opte-drv/src/ioctl.rs b/opte-drv/src/ioctl.rs deleted file mode 100644 index 3bd88118..00000000 --- a/opte-drv/src/ioctl.rs +++ /dev/null @@ -1,191 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2022 Oxide Computer Company - -use alloc::string::String; -use alloc::vec::Vec; -use core::fmt::Debug; -use core::mem::{self, MaybeUninit}; -use core::result; - -use ddi::{c_int, c_void}; -use illumos_ddi_dki as ddi; - -use opte_core::ioctl::{CmdErr, CmdOk, Ioctl}; -use opte_core::CString; - -use postcard; - -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Deserialize, Serialize)] -pub enum Error { - DeserError(String), - FailedCopyin, - FailedCopyout, - RespTooLong, -} - -pub type Result = result::Result; - -pub fn to_errno(e: Error) -> c_int { - match e { - Error::DeserError(_) => ddi::EINVAL, - Error::RespTooLong => ddi::ENOBUFS, - _ => ddi::EFAULT, - } -} - -extern "C" { - fn __dtrace_probe_copy__out__resp(resp_str: ddi::uintptr_t); -} - -fn dtrace_probe_copy_out_resp(resp: &T) { - let cstr = CString::new(format!("{:?}", resp)).unwrap(); - unsafe { - __dtrace_probe_copy__out__resp(cstr.as_ptr() as ddi::uintptr_t); - } -} - -/// An envelope for dealing with `Ioctl`. It contains all information -/// needed to deserialize the user's request and serialize the -/// kernel's response. -pub struct IoctlEnvelope { - //The kernel-side copy of the user's `Ioctl`. 
- ioctl: Ioctl, - - // A pointer to the user's copy of the `Ioctl`. - arg_ptr: *const c_void, - - // A copy of the `mode` argument passed to the ioctl(9E) - // interface. - mode: c_int, -} - -impl IoctlEnvelope { - /// Safety: The `arg_ptr` should come directly from the `arg` - /// argument passed to the `ioctl(9E)` callback. - pub unsafe fn new(arg_ptr: *const c_void, mode: c_int) -> Result { - let mut ioctl = MaybeUninit::::uninit(); - - let ret = ddi::ddi_copyin( - arg_ptr, - ioctl.as_mut_ptr() as *mut c_void, - mem::size_of::(), - mode, - ); - - if ret != 0 { - return Err(Error::FailedCopyin); - } - - let ioctl = ioctl.assume_init(); - Ok(IoctlEnvelope { ioctl, arg_ptr, mode }) - } - - fn copy_out_self(&self) -> Result<()> { - // Safety: We know the `self.ioctl` pointer is valid as our - // `new()` constructor made the allocation. We also know the - // `self.arg` pointer is valid as long as the caller obeyed - // the safety invariant of the constructor: that it's - // `arg_ptr` be the `arg` passed to `ioctl(9E)`. - let ret = unsafe { - ddi::ddi_copyout( - &self.ioctl as *const Ioctl as *const c_void, - self.arg_ptr as *mut c_void, - mem::size_of::(), - self.mode, - ) - }; - - if ret != 0 { - return Err(Error::FailedCopyout); - } - - Ok(()) - } - - /// Take any type which implements `Serialize`, serialize it, and - /// then `ddi_copyoyt(9F)` it to the user address specified in - /// `resp_bytes`. Return an error if the `resp_len` indicates that - /// the user buffer is not large enough to hold the serialized - /// bytes. - pub fn copy_out_resp( - &mut self, - val: &result::Result, - ) -> Result<()> - where - E: CmdErr, - T: CmdOk, - { - dtrace_probe_copy_out_resp(val); - - // We expect the kernel to pass values of `T` which will - // serialize, thus the use of `unwrap()`. 
- let vec = postcard::to_allocvec(val).unwrap(); - self.ioctl.resp_len_needed = vec.len(); - - if vec.len() > self.ioctl.resp_len { - self.copy_out_self()?; - return Err(Error::RespTooLong); - } - - // Safety: We know the `vec` pointer is valid as we just - // created it. We assume the `resp_bytes` pointer is valid, - // but since it's coming from userspace it could be anything. - // However, it is `ddi_copyout()`'s job to protect against an - // invalid pointer, not ours. - let ret = unsafe { - ddi::ddi_copyout( - vec.as_ptr() as *const c_void, - self.ioctl.resp_bytes as *mut c_void, - vec.len(), - self.mode, - ) - }; - - if ret != 0 { - return Err(Error::FailedCopyout); - } - - self.copy_out_self()?; - Ok(()) - } - - /// Given `self`, return the deserialized ioctl request. - pub fn copy_in_req(&self) -> Result { - // TODO place upper limit on req_len to prevent - // malicious/malformed requests from allocating large amounts - // of kmem. - let mut bytes = Vec::with_capacity(self.ioctl.req_len); - let ret = unsafe { - ddi::ddi_copyin( - self.ioctl.req_bytes as *const c_void, - bytes.as_mut_ptr() as *mut c_void, - self.ioctl.req_len, - self.mode, - ) - }; - - if ret != 0 { - return Err(Error::FailedCopyin); - } - - // Safety: We know the `Vec` has `req_len` capacity, and that - // `ddi_copyin(9F)` either copied `req_len` bytes or returned - // an error. - unsafe { bytes.set_len(self.ioctl.req_len) }; - - // TODO Do I need to control the length of how many bytes - // postcard might read here? - match postcard::from_bytes(&bytes) { - Ok(val) => Ok(val), - Err(deser_error) => { - Err(Error::DeserError(format!("{}", deser_error))) - } - } - } -} diff --git a/opte-drv/src/lib.rs b/opte-drv/src/lib.rs deleted file mode 100644 index b3fbd052..00000000 --- a/opte-drv/src/lib.rs +++ /dev/null @@ -1,1588 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. 
If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2022 Oxide Computer Company - -//! OPTE - Oxide Packet Transformation Engine -//! -//! This driver is used as a way to interface the OPTE implementation -//! (opte-core) with the bhyve virtual interface device (viona). It -//! allows us to perform inbound and outbound packet filtering and -//! modification between the guest instance and the network. In it's -//! current form it achieves this by mimicking the mac client APIs. We -//! then use a modified viona device which replaces the calls to the -//! mac client API with calls to this module's API. -//! -//! This module also presents itself as a character device under -//! `/dev/opte`. This allows users to control and inspect the state of -//! opte as it is running (via opteadm). Requests are sent by way of -//! ioctl, interpreted by this driver, and then routed to the -//! corresponding opte-core APIs. In the future opte will probably -//! present more of a virtual-switch-like abstraction, where each -//! guest interface is a port on the switch, and the two physical NICs -//! have ports on the same virtual switch. -//! -//! When loaded, this driver effectively limits to the system to a -//! single bhyve/viona instance. This limit can be lifted with some -//! work, but for the purposes of prototyping that work was postponed. -#![feature(extern_types)] -#![feature(lang_items)] -#![feature(panic_info_message)] -#![no_std] -#![allow(non_camel_case_types)] -#![feature(alloc_error_handler)] -#![feature(rustc_private)] -#![deny(unused_must_use)] - -mod ioctl; - -#[macro_use] -extern crate alloc; - -use alloc::borrow::ToOwned; -use alloc::boxed::Box; -use alloc::collections::btree_map::BTreeMap; -// TODO Is Arc okay for illumos-kernel use? I.e., it uses atomics -// underneath, is the code generated okay for the illumos kernel? 
-use alloc::string::{String, ToString}; -use alloc::sync::Arc; -use alloc::vec::Vec; -use core::convert::TryFrom; -use core::fmt::Debug; -use core::ops::Range; -use core::panic::PanicInfo; -use core::ptr; -use core::str::FromStr; - -use serde::Serialize; - -use crate::ioctl::{to_errno, IoctlEnvelope}; - -use opte_core::ether::{EtherAddr, ETHER_TYPE_ARP}; -use opte_core::headers::IpCidr; -use opte_core::ioctl::{ - self as api, AddPortReq, CmdErr, CmdOk, DeletePortReq, IoctlCmd, PortInfo, -}; -use opte_core::ip4::Ipv4Addr; -use opte_core::oxide_net::firewall::{FwAddRuleReq, FwRemRuleReq}; -use opte_core::oxide_net::PortCfg; -use opte_core::oxide_net::{overlay, router}; -use opte_core::packet::{Initialized, Packet, Parsed}; -use opte_core::port::{self, Port, ProcessResult}; -use opte_core::sync::{KMutex, KMutexGuard, KMutexType}; -use opte_core::{CStr, CString, Direction, ExecCtx}; - -// For now I glob import all of DDI/DKI until I have a better idea of -// how I would want to organize it. Also, for the time being, if it's -// in defined in the DDI/DKI crate, then opte-drv probably needs it. -// -// TODO: Now that I'm a bit more familiar with Rust I'm not wild about -// glob imports. I think I'd rather just import the DDI types I need -// and then also bind the module to `ddi` so I can call functions like -// `ddi::msgsize()`, making it apparent where they come from. -use ddi_attach_cmd_t::*; -use ddi_detach_cmd_t::*; -use illumos_ddi_dki::*; - -// TODO To `_t` or not to `_t`, that is the question. -// -// In general, we should prefer to name the types identical to the way -// they are in the illumos kernel, namely using the `_t` suffix when -// the corresponding illumos code does. However, there are some -// unforunate typedefs in the illumos code that make implicit -// pointers. 
For example: -// -// ``` -// typedef struct __mac_resource_handle *mac_resource_handle_t; -// ``` -// -// And you'll see this type used by the mac rx callback type: -// -// typedef void (*mac_rx_t)(void *, mac_resource_handle_t, mblk_t *, -// boolean_t); -// -// In the rust code I'd like to continue to name the type -// mac_resource_handle_t, however, I don't think there is a way to -// declare an extern type name as a pointer, implicitly. Rather, when -// using the extern type, we must declare the argument/variable as a -// pointer to the extern type, to avoid unsized issues. However, -// something like `*mut mac_resource_handle_t` might give someone the -// impression this is a pointer to a pointer. So, in cases like this -// we drop the `_t`, and instead fallback to the underlying struct name. - -// The following are "C type" aliases for native Rust types so that -// the native illumos structures may be defined almost verbatim to the -// source. These definitions assume AMD64 arch/LP64. -pub type c_void = core::ffi::c_void; -pub type c_schar = i8; -pub type c_uchar = u8; -pub type c_char = c_schar; -pub type c_ushort = u16; -pub type c_int = i32; -pub type c_ulong = u64; -pub type c_longlong = i64; - -pub type size_t = usize; -pub type intptr_t = isize; -pub type uintptr_t = usize; -pub type ssize_t = isize; - -const OPTE_STR: *const c_char = b"OPTE\0".as_ptr() as *const c_char; -const OPTE_CTL_MINOR: minor_t = 0; - -#[no_mangle] -static mut opte_dip: *mut dev_info = ptr::null_mut::() as *mut dev_info; - -// This block is purely for SDT probes. 
-extern "C" { - fn __dtrace_probe_hdlr__resp(resp_str: uintptr_t); - fn __dtrace_probe_rx(mp: uintptr_t); - fn __dtrace_probe_tx(mp: uintptr_t); - fn __dtrace_probe_rx__chain__todo(mp: uintptr_t); -} - -#[allow(dead_code)] -#[repr(C)] -pub enum mac_client_promisc_type_t { - MAC_CLIENT_PROMISC_ALL, - MAC_CLIENT_PROMISC_FILTERED, - MAC_CLIENT_PROMISC_MULTI, -} - -#[allow(unused_imports)] -use mac_client_promisc_type_t::*; - -type mac_tx_cookie_t = uintptr_t; -type mac_rx_fn = unsafe extern "C" fn( - *mut c_void, - *mut mac_resource_handle, - *mut mblk_t, - boolean_t, -); - -// See uts/common/sys/mac_client.h. -pub const MAC_DROP_ON_NO_DESC: u16 = 0x01; - -// The mac APIs. -extern "C" { - pub type mac_handle; - type mac_client_handle; - type mac_promisc_handle; - pub type mac_resource_handle; - - fn mac_client_open( - mh: *const mac_handle, - mch: *mut *mut mac_client_handle, - name: *const c_char, - flags: u16, - ) -> c_int; - - fn mac_client_close(mch: *const mac_client_handle, flags: u16); - fn mac_client_name(mch: *const mac_client_handle) -> *const c_char; - fn mac_close(mh: *const mac_handle); - fn mac_open_by_linkname( - link: *const c_char, - mhp: *mut *mut mac_handle, - ) -> c_int; - fn mac_promisc_add( - mch: *const mac_client_handle, - ptype: mac_client_promisc_type_t, - pfn: mac_rx_fn, - arg: *mut c_void, - // I've been going back and forth on using - // const/mut for a lot of the illumos function - // pointer arguments. Part of me wants to be - // faithful to the C API which declares very - // few things `const` (most just strings), but - // another part of me recognizes that in many - // cases a) the kernel will not modify these - // objects after they are allocated or/and b) - // the Rust code is treating them as opaque - // blobs and certainly won't touch them. For - // example, the mac_promisc_handle: - // - // o Rust won't mess with it. - // - // o Perhaps illumos messes with it but I - // doubt it? 
- // - // In terms of the Rust compiler I'm guessing - // it doesn't make too much difference as I - // don't believe unsafe/raw pointers can assume - // any of the strict aliasing rules that - // shared/unique references do. - mphp: *mut *mut mac_promisc_handle, - flags: u16, - ) -> c_int; - fn mac_promisc_remove(mph: *const mac_promisc_handle); - fn mac_rx_barrier(mch: *const mac_client_handle); - fn mac_rx_set( - mch: *const mac_client_handle, - rx_fn: mac_rx_fn, - arg: *mut c_void, - ); - fn mac_rx_clear(mch: *const mac_client_handle); - fn mac_tx( - mch: *const mac_client_handle, - mp_chain: *const mblk_t, - hint: uintptr_t, - flag: u16, - ret_mp: *mut *const mblk_t, - ) -> mac_tx_cookie_t; - fn mac_unicast_primary_get(mh: *const mac_handle, addr: *mut [u8; 6]); -} - -// A step towards a safe abstraction over the mac_client.h API. -struct MacClient { - close_flags: u16, - mch: *mut mac_client_handle, -} - -impl MacClient { - // Open a new mac client on top of the mac provider specified by `mh`. - fn open( - mh: *const mac_handle, - open_flags: u16, - close_flags: u16, - ) -> Result { - let mut mch = ptr::null_mut:: as *mut mac_client_handle; - let ret = - unsafe { mac_client_open(mh, &mut mch, ptr::null(), open_flags) }; - - if ret != 0 { - return Err(ret); - } - - Ok(Self { close_flags, mch }) - } - - // Get the name of the client. - fn name(&self) -> String { - unsafe { - CStr::from_ptr(mac_client_name(self.mch)) - .to_str() - .unwrap() - .to_string() - } - } - - fn rx_barrier(&self) { - unsafe { mac_rx_barrier(self.mch) }; - } - - // Clear the function to receive Rx packets. All future packets - // destined for this client are dropped by mac. - fn clear_rx_fn(&self) { - unsafe { mac_rx_clear(self.mch) }; - } - - // Set the function to receive Rx packets. 
- fn set_rx_fn(&self, rx_fn: mac_rx_fn, arg: *mut c_void) { - unsafe { mac_rx_set(self.mch, rx_fn, arg) }; - } - - fn add_promisc_fn( - &self, - ptype: mac_client_promisc_type_t, - promisc_fn: mac_rx_fn, - arg: *mut c_void, - flags: u16, - ) -> Result<*mut mac_promisc_handle, c_int> { - let mut mph = 0 as *mut mac_promisc_handle; - - let ret = unsafe { - mac_promisc_add(self.mch, ptype, promisc_fn, arg, &mut mph, flags) - }; - - if ret == 0 { - Ok(mph) - } else { - return Err(ret); - } - } - - fn rem_promisc_fn(&self, mph: *mut mac_promisc_handle) { - unsafe { mac_promisc_remove(mph) }; - } - - // Send the packet on this client. This function consumes the packet. - // - // XXX This function can actually take a packet chain, but for now - // we just pass a single packet at a time. - // - // XXX Probably want two separate functions, one for - // MAC_DROP_ON_NO_DESC and one for ret_mp. - fn tx( - &self, - pkt: Packet, - hint: uintptr_t, - flag: u16, - ret_mp: *mut *const mblk_t, - ) { - // We must unwrap the raw `mblk_t` out of the `pkt` here, - // otherwise the mblk_t would be dropped at the end of this - // function along with `pkt`. - unsafe { mac_tx(self.mch, pkt.unwrap(), hint, flag, ret_mp) }; - } -} - -impl Drop for MacClient { - fn drop(&mut self) { - // Safety: We know that a MacClient can only exist if a mac - // client handle was successfully obtained, and thus mch is - // valid. 
- unsafe { mac_client_close(self.mch, self.close_flags) }; - } -} - -fn get_gw_mac(dip: *mut dev_info) -> EtherAddr { - let mut gw_mac_c: *const c_char = ptr::null(); - - let ret = unsafe { - ddi_prop_lookup_string( - DDI_DEV_T_ANY, - dip, - DDI_PROP_DONTPASS, - b"gateway_mac\0".as_ptr() as *const c_char, - &mut gw_mac_c, - ) - }; - - if ret != DDI_PROP_SUCCESS { - let err = format!("failed to get gateway_mac: {}", ret); - unsafe { cmn_err(CE_WARN, CString::new(err).unwrap().as_ptr()) }; - return EtherAddr::from([0; 6]); - } - - let gw_mac = unsafe { CStr::from_ptr(gw_mac_c).to_owned() }; - unsafe { ddi_prop_free(gw_mac_c as *mut c_void) }; - - EtherAddr::from_str(gw_mac.to_str().unwrap()).unwrap_or_else(|err| { - let msg = format!("failed to parse gateway_mac property: {}", err); - unsafe { cmn_err(CE_WARN, CString::new(msg).unwrap().as_ptr()) }; - EtherAddr::from([0; 6]) - }) -} - -fn get_gw_ip(dip: *mut dev_info) -> Ipv4Addr { - let mut gw_ip_c: *const c_char = ptr::null(); - - let ret = unsafe { - ddi_prop_lookup_string( - DDI_DEV_T_ANY, - dip, - DDI_PROP_DONTPASS, - b"gateway_ipv4\0".as_ptr() as *const c_char, - &mut gw_ip_c, - ) - }; - - if ret != DDI_PROP_SUCCESS { - let err = format!("failed to get gateway_ipv4: {}", ret); - unsafe { cmn_err(CE_WARN, CString::new(err).unwrap().as_ptr()) }; - return Ipv4Addr::from_str("0.0.0.0").unwrap(); - } - - let gw_ip = unsafe { CStr::from_ptr(gw_ip_c).to_owned() }; - unsafe { ddi_prop_free(gw_ip_c as *mut c_void) }; - - Ipv4Addr::from_str(gw_ip.to_str().unwrap()).unwrap_or_else(|err| { - let msg = format!("failed to parse gateway_ipv4 property: {}", err); - unsafe { cmn_err(CE_WARN, CString::new(msg).unwrap().as_ptr()) }; - Ipv4Addr::from_str("0.0.0.0").unwrap() - }) -} - -#[no_mangle] -unsafe extern "C" fn opte_open( - _devp: *mut dev_t, - _flags: c_int, - _otype: c_int, - _credp: *mut cred_t, -) -> c_int { - 0 -} - -#[no_mangle] -unsafe extern "C" fn opte_close( - _dev: dev_t, - _flags: c_int, - _otype: c_int, - 
_credp: *mut cred_t, -) -> c_int { - 0 -} - -type LinkName = String; - -enum PortState { - Inactive(Port, PortCfg, *const mac_handle), - Active(Arc, *const mac_handle), -} - -struct OpteState { - ectx: Arc, - gateway_mac: EtherAddr, - gateway_ip: Ipv4Addr, - v2p: Arc, - ports: KMutex>, -} - -impl OpteState { - fn new(gateway_mac: EtherAddr, gateway_ip: Ipv4Addr) -> Self { - let ectx = Arc::new(ExecCtx { log: Box::new(opte_core::KernelLog {}) }); - - OpteState { - ectx, - gateway_mac, - gateway_ip, - v2p: Arc::new(overlay::Virt2Phys::new()), - ports: KMutex::new(BTreeMap::new(), KMutexType::Driver), - } - } -} - -fn get_opte_state() -> &'static OpteState { - // Safety: The opte_dip pointer is write-once and is a valid - // pointer passed to attach(9E). The returned pointer is valid as - // it was derived from Box::into_raw() during attach(9E). - unsafe { &*(ddi_get_driver_private(opte_dip) as *mut OpteState) } -} - -fn add_port(req: &AddPortReq) -> Result<(), api::AddPortError> { - let state = get_opte_state(); - - // We must hold this lock until we have inserted the new port into - // the map; otherwise, multiple threads could race to add the same - // port. 
- let mut ports_lock = state.ports.lock(); - - if let Some(_) = ports_lock.get(&req.link_name) { - return Err(api::AddPortError::Exists); - } - - let mut mh: *mut mac_handle = ptr::null_mut::() as *mut mac_handle; - let link_name_c = CString::new(req.link_name.clone()).unwrap(); - let ret = unsafe { mac_open_by_linkname(link_name_c.as_ptr(), &mut mh) }; - - if ret != 0 { - return Err(api::AddPortError::MacOpenFailed(ret)); - } - - let mut private_mac = [0u8; 6]; - unsafe { mac_unicast_primary_get(mh, &mut private_mac) }; - let private_mac = EtherAddr::from(private_mac); - - let vpc_subnet = if req.port_cfg.snat.is_none() { - "192.168.77.0/24".parse().unwrap() - } else { - req.port_cfg.snat.as_ref().unwrap().vpc_sub4 - }; - - let dyn_nat = match req.port_cfg.snat.as_ref() { - None => opte_core::oxide_net::DynNat4Cfg { - public_ip: "192.168.99.99".parse().unwrap(), - ports: Range { start: 999, end: 1000 }, - }, - - Some(snat) => opte_core::oxide_net::DynNat4Cfg { - public_ip: snat.public_ip, - ports: Range { start: snat.port_start, end: snat.port_end }, - }, - }; - - let port_cfg = PortCfg { - vpc_subnet, - private_mac, - private_ip: req.port_cfg.private_ip, - gw_mac: state.gateway_mac, - gw_ip: state.gateway_ip, - dyn_nat, - overlay: None, - }; - - let mut new_port = - Port::new(&req.link_name, private_mac, state.ectx.clone()); - opte_core::oxide_net::firewall::setup(&mut new_port).unwrap(); - opte_core::oxide_net::dhcp4::setup(&mut new_port, &port_cfg).unwrap(); - opte_core::oxide_net::icmp::setup(&mut new_port, &port_cfg).unwrap(); - - // TODO: In order to demo this in the lab environment we currently - // allow SNAT to be optional. - if req.port_cfg.snat.is_some() { - opte_core::oxide_net::dyn_nat4::setup(&mut new_port, &port_cfg) - .unwrap(); - } - opte_core::oxide_net::arp::setup(&mut new_port, &port_cfg).unwrap(); - // We know the firewall layer is there so it can't fail. 
- router::setup(&mut new_port).unwrap(); - - ports_lock.insert( - req.link_name.clone(), - PortState::Inactive(new_port, port_cfg, mh), - ); - Ok(()) -} - -fn delete_port(req: &DeletePortReq) -> Result<(), api::DeletePortError> { - let state = - unsafe { &*(ddi_get_driver_private(opte_dip) as *mut OpteState) }; - - let mut ports_lock = state.ports.lock(); - - let _ = match ports_lock.get(&req.name) { - Some(PortState::Inactive(inactive_port, _, _)) => inactive_port, - - Some(PortState::Active(_, _)) => { - return Err(api::DeletePortError::InUse); - } - - None => return Err(api::DeletePortError::NotFound), - }; - - if let Some(PortState::Inactive(_, _, mh)) = ports_lock.remove(&req.name) { - unsafe { mac_close(mh) }; - } - - Ok(()) -} - -fn get_ocs<'a, 'b>( - ports_lock: &'a mut KMutexGuard>, - name: &'b str, -) -> Result, api::PortError> { - match ports_lock.get(name) { - None => Err(api::PortError::NotFound), - Some(PortState::Inactive(_, _, _)) => Err(api::PortError::Inactive), - Some(PortState::Active(ocs, _)) => Ok(ocs.clone()), - } -} - -// We need to pass this function a lock because the caller is likely -// performing several actions on a given Port and thus must hold the -// lock the entire time to prevent another thread from deleting the -// same Port. 
-fn get_port<'a, 'b>( - ports_lock: &'a KMutexGuard>, - name: &'b str, -) -> Result<&'a Port, api::PortError> { - match ports_lock.get(name) { - None => Err(api::PortError::NotFound), - Some(PortState::Inactive(port, _, _)) => Ok(&port), - Some(PortState::Active(_, _)) => Err(api::PortError::Active), - } -} - -#[derive(Debug, Serialize)] -enum HdlrError { - System(i32), -} - -impl From for HdlrError { - fn from(e: self::ioctl::Error) -> Self { - match e { - self::ioctl::Error::DeserError(_) => Self::System(EINVAL), - self::ioctl::Error::FailedCopyin => Self::System(EFAULT), - self::ioctl::Error::FailedCopyout => Self::System(EFAULT), - self::ioctl::Error::RespTooLong => Self::System(ENOBUFS), - } - } -} - -fn add_port_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: AddPortReq = ioctlenv.copy_in_req()?; - Ok(add_port(&req)) -} - -fn delete_port_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: DeletePortReq = ioctlenv.copy_in_req()?; - Ok(delete_port(&req)) -} - -fn list_ports_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let _req: api::ListPortsReq = ioctlenv.copy_in_req()?; - let mut resp = api::ListPortsResp { ports: vec![] }; - let state = get_opte_state(); - for (_k, ps) in state.ports.lock().iter() { - match ps { - PortState::Inactive(port, cfg, _) => { - resp.ports.push(PortInfo { - name: port.name().to_string(), - mac_addr: port.mac_addr(), - ip4_addr: cfg.private_ip, - in_use: false, - }); - } - - PortState::Active(ocs, _) => { - resp.ports.push(PortInfo { - name: ocs.name.clone(), - mac_addr: ocs.private_mac, - ip4_addr: ocs.port_cfg.private_ip, - in_use: true, - }); - } - } - } - - Ok(Ok(resp)) -} - -fn add_fw_rule_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: FwAddRuleReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); - let ocs = match get_ocs(&mut ports_lock, &req.port_name) { - Ok(v) => v, - Err(e) => return 
Ok(Err(api::AddFwRuleError::from(e))), - }; - Ok(api::add_fw_rule(&ocs.port, &req)) -} - -fn rem_fw_rule_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: FwRemRuleReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); - let ocs = match get_ocs(&mut ports_lock, &req.port_name) { - Ok(v) => v, - Err(e) => return Ok(Err(api::RemFwRuleError::from(e))), - }; - Ok(api::rem_fw_rule(&ocs.port, &req)) -} - -fn dump_tcp_flows_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: api::DumpTcpFlowsReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); - let ocs = match get_ocs(&mut ports_lock, &req.port_name) { - Ok(v) => v, - Err(e) => return Ok(Err(api::DumpTcpFlowsError::from(e))), - }; - Ok(Ok(api::dump_tcp_flows(&ocs.port, &req))) -} - -fn dump_layer_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: api::DumpLayerReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); - let ocs = match get_ocs(&mut ports_lock, &req.port_name) { - Ok(v) => v, - Err(e) => return Ok(Err(api::DumpLayerError::from(e))), - }; - Ok(api::dump_layer(&ocs.port, &req)) -} - -fn list_layers_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: api::ListLayersReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); - match get_port(&mut ports_lock, &req.port_name) { - Ok(port) => return Ok(Ok(port.list_layers())), - Err(_) => (), - }; - match get_ocs(&mut ports_lock, &req.port_name) { - Ok(ocs) => Ok(Ok(ocs.port.list_layers())), - - Err(_) => { - Ok(Err(api::ListLayersError::PortError(api::PortError::NotFound))) - } - } -} - -fn dump_uft_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: api::DumpUftReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); 
- let ocs = match get_ocs(&mut ports_lock, &req.port_name) { - Ok(v) => v, - Err(e) => return Ok(Err(api::DumpUftError::from(e))), - }; - Ok(Ok(api::dump_uft(&ocs.port, &req))) -} - -fn set_overlay_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: overlay::SetOverlayReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let ports_lock = state.ports.lock(); - let port = match get_port(&ports_lock, &req.port_name) { - Ok(p) => p, - Err(e) => return Ok(Err(overlay::SetOverlayError::from(e))), - }; - api::set_overlay(&port, &req, state.v2p.clone()); - Ok(Ok(())) -} - -fn set_v2p_hdlr(ioctlenv: &IoctlEnvelope) -> Result, HdlrError> { - let req: overlay::SetVirt2PhysReq = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - state.v2p.set(req.vip, req.phys); - Ok(Ok(())) -} - -fn add_router_entry_hdlr( - ioctlenv: &IoctlEnvelope, -) -> Result, HdlrError> { - let req: router::AddRouterEntryIpv4Req = ioctlenv.copy_in_req()?; - let state = get_opte_state(); - let mut ports_lock = state.ports.lock(); - match get_port(&ports_lock, &req.port_name) { - Ok(port) => Ok(router::add_entry_inactive( - port, - IpCidr::Ip4(req.dest), - req.target, - )), - - _ => { - let ocs = match get_ocs(&mut ports_lock, &req.port_name) { - Ok(v) => v, - Err(e) => return Ok(Err(router::AddEntryError::from(e))), - }; - Ok(router::add_entry_active( - &ocs.port, - IpCidr::Ip4(req.dest), - req.target, - )) - } - } -} - -fn dtrace_probe_hdlr_resp(resp: &Result, HdlrError>) -where - T: CmdOk, - E: CmdErr, -{ - let cstr = CString::new(format!("{:?}", resp)).unwrap(); - unsafe { - __dtrace_probe_hdlr__resp(cstr.as_ptr() as uintptr_t); - } -} - -// Convert the handler's response to the appropriate ioctl(2) return -// value and copyout any command response. In the case of a failure -// with the command we return success for the ioctl(2) call but the -// command response buffer will contain a Result so that the client -// can differentiate between success or failure. 
Non-zero ioctl(2) -// return values are used to indicate the more typical error -// conditions you would expect in the ioctl mechanisms itself, like -// copyin, copyout, etc. -fn hdlr_resp( - ioctlenv: &mut IoctlEnvelope, - resp: Result, HdlrError>, -) -> c_int -where - T: CmdOk, - E: CmdErr, -{ - dtrace_probe_hdlr_resp(&resp); - - match resp { - Ok(resp) => match ioctlenv.copy_out_resp(&resp) { - Ok(()) => 0, - Err(e) => to_errno(e), - }, - - Err(HdlrError::System(ret)) => ret, - } -} - -#[no_mangle] -unsafe extern "C" fn opte_ioctl( - _dev: dev_t, - cmd: c_int, - arg: intptr_t, - mode: c_int, - _credp: *mut cred_t, - _rvalp: *mut c_int, -) -> c_int { - let cmd = match IoctlCmd::try_from(cmd) { - Ok(v) => v, - Err(_) => { - // XXX Replace this with a stat. - opte_core::err(format!("invalid ioctl cmd: {}", cmd)); - return EINVAL; - } - }; - - let mut ioctlenv = match IoctlEnvelope::new(arg as *const c_void, mode) { - Ok(val) => val, - _ => return EFAULT, - }; - - match cmd { - IoctlCmd::AddPort => { - let resp = add_port_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::DeletePort => { - let resp = delete_port_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - // XXX Eventually this information (or some subset of it) - // comes from Omicron/SA, but for now we require manual config - // between the window of creating an instance (which creates - // an OPTE Port) and starting it. - IoctlCmd::SetOverlay => { - let resp = set_overlay_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::ListPorts => { - let resp = list_ports_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::FwAddRule => { - let resp = add_fw_rule_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::FwRemRule => { - // XXX At the moment a default rule can be removed. That's - // something we may want to prevent at the OPTE layer - // moving forward. 
Or we may want to allow complete - // freedom at this level and place that enforcement at the - // control plane level. - let resp = rem_fw_rule_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::DumpTcpFlows => { - let resp = dump_tcp_flows_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::DumpLayer => { - let resp = dump_layer_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::ListLayers => { - let resp = list_layers_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::DumpUft => { - let resp = dump_uft_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::SetVirt2Phys => { - let resp = set_v2p_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - - IoctlCmd::AddRouterEntryIpv4 => { - let resp = add_router_entry_hdlr(&ioctlenv); - hdlr_resp(&mut ioctlenv, resp) - } - _ => ENOTSUP, - } -} - -#[no_mangle] -unsafe extern "C" fn opte_read( - _dev: dev_t, - _uiop: *mut uio, - _credp: *mut cred_t, -) -> c_int { - 0 -} - -#[no_mangle] -unsafe extern "C" fn opte_write( - _dev: dev_t, - _uiop: *mut uio, - _credp: *mut cred_t, -) -> c_int { - 0 -} - -#[no_mangle] -static opte_cb_ops: cb_ops = cb_ops { - cb_open: opte_open, - cb_close: opte_close, - cb_strategy: nodev, - cb_print: nodev, - cb_dump: nodev, - cb_read: opte_read, - cb_write: opte_write, - cb_ioctl: opte_ioctl, - cb_devmap: nodev, - cb_mmap: nodev, - cb_segmap: nodev, - cb_chpoll: nochpoll, - cb_prop_op: ddi_prop_op, - cb_str: ptr::null_mut::() as *mut streamtab, - cb_flag: D_MP, - cb_rev: CB_REV, - cb_aread: nodev, - cb_awrite: nodev, -}; - -#[no_mangle] -static opte_devops: dev_ops = dev_ops { - devo_rev: DEVO_REV, - devo_refcnt: 0, - devo_getinfo: nodev_getinfo, - devo_identify: nulldev_identify, - devo_probe: nulldev_probe, - devo_attach: opte_attach, - devo_detach: opte_detach, - devo_reset: nodev_reset, - devo_cb_ops: &opte_cb_ops, - devo_bus_ops: 0 as *const bus_ops, // ptr::null() - devo_power: nodev_power, - 
devo_quiesce: ddi_quiesce_not_needed, -}; - -#[no_mangle] -static opte_modldrv: modldrv = unsafe { - modldrv { - drv_modops: &mod_driverops, - drv_linkinfo: OPTE_STR, - drv_dev_ops: &opte_devops, - } -}; - -// NOTE We don't need the `no_magle` here, but it's nice to keep the -// symbol clean to keep with the C modules (it also makes it easier to -// grab from MDB). I'm also using lowercase to be consistent with -// other kernel modules. -// -// TODO There's probably a slightly better way to initialize -// `ml_linkage` to NULL instead of explicitly filling in each slot. -#[no_mangle] -static opte_linkage: modlinkage = modlinkage { - ml_rev: MODREV_1, - ml_linkage: [ - (&opte_modldrv as *const modldrv).cast(), - ptr::null(), - ptr::null(), - ptr::null(), - ptr::null(), - ptr::null(), - ptr::null(), - ], -}; - -#[no_mangle] -unsafe extern "C" fn opte_attach( - dip: *mut dev_info, - cmd: ddi_attach_cmd_t, -) -> c_int { - match cmd { - DDI_RESUME => return DDI_SUCCESS, - cmd if cmd != DDI_ATTACH => return DDI_FAILURE, - _ => (), - } - - // We create a minor node to use as entry for opteadm ioctls. - let ret = ddi_create_minor_node( - dip, - b"opte\0".as_ptr() as *const c_char, - S_IFCHR, - OPTE_CTL_MINOR, - DDI_PSEUDO, - 0, - ); - - if ret != DDI_SUCCESS { - cmn_err( - CE_WARN, - b"failed to create minor node\0".as_ptr() as *const c_char, - ); - return DDI_FAILURE; - } - - let gateway_mac = get_gw_mac(dip); - let gateway_ip = get_gw_ip(dip); - cmn_err( - CE_NOTE, - CString::new(format!( - "gateway_mac: {}, gateway_ip: {}", - gateway_mac, gateway_ip - )) - .unwrap() - .as_ptr(), - ); - let state = Box::new(OpteState::new(gateway_mac, gateway_ip)); - - // We consume the box and place it's raw pointer in the - // per-instance device state. On detach we place this pointer back - // into the box so it can be dropped. All other uses of the - // pointer will simply convert to a reference, as we know the - // pointer is non-NULL and aligned properly. 
- ddi_set_driver_private(dip, Box::into_raw(state) as *mut c_void); - opte_dip = dip; - ddi_report_dev(dip); - DDI_SUCCESS -} - -#[no_mangle] -unsafe extern "C" fn opte_detach( - _dip: *mut dev_info, - cmd: ddi_detach_cmd_t, -) -> c_int { - match cmd { - DDI_SUSPEND => return DDI_SUCCESS, - cmd if cmd != DDI_DETACH => return DDI_FAILURE, - _ => (), - } - - // We should never be in detach if attach has not run. - // Furthermore, if we have a dip, we have non-NULL state. - assert!(!opte_dip.is_null()); - let rstate = ddi_get_driver_private(opte_dip); - assert!(!rstate.is_null()); - - // Put the state back in the box so Rust can drop it. - let _ = Box::from_raw(rstate as *mut OpteState); - ddi_remove_minor_node(opte_dip, ptr::null()); - opte_dip = ptr::null_mut::() as *mut dev_info; - DDI_SUCCESS -} - -#[no_mangle] -unsafe extern "C" fn _init() -> c_int { - mod_install(&opte_linkage) -} - -#[no_mangle] -unsafe extern "C" fn _info(modinfop: *mut modinfo) -> c_int { - mod_info(&opte_linkage, modinfop) -} - -#[no_mangle] -unsafe extern "C" fn _fini() -> c_int { - let ret = mod_remove(&opte_linkage); - if ret != 0 { - return ret; - } - - 0 -} - -#[lang = "eh_personality"] -extern "C" fn eh_personality() {} - -// The symbol name gets rewritten to `rust_being_unwind` (don't ask me -// why), so we use `panic_hdlr` to avoid clashing with the kernel's -// panic symbol. -#[panic_handler] -fn panic_hdlr(info: &PanicInfo) -> ! { - let msg = CString::new(format!("{}", info)).unwrap(); - unsafe { - cmn_err(CE_WARN, msg.as_ptr()); - panic(msg.as_ptr()); - } -} - -// ================================================================ -// mac client intercept APIs -// -// Thes APIs are meant to mimic the mac client APIs, allowing opte to -// act as an intermediary between viona and mac. -// ================================================================ - -// TODO The port configuration and client state are conflated here. 
It -// would be good to tease them apart into separate types to better -// demarcate things. E.g., the client state might be the rx_state and -// promisc_state, along with a pointer to something like `PortState`. -// And the `PortState` might be what OpteClientState is right now. -// Though you might tease this out a bit more and separate the static -// port configuration handed down during port registration from actual -// state like the hairpin queue. -pub struct OpteClientState { - mh: *const mac_handle, - mc: MacClient, - rx_state: Option, - mph: *mut mac_promisc_handle, - name: String, - promisc_state: Option, - port: Arc>, - port_cfg: PortCfg, - port_periodic: *const ddi_periodic, - private_mac: EtherAddr, - // Packets generated by OPTE on the guest's/network's behalf, to - // be returned to the source (aka a "hairpin" packet). - hairpin_queue: KMutex>>, -} - -const ONE_SECOND: hrtime_t = 1_000_000_000; - -#[no_mangle] -pub unsafe extern "C" fn opte_port_periodic(arg: *mut c_void) { - // The `arg` is a raw pointer to a `Port`, as guaranteed by - // opte_client_open(). - assert!(!arg.is_null()); - let port = &*(arg as *const Port); - port.expire_flows(gethrtime()); -} - -#[no_mangle] -pub unsafe extern "C" fn opte_client_open( - mh: *const mac_handle, - ocspo: *mut *const OpteClientState, - _name: *const c_char, - flags: u16, -) -> c_int { - *ocspo = ptr::null_mut(); - - let mc = match MacClient::open(mh, flags, 0) { - Err(ret) => return ret, - Ok(mc) => mc, - }; - - let link_name = mc.name(); - let state = &mut *(ddi_get_driver_private(opte_dip) as *mut OpteState); - - // We must hold the ports's lock for the duration of this call to ensure - // that transition from inactive to active is atomic. 
- let mut ports_lock = state.ports.lock(); - - let (port, port_cfg, port_mh) = match ports_lock.remove(&link_name) { - Some(PortState::Inactive(p, c, port_mh)) => (p, c, port_mh), - Some(PortState::Active(_, _)) => return EBUSY, - None => return ENOENT, - }; - - let active_port = Arc::new(port.activate()); - let mac_addr = active_port.mac_addr(); - let port_periodic = ddi_periodic_add( - opte_port_periodic, - // The non-counted alias is fine as the periodic is cleaned up - // as part of opte_client_close(), prior to the Port being - // dropped. - active_port.as_ref() as *const Port<_> as *const c_void, - ONE_SECOND, - DDI_IPL_0, - ); - - let ocs = Arc::new(OpteClientState { - mh, - mc, - name: link_name.clone(), - rx_state: None, - mph: 0 as *mut mac_promisc_handle, - promisc_state: None, - port: active_port, - port_cfg, - port_periodic, - private_mac: mac_addr, - hairpin_queue: KMutex::new(Vec::with_capacity(4), KMutexType::Driver), - }); - - // We can lease this uncounted reference to viona because this - // driver guarantees to the client that a port cannot be deleted - // until the client is finished with it. There is no concern over - // aliasing as this structure is opaque to the client and any - // structure inside ocs should provide its own thread-safety. - *ocspo = Arc::as_ptr(&ocs); - ports_lock.insert(link_name.clone(), PortState::Active(ocs, port_mh)); - 0 -} - -#[no_mangle] -pub unsafe extern "C" fn opte_client_close( - ocsp: *mut OpteClientState, - _flags: u16, -) { - let link_name = &((*ocsp).name); - let state = &mut *(ddi_get_driver_private(opte_dip) as *mut OpteState); - // This should NEVER happen. It would mean we have an active OPTE - // client but are not tracking it at all in our clients list. - let _ = state.ports.lock().remove(link_name).expect("something is amiss"); - - // The ownership of `ocs` is being given back to opte. We need - // to put it back in the box so that the value and its owned - // resources are properly dropped. 
- let ocs = Box::from_raw(ocsp); - - // The client is closing its handle to this port. We need to - // effectively "reset" the port by wiping all of its current state - // and returning it to its original state in preparation for the - // next client open. This is best done by dropping the entire Port - // and replacing it with a new one with the identical - // configuration. - ddi_periodic_delete(ocs.port_periodic); - - let mut new_port = - Port::new(&ocs.name, ocs.private_mac, state.ectx.clone()); - - let port_cfg = ocs.port_cfg; - - opte_core::oxide_net::firewall::setup(&mut new_port).unwrap(); - opte_core::oxide_net::dhcp4::setup(&mut new_port, &port_cfg).unwrap(); - opte_core::oxide_net::icmp::setup(&mut new_port, &port_cfg).unwrap(); - opte_core::oxide_net::dyn_nat4::setup(&mut new_port, &port_cfg).unwrap(); - opte_core::oxide_net::arp::setup(&mut new_port, &port_cfg).unwrap(); - // We know the firewall layer is there so it can't fail. - router::setup(&mut new_port).unwrap(); - - state.ports.lock().insert( - link_name.to_string(), - PortState::Inactive(new_port, port_cfg, ocs.mh), - ); -} - -#[no_mangle] -pub unsafe extern "C" fn opte_rx_barrier(ocsp: *const OpteClientState) { - let ocs = &*ocsp; - ocs.mc.rx_barrier(); -} - -struct OpteRxState { - rx_fn: mac_rx_fn, - arg: *mut c_void, -} - -#[no_mangle] -pub unsafe extern "C" fn opte_rx_set( - ocsp: *mut OpteClientState, - rx_fn: mac_rx_fn, - arg: *mut c_void, -) { - let ocs = &mut *ocsp; - ocs.rx_state = Some(OpteRxState { rx_fn, arg }); - ocs.mc.set_rx_fn(opte_rx, ocsp as *mut c_void); -} - -#[no_mangle] -pub unsafe extern "C" fn opte_rx_clear(ocsp: *mut OpteClientState) { - let ocs = &mut *ocsp; - // Need to take the state out so it is dropped. 
- let _ = ocs.rx_state.take(); - ocs.mc.clear_rx_fn(); -} - -struct OptePromiscState { - promisc_fn: mac_rx_fn, - arg: *mut c_void, -} - -#[no_mangle] -pub unsafe extern "C" fn opte_promisc_add( - ocsp: *mut OpteClientState, - ptype: mac_client_promisc_type_t, - promisc_fn: mac_rx_fn, - arg: *mut c_void, - flags: u16, -) -> c_int { - let mut ocs = &mut *ocsp; - ocs.promisc_state = Some(OptePromiscState { promisc_fn, arg }); - - let res = - ocs.mc.add_promisc_fn(ptype, opte_rx_mcast, ocsp as *mut c_void, flags); - - ocs.mph = match res { - Ok(mph) => mph, - Err(ret) => { - let _ = ocs.promisc_state.take(); - return ret; - } - }; - - 0 -} - -#[no_mangle] -pub unsafe extern "C" fn opte_promisc_remove(ocsp: *mut OpteClientState) { - let mut ocs = &mut *ocsp; - ocs.mc.rem_promisc_fn(ocs.mph); - ocs.mph = 0 as *mut mac_promisc_handle; - let _ = ocs.promisc_state.take(); -} - -#[no_mangle] -pub unsafe extern "C" fn opte_tx( - ocsp: *mut OpteClientState, - mp_chain: *mut mblk_t, - hint: uintptr_t, - flag: u16, - ret_mp: *mut *const mblk_t, -) { - // TODO: I haven't dealt with chains, though I'm pretty sure it's - // always just one. - assert!((*mp_chain).b_next == ptr::null_mut()); - __dtrace_probe_tx(mp_chain as uintptr_t); - - let mut pkt = match Packet::::wrap(mp_chain).parse() { - Ok(pkt) => pkt, - Err(e) => { - // TODO SDT probe - // TODO stat - opte_core::dbg(format!("failed to parse packet: {:?}", e)); - return; - } - }; - let ocs = &*ocsp; - let res = ocs.port.process(Direction::Out, &mut pkt); - - match res { - Ok(ProcessResult::Modified) => { - ocs.mc.tx(pkt, hint, flag, ret_mp); - } - - // TODO Probably want a state + a probe along with a reason - // carried up via `ProcessResult::Drop(String)` so that a - // reason can be given as part of the probe. - Ok(ProcessResult::Drop { .. 
}) => { - return; - } - - Ok(ProcessResult::Hairpin(hppkt)) => { - let rx_state = ocs.rx_state.as_ref().unwrap(); - (rx_state.rx_fn)( - rx_state.arg, - // TODO: IIRC we can just set the mrh (mac - // resource handle) to NULL and it will - // deliver via the default ring. If this - // doesn't work we can create some type of - // hairpin queue. - 0 as *mut c_void as *mut mac_resource_handle, - hppkt.unwrap(), - boolean_t::B_FALSE, - ); - return; - } - - // In this case the packet is bypassing processing. This - // result type will probably go away eventually. For now we - // use it for protocols/traffic we aren't ready to deal with - // yet. - Ok(ProcessResult::Bypass) => { - ocs.mc.tx(pkt, hint, flag, ret_mp); - } - - // TODO Want something better here eventually: - // - // 1. Not sure we really want to log every error to the system log. - // - // 2. Though we should probably fire a probe for every error. - // - // 3. Certainly we want stats around errors, perhaps both in - // OPTE itself as well as this driver. - Err(e) => { - cmn_err( - CE_WARN, - CString::new(format!("{:?}", e)).unwrap().as_ptr(), - ); - return; - } - } - - // Deal with any pending outbound hairpin packets. - // - // XXX This should be done by a task queue. Otherwise, we only - // clear the hairpin queue when the guest is actively trying to - // send packets. - while let Some(p) = ocs.hairpin_queue.lock().pop() { - // XXX Get rid of unwrap() here and instead do stat/probe/log - // if hairpin packet fails to parse. - ocs.mc.tx( - p.parse().unwrap(), - hint, - MAC_DROP_ON_NO_DESC, - ptr::null_mut(), - ); - } -} - -// This doesn't need to be no_mangle, but I like keeping callbacks -// demangled. -#[no_mangle] -pub unsafe extern "C" fn opte_rx( - arg: *mut c_void, - mrh: *mut mac_resource_handle, - mp_chain: *mut mblk_t, - loopback: boolean_t, -) { - // XXX Need to deal with chains. This was an assert but it's - // blocking other work that's more pressing at the moment as I - // keep tripping it. 
- if !(*mp_chain).b_next.is_null() { - __dtrace_probe_rx__chain__todo(mp_chain as uintptr_t); - } - __dtrace_probe_rx(mp_chain as uintptr_t); - - let mut pkt = match Packet::::wrap(mp_chain).parse() { - Ok(pkt) => pkt, - Err(e) => { - // TODO SDT probe - // TODO stat - opte_core::dbg(format!("failed to parse packet: {:?}", e)); - return; - } - }; - let ocs = &*(arg as *const OpteClientState); - let rx_state = ocs.rx_state.as_ref().unwrap(); - let res = ocs.port.process(Direction::In, &mut pkt); - - match res { - Ok(ProcessResult::Modified) => { - let meta = pkt.meta(); - let etype = match meta.inner.ether.as_ref() { - Some(ether) => ether.ether_type, - _ => panic!("no inner ether"), - }; - - // We should never see ARP here. The only outbound ARP - // should be for the gateway, and that should be handled - // by a hairpin action in opte_tx(). Any inbound should be - // the gateway ARPing for the private or public IP and - // should be handled by the hairpin below, all other - // inbound ARP should be denied. - // - // TODO This check will eventually go away. Just want it - // here for now to verify no ARP is getting thru to the - // guest. - if etype == ETHER_TYPE_ARP { - panic!("Should never see ARP here"); - } - - (rx_state.rx_fn)(rx_state.arg, mrh, pkt.unwrap(), loopback); - } - - // TODO Probably want a state + a probe along with a reason - // carried up via `ProcessResult::Drop(String)` so that a - // reason can be given as part of the probe. - Ok(ProcessResult::Drop { .. }) => { - return; - } - - Ok(ProcessResult::Hairpin(hppkt)) => { - ocs.hairpin_queue.lock().push(hppkt); - return; - } - - // In this case the packet is bypassing processing. This - // result type will probably go away eventually. For now we - // use it for protocols/traffic we aren't ready to deal with - // yet. 
- Ok(ProcessResult::Bypass) => { - let meta = pkt.meta(); - let etype = match meta.inner.ether.as_ref() { - Some(ether) => ether.ether_type, - _ => panic!("no inner ether"), - }; - - // See comment above. - if etype == ETHER_TYPE_ARP { - panic!("Should never see ARP here"); - } - - (rx_state.rx_fn)(rx_state.arg, mrh, pkt.unwrap(), loopback); - } - - // TODO Want something better here eventually: - // - // 1. Not sure we really want to log every error to the system log. - // - // 2. Though we should probably fire a probe for every error. - // - // 3. Certainly we want stats around errors, perhaps both in - // OPTE itself as well as this driver. - Err(e) => { - cmn_err( - CE_WARN, - CString::new(format!("{:?}", e)).unwrap().as_ptr(), - ); - return; - } - } -} - -// This doesn't need to be no_mangle, but I like keeping callbacks -// demangled. -#[no_mangle] -pub unsafe extern "C" fn opte_rx_mcast( - arg: *mut c_void, - mrh: *mut mac_resource_handle, - mp: *mut mblk_t, - loopback: boolean_t, -) { - let ocs = &*(arg as *const OpteClientState); - let pstate = ocs.promisc_state.as_ref().unwrap(); - (pstate.promisc_fn)(pstate.arg, mrh, mp, loopback); -} - -// On alignment, `kmem_alloc(9F)` has this of offer: -// -// > The allocated memory is at least double-word aligned, so it can -// > hold any C data structure. No greater alignment can be assumed. -// -// I really hate when documentation uses "word", because that seems to -// mean different things in different contexts, in this case I have to -// assume it means native integer size, or 32-bit in the case our our -// AMD64 kernel. So this means all allocations are at least 8-byte -// aligned, but could be more. However, the last sentence is saying -// that you cannot assume alignment is ever greater than 8 bytes. -// Therefore, it seems best to just assume it's 8 bytes. 
So, for the -// purposes of implementing GlobalAlloc, I believe this means that I -// should return NULL for any Layout which requests more than 8-byte -// alignment (or probably just panic since I never expect this). -// Furthermore, things that could have smaller alignment will just -// have to live with the larger alignment. - -use core::alloc::{GlobalAlloc, Layout}; - -struct KmemAlloc; - -unsafe impl GlobalAlloc for KmemAlloc { - unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - if layout.align() > 8 { - panic!("kernel alloc greater than 8-byte alignment"); - } - - kmem_alloc(layout.size(), KM_SLEEP) as *mut u8 - } - - unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { - kmem_free(ptr as *mut c_void, layout.size() as size_t) - } -} - -#[global_allocator] -static A: KmemAlloc = KmemAlloc; - -// In reality, if the GlobalAlloc is using KM_SLEEP, then we can never -// hit this. But the compiler wants us to define it, so we do. -#[alloc_error_handler] -fn alloc_error(_: Layout) -> ! { - panic!("allocation error"); -} - -// This is a hack to get around the fact that liballoc includes -// calls to _Unwind_Resume, supposedly because it is not compiled -// with `panic=abort`. This is all a little bit beyond me but I just -// want to satisfy the symbol resolution so I can load this module. -// -// https://github.com/rust-lang/rust/issues/47493 -#[allow(non_snake_case)] -#[no_mangle] -fn _Unwind_Resume() -> ! 
{ - panic!("_Unwind_Resume called"); -} diff --git a/opte-drv/x86_64-unknown-unknown.json b/opte-drv/x86_64-unknown-unknown.json deleted file mode 100644 index efc482c3..00000000 --- a/opte-drv/x86_64-unknown-unknown.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "arch": "x86_64", - "code-model": "kernel", - "cpu": "x86-64", - "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", - "disable-redzone": true, - "dynamic-linking": false, - "eh-frame-header": false, - "frame-pointer": "always", - "executables": true, - "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", - "has-rpath": true, - "is-builtin": false, - "is-like-solaris": true, - "limit-rdylib-exports": false, - "linker": "ld", - "llvm-target": "x86_64-none-none", - "max-atomic-width": 64, - "needs-plt": true, - "no-default-libraries": true, - "os": "none", - "panic-strategy": "abort", - "pointer-width": 64, - "relax-elf-relocations": false, - "relocation-model": "static", - "relro-level": "full", - "staticlib-prefix": "", - "target-family": "unix", - "target-pointer-width": "64" -} diff --git a/xde/src/lib.rs b/xde/src/lib.rs index 5f23b4a5..f0dbd772 100644 --- a/xde/src/lib.rs +++ b/xde/src/lib.rs @@ -4,19 +4,25 @@ // Copyright 2022 Oxide Computer Company -// xde - a mac provider for OPTE and the onramp to the Oxide rack network +// xde - A mac provider for OPTE-based network implementations. #![feature(extern_types)] #![feature(lang_items)] #![feature(panic_info_message)] #![no_std] #![allow(non_camel_case_types)] #![allow(non_upper_case_globals)] +// XXX We do not use double in the kernel. We should not allow +// "improper C types". This hack is here because of the ip.rs code +// generated by bindgen. It brings in a bunch of stuff we do not use. +// At some point we could hand write the stuff that is actually +// needed, or come up with a better solution like using CTF data to +// generate Rust types for only the stuff we need.
#![allow(improper_ctypes)] // for long double -> u128 #![allow(non_camel_case_types)] // for bindgen code in ip.rs #![allow(non_snake_case)] // for bindgen code in ip.rs -#![deny(unused_must_use)] #![feature(alloc_error_handler)] #![feature(rustc_private)] +#![deny(unused_must_use)] mod ioctl; @@ -94,6 +100,18 @@ fn alloc_error(_: Layout) -> ! { #[global_allocator] static A: KmemAlloc = KmemAlloc; +// This is a hack to get around the fact that liballoc includes +// calls to _Unwind_Resume, supposedly because it is not compiled +// with `panic=abort`. This is all a little bit beyond me but I just +// want to satisfy the symbol resolution so I can load this module. +// +// https://github.com/rust-lang/rust/issues/47493 +#[allow(non_snake_case)] +#[no_mangle] +fn _Unwind_Resume() -> ! { + panic!("_Unwind_Resume called"); +} + // NOTE: We allow unused_unsafe so these macros can be used freely in // unsafe and non-unsafe functions. #[macro_export] diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 09b68ace..84c98e61 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -4,11 +4,11 @@ // Copyright 2022 Oxide Computer Company -//! XDE - A MAC provider for OPTE +//! xde - A mac provider for OPTE. //! -//! This is an illumos kernel driver that provides MAC devices hooked up to -//! OPTE. At the time of writing this driver is being developed in a parallel -//! crate to opte-drv. It's expected that this driver will merge into opte-drv. +//! An illumos kernel driver that implements the mac provider +//! interface, allowing one to run network implementations written in +//! the OPTE framework. // TODO // - ddm integration to choose correct underlay device (currently just using