Skip to content

Commit

Permalink
feat: cordon features (#966)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 2, 2024
1 parent 501323a commit 565dfd7
Show file tree
Hide file tree
Showing 23 changed files with 849 additions and 134 deletions.
6 changes: 5 additions & 1 deletion Cargo.Bazel.lock
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"checksum": "d9bc8b02db59266969caa52b402af82e441437a6119cc3bcb932eae6acb3fc52",
"checksum": "abee9a1e183890980b7417228d49c17fa7e3379b15e022a305a18ccf1a343194",
"crates": {
"actix-codec 0.5.2": {
"name": "actix-codec",
Expand Down Expand Up @@ -10447,6 +10447,10 @@
"id": "serde_json 1.0.128",
"target": "serde_json"
},
{
"id": "serde_yaml 0.9.34+deprecated",
"target": "serde_yaml"
},
{
"id": "sha2 0.10.8",
"target": "sha2"
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions cordoned_features.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# This file is used for cordoining ceratin node features
# which are used while managing subnets.
# The full list of features can be found here:
# https://github.com/dfinity/dre/blob/main/rs/ic-management-types/src/lib.rs#L317-L324
features:
# Example entry
# - feature: data_center
# value: mu1
1 change: 1 addition & 0 deletions rs/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ comfy-table = { workspace = true }
human_bytes = { workspace = true }
mockall.workspace = true
clio = { workspace = true }
serde_yaml.workspace = true

[dev-dependencies]
actix-rt = { workspace = true }
Expand Down
4 changes: 4 additions & 0 deletions rs/cli/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ The argument is mandatory for testnets, and is optional for mainnet and staging"
/// Link to the related forum post, where proposal details can be discussed
#[clap(long, global = true, visible_aliases = &["forum-link", "forum"])]
pub forum_post_link: Option<String>,

/// Path to file which contains cordoned features
#[clap(long, global = true, visible_aliases = &["cf-file", "cfff"])]
pub cordon_feature_fallback_file: Option<PathBuf>,
}

// Do not use outside of DRE CLI.
Expand Down
2 changes: 1 addition & 1 deletion rs/cli/src/commands/subnet/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl ExecutableCommand for Replace {
_ => SubnetTarget::FromNodesIds(self.nodes.clone()),
};

let subnet_manager = ctx.subnet_manager().await;
let subnet_manager = ctx.subnet_manager().await?;
let subnet_change_response = subnet_manager
.with_target(subnet_target)
.membership_replace(
Expand Down
2 changes: 1 addition & 1 deletion rs/cli/src/commands/subnet/resize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl ExecutableCommand for Resize {
async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> {
let runner = ctx.runner().await?;

let subnet_manager = ctx.subnet_manager().await;
let subnet_manager = ctx.subnet_manager().await?;

let subnet_change_response = subnet_manager
.subnet_resize(
Expand Down
174 changes: 174 additions & 0 deletions rs/cli/src/cordoned_feature_fetcher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
use std::{path::PathBuf, time::Duration};

use decentralization::network::NodeFeaturePair;
use futures::future::BoxFuture;
use ic_management_types::NodeFeature;
use itertools::Itertools;
use log::warn;
use mockall::automock;
use reqwest::{Client, ClientBuilder};
use strum::VariantNames;

#[automock]
pub trait CordonedFeatureFetcher: Sync + Send {
fn fetch(&self) -> BoxFuture<'_, anyhow::Result<Vec<NodeFeaturePair>>>;
}

pub struct CordonedFeatureFetcherImpl {
client: Client,
fallback_file: Option<PathBuf>,
offline: bool,
}

const CORDONED_FEATURES_FILE_URL: &str = "https://raw.githubusercontent.com/dfinity/dre/refs/heads/main/cordoned_features.yaml";

impl CordonedFeatureFetcherImpl {
pub fn new(offline: bool, fallback_file: Option<PathBuf>) -> anyhow::Result<Self> {
let client = ClientBuilder::new().timeout(Duration::from_secs(10)).build()?;

Ok(Self {
client,
fallback_file,
offline,
})
}

async fn fetch_from_git(&self) -> anyhow::Result<Vec<NodeFeaturePair>> {
let bytes = self
.client
.get(CORDONED_FEATURES_FILE_URL)
.send()
.await?
.error_for_status()?
.bytes()
.await?;

self.parse(&bytes)
}

fn fetch_from_file(&self) -> anyhow::Result<Vec<NodeFeaturePair>> {
let contents = std::fs::read(self.fallback_file.as_ref().unwrap())?;

self.parse(&contents)
}

// Write tests for this
fn parse(&self, contents: &[u8]) -> anyhow::Result<Vec<NodeFeaturePair>> {
let valid_yaml = serde_yaml::from_slice::<serde_yaml::Value>(contents)?;

let features = match valid_yaml.get("features") {
Some(serde_yaml::Value::Sequence(features)) => features.clone(),
Some(serde_yaml::Value::Null) => vec![],
n => anyhow::bail!(
"Failed to parse contents. Expected to have top-level key `features` with an array of node features. Got: \n{:?}",
n
),
};

let mut valid_features = vec![];
for feature in features {
valid_features.push(NodeFeaturePair {
feature: feature
.get("feature")
.map(|value| {
serde_yaml::from_value(value.clone()).map_err(|_| {
anyhow::anyhow!(
"Failed to parse feature `{}`. Expected one of: [{}]",
serde_yaml::to_string(value).unwrap(),
NodeFeature::VARIANTS.iter().join(",")
)
})
})
.ok_or(anyhow::anyhow!("Expected `feature` key to be present. Got: \n{:?}", feature))??,
value: feature
.get("value")
.map(|value| {
value
.as_str()
.ok_or(anyhow::anyhow!(
"Failed to parse value `{}`. Expected string",
serde_yaml::to_string(value).unwrap(),
))
.map(|s| s.to_string())
})
.ok_or(anyhow::anyhow!("Expected `value` key to be present. Got: \n{:?}", feature))??,
});
}

Ok(valid_features)
}
}

impl CordonedFeatureFetcher for CordonedFeatureFetcherImpl {
fn fetch(&self) -> BoxFuture<'_, anyhow::Result<Vec<NodeFeaturePair>>> {
Box::pin(async {
match (self.offline, self.fallback_file.is_some()) {
(true, true) => self.fetch_from_file(),
(true, false) => Err(anyhow::anyhow!("Cannot fetch cordoned features offline without a fallback file")),
(false, true) => match self.fetch_from_git().await {
Ok(from_git) => Ok(from_git),
Err(e_from_git) => {
warn!("Failed to fetch cordoned features from git: {:?}", e_from_git);
warn!("Falling back to fetching from file");
match self.fetch_from_file() {
Ok(from_file) => Ok(from_file),
Err(e_from_file) => Err(anyhow::anyhow!(
"Failed to fetch cordoned features both from file and from git.\nError from git: {:?}\nError from file: {:?}",
e_from_git,
e_from_file
)),
}
}
},
(false, false) => self.fetch_from_git().await,
}
})
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn valid_parsing() {
let contents = br#"
features:
- feature: data_center
value: mu1
- feature: node_provider
value: some-np
- feature: data_center_owner
value: some-dco
- feature: city
value: some-city
- feature: city
value: another-city
- feature: country
value: some-country
- feature: continent
value: some-continent"#;

let fetcher = CordonedFeatureFetcherImpl::new(true, None).unwrap();

let maybe_parsed = fetcher.parse(contents);
assert!(maybe_parsed.is_ok());
let parsed = maybe_parsed.unwrap();

assert_eq!(parsed.len(), 7)
}

#[test]
fn valid_empty_file() {
let contents = br#"
features:"#;

let fetcher = CordonedFeatureFetcherImpl::new(true, None).unwrap();

let maybe_parsed = fetcher.parse(contents);
assert!(maybe_parsed.is_ok());
let parsed = maybe_parsed.unwrap();

assert_eq!(parsed.len(), 0)
}
}
31 changes: 27 additions & 4 deletions rs/cli/src/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::{

use ic_canisters::{governance::governance_canister_version, IcAgentCanisterClient};
use ic_management_backend::{
health::{self, HealthStatusQuerier},
lazy_git::LazyGit,
lazy_registry::{LazyRegistry, LazyRegistryImpl},
proposal::{ProposalAgent, ProposalAgentImpl},
Expand All @@ -21,6 +22,7 @@ use crate::{
artifact_downloader::{ArtifactDownloader, ArtifactDownloaderImpl},
auth::Neuron,
commands::{Args, AuthOpts, AuthRequirement, ExecutableCommand, IcAdminVersion},
cordoned_feature_fetcher::{CordonedFeatureFetcher, CordonedFeatureFetcherImpl},
ic_admin::{download_ic_admin, should_update_ic_admin, IcAdmin, IcAdminImpl, FALLBACK_IC_ADMIN_VERSION},
runner::Runner,
subnet_manager::SubnetManager,
Expand All @@ -42,8 +44,11 @@ pub struct DreContext {
neuron: Neuron,
proceed_without_confirmation: bool,
version: IcAdminVersion,
cordoned_features_fetcher: Arc<dyn CordonedFeatureFetcher>,
health_client: Arc<dyn HealthStatusQuerier>,
}

#[allow(clippy::too_many_arguments)]
impl DreContext {
pub async fn new(
network: String,
Expand All @@ -57,6 +62,8 @@ impl DreContext {
auth_requirement: AuthRequirement,
forum_post_link: Option<String>,
ic_admin_version: IcAdminVersion,
cordoned_features_fetcher: Arc<dyn CordonedFeatureFetcher>,
health_client: Arc<dyn HealthStatusQuerier>,
) -> anyhow::Result<Self> {
let network = match offline {
false => ic_management_types::Network::new(network.clone(), &nns_urls)
Expand Down Expand Up @@ -93,6 +100,8 @@ impl DreContext {
neuron,
proceed_without_confirmation: yes,
version: ic_admin_version,
cordoned_features_fetcher,
health_client,
})
}

Expand All @@ -109,6 +118,10 @@ impl DreContext {
args.subcommands.require_auth(),
args.forum_post_link.clone(),
args.ic_admin_version.clone(),
Arc::new(CordonedFeatureFetcherImpl::new(args.offline, args.cordon_feature_fallback_file.clone())?) as Arc<dyn CordonedFeatureFetcher>,
Arc::new(health::HealthClient::new(
ic_management_types::Network::new(args.network.clone(), &args.nns_urls).await?,
)),
)
.await
}
Expand Down Expand Up @@ -212,10 +225,14 @@ impl DreContext {
))
}

pub async fn subnet_manager(&self) -> SubnetManager {
pub async fn subnet_manager(&self) -> anyhow::Result<SubnetManager> {
let registry = self.registry().await;

SubnetManager::new(registry, self.network().clone())
Ok(SubnetManager::new(
registry,
self.cordoned_features_fetcher.clone(),
self.health_client.clone(),
))
}

pub fn proposals_agent(&self) -> Arc<dyn ProposalAgent> {
Expand All @@ -235,6 +252,8 @@ impl DreContext {
self.verbose_runner,
self.ic_repo.clone(),
self.artifact_downloader.clone(),
self.cordoned_features_fetcher.clone(),
self.health_client.clone(),
));
*self.runner.borrow_mut() = Some(runner.clone());
Ok(runner)
Expand All @@ -250,10 +269,10 @@ impl DreContext {
pub mod tests {
use std::{cell::RefCell, sync::Arc};

use ic_management_backend::{lazy_git::LazyGit, lazy_registry::LazyRegistry, proposal::ProposalAgent};
use ic_management_backend::{health::HealthStatusQuerier, lazy_git::LazyGit, lazy_registry::LazyRegistry, proposal::ProposalAgent};
use ic_management_types::Network;

use crate::{artifact_downloader::ArtifactDownloader, auth::Neuron, ic_admin::IcAdmin};
use crate::{artifact_downloader::ArtifactDownloader, auth::Neuron, cordoned_feature_fetcher::CordonedFeatureFetcher, ic_admin::IcAdmin};

use super::DreContext;

Expand All @@ -265,6 +284,8 @@ pub mod tests {
git: Arc<dyn LazyGit>,
proposal_agent: Arc<dyn ProposalAgent>,
artifact_downloader: Arc<dyn ArtifactDownloader>,
cordoned_features_fetcher: Arc<dyn CordonedFeatureFetcher>,
health_client: Arc<dyn HealthStatusQuerier>,
) -> DreContext {
DreContext {
network,
Expand All @@ -281,6 +302,8 @@ pub mod tests {
neuron,
proceed_without_confirmation: true,
version: crate::commands::IcAdminVersion::Strict("Shouldn't reach this because of mock".to_string()),
cordoned_features_fetcher,
health_client,
}
}
}
1 change: 1 addition & 0 deletions rs/cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
pub mod artifact_downloader;
pub mod auth;
pub mod commands;
mod cordoned_feature_fetcher;
pub mod ctx;
mod desktop_notify;
pub mod ic_admin;
Expand Down
1 change: 1 addition & 0 deletions rs/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use log::{info, warn};
mod artifact_downloader;
mod auth;
mod commands;
mod cordoned_feature_fetcher;
mod ctx;
mod desktop_notify;
mod ic_admin;
Expand Down
Loading

0 comments on commit 565dfd7

Please sign in to comment.