diff --git a/Cargo.Bazel.lock b/Cargo.Bazel.lock index 8e52068ea..c06a2dfac 100644 --- a/Cargo.Bazel.lock +++ b/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "87e2c8f3dba5c353695777f770c5dd8e59ed40002ef4a441217e73677175c77b", + "checksum": "c3dcea9e1d5b4d6c8e5e6f9ece05a77fcd0901359c76444219a2d1904e54e8c2", "crates": { "actix-codec 0.5.2": { "name": "actix-codec", @@ -11213,6 +11213,10 @@ ], "deps": { "common": [ + { + "id": "actix-web 4.6.0", + "target": "actix_web" + }, { "id": "anyhow 1.0.86", "target": "anyhow" @@ -11434,6 +11438,10 @@ }, "deps_dev": { "common": [ + { + "id": "actix-rt 2.9.0", + "target": "actix_rt" + }, { "id": "wiremock 0.6.0", "target": "wiremock" @@ -28936,29 +28944,35 @@ ], "crate_features": { "common": [ - "elf", - "errno", "general", "ioctl", "no_std" ], "selects": { "aarch64-unknown-linux-gnu": [ + "elf", + "errno", "prctl", "std", "system" ], "arm-unknown-linux-gnueabi": [ + "elf", + "errno", "prctl", "std", "system" ], "armv7-unknown-linux-gnueabi": [ + "elf", + "errno", "prctl", "std", "system" ], "i686-unknown-linux-gnu": [ + "elf", + "errno", "prctl", "std", "system" @@ -28974,6 +28988,8 @@ "system" ], "x86_64-unknown-linux-gnu": [ + "elf", + "errno", "prctl", "std", "system" diff --git a/Cargo.lock b/Cargo.lock index 66bc4de18..f16830a39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2214,6 +2214,8 @@ checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" name = "dre" version = "0.4.0" dependencies = [ + "actix-rt", + "actix-web", "anyhow", "async-recursion", "async-trait", diff --git a/rs/cli/Cargo.toml b/rs/cli/Cargo.toml index 73ced9359..7fd4d49df 100644 --- a/rs/cli/Cargo.toml +++ b/rs/cli/Cargo.toml @@ -11,6 +11,7 @@ documentation.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +actix-web = { workspace = true } anyhow = { workspace = true } async-recursion = { workspace = true } async-trait = { workspace = true } @@ -72,6 +73,7 @@ url = { workspace = true } humantime = { workspace = true } [dev-dependencies] +actix-rt = { workspace = true } wiremock = { workspace = true } [build-dependencies] diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index d9726df8a..2c07b0317 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -5,13 +5,13 @@ use dotenv::dotenv; use dre::detect_neuron::Auth; use dre::general::{filter_proposals, get_node_metrics_history, vote_on_proposals}; use dre::operations::hostos_rollout::{NodeGroupUpdate, NumberOfNodes}; -use dre::{cli, ic_admin, local_unused_port, registry_dump, runner}; +use dre::{cli, ic_admin, registry_dump, runner}; use ic_base_types::CanisterId; use ic_canisters::governance::{governance_canister_version, GovernanceCanisterWrapper}; use ic_canisters::CanisterClient; -use ic_management_backend::endpoints; use ic_management_types::requests::NodesRemoveRequest; use ic_management_types::{Artifact, MinNakamotoCoefficients, NodeFeature}; + use ic_nns_common::pb::v1::ProposalId; use ic_nns_governance::pb::v1::ListProposalInfo; use log::{info, warn}; @@ -19,8 +19,6 @@ use regex::Regex; use serde_json::Value; use std::collections::BTreeMap; use std::str::FromStr; -use std::sync::mpsc; -use std::thread; use tokio::runtime::Runtime; const STAGING_NEURON_ID: u64 = 49; @@ -70,35 +68,18 @@ async fn async_main() -> Result<(), anyhow::Error> { let governance_canister_version = governance_canister_v.stringified_hash; - let (tx, rx) = mpsc::channel(); - - let backend_port = local_unused_port(); - let target_network_backend = target_network.clone(); - thread::spawn(move || { - let rt = tokio::runtime::Runtime::new().unwrap(); - rt.block_on(async move { - endpoints::run_backend(&target_network_backend, "127.0.0.1", backend_port, true, Some(tx)) - .await - .expect("failed") - }); - }); - - let srv = rx.recv().unwrap(); - - let r = ic_admin::with_ic_admin(governance_canister_version.into(), async { + ic_admin::with_ic_admin(governance_canister_version.into(), async { let dry_run = cli_opts.dry_run; + let cli = dre::parsed_cli::ParsedCli::from_opts(&cli_opts) + .await + .expect("Failed to create authenticated CLI"); + let ic_admin_wrapper = IcAdminWrapper::from_cli(cli); - let runner_instance = { - let cli = dre::parsed_cli::ParsedCli::from_opts(&cli_opts) - .await - .expect("Failed to create authenticated CLI"); - let ic_admin_wrapper = IcAdminWrapper::from_cli(cli); - runner::Runner::new_with_network_and_backend_port(ic_admin_wrapper, &target_network, backend_port) - .await - .expect("Failed to create a runner") - }; + let runner_instance = runner::Runner::new(ic_admin_wrapper, &target_network) + .await + .expect("Failed to create a runner"); - match &cli_opts.subcommand { + let r = match &cli_opts.subcommand { cli::Commands::DerToPrincipal { path } => { let principal = ic_base_types::PrincipalId::new_self_authenticating(&std::fs::read(path)?); println!("{}", principal); @@ -268,26 +249,20 @@ async fn async_main() -> Result<(), anyhow::Error> { cli::Commands::Propose { args } => runner_instance.ic_admin.run_passthrough_propose(args, dry_run).await, cli::Commands::UpdateUnassignedNodes { nns_subnet_id } => { - let runner_instance = if target_network.is_mainnet() { - runner_instance.as_automation() + let ic_admin = if target_network.is_mainnet() { + runner_instance.ic_admin.clone().as_automation() } else { - runner_instance + runner_instance.ic_admin.clone() }; let nns_subnet_id = match nns_subnet_id { Some(subnet_id) => subnet_id.to_owned(), None => { - let res = runner_instance - .ic_admin - .run_passthrough_get(&["get-subnet-list".to_string()], true) - .await?; + let res = ic_admin.run_passthrough_get(&["get-subnet-list".to_string()], true).await?; let subnet_list: Vec = serde_json::from_str(&res)?; subnet_list.first().ok_or_else(|| anyhow::anyhow!("No subnet found"))?.clone() } }; - runner_instance - .ic_admin - .update_unassigned_nodes(&nns_subnet_id, &target_network, dry_run) - .await + ic_admin.update_unassigned_nodes(&nns_subnet_id, &target_network, dry_run).await } cli::Commands::Version(version_command) => match &version_command { @@ -331,13 +306,11 @@ async fn async_main() -> Result<(), anyhow::Error> { }, cli::Commands::Hostos(nodes) => { - let runner_instance = if target_network.is_mainnet() { - runner_instance.as_automation() - } else { - runner_instance - }; + let as_automation = target_network.is_mainnet(); match &nodes.subcommand { - cli::hostos::Commands::Rollout { version, nodes } => runner_instance.hostos_rollout(nodes.clone(), version, dry_run, None).await, + cli::hostos::Commands::Rollout { version, nodes } => { + runner_instance.hostos_rollout(nodes.clone(), version, dry_run, None, as_automation).await + } cli::hostos::Commands::RolloutFromNodeGroup { version, assignment, @@ -347,7 +320,9 @@ async fn async_main() -> Result<(), anyhow::Error> { } => { let update_group = NodeGroupUpdate::new(*assignment, *owner, NumberOfNodes::from_str(nodes_in_group)?); if let Some((nodes_to_update, summary)) = runner_instance.hostos_rollout_nodes(update_group, version, exclude).await? { - return runner_instance.hostos_rollout(nodes_to_update, version, dry_run, Some(summary)).await; + return runner_instance + .hostos_rollout(nodes_to_update, version, dry_run, Some(summary), as_automation) + .await; } Ok(()) } @@ -550,13 +525,11 @@ async fn async_main() -> Result<(), anyhow::Error> { Ok(()) } }, - } + }; + let _ = runner_instance.stop_backend().await; + r }) - .await; - - srv.stop(false).await; - - r + .await } // Construct MinNakamotoCoefficients from an array (slice) of ["key=value"], and diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index a81b811d2..f47f5e01a 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -3,9 +3,11 @@ use crate::ic_admin::ProposeOptions; use crate::operations::hostos_rollout::{HostosRollout, HostosRolloutResponse, NodeGroupUpdate}; use crate::ops_subnet_node_replace; use crate::{ic_admin, local_unused_port}; +use actix_web::dev::ServerHandle; use decentralization::SubnetChangeResponse; use futures::future::join_all; use ic_base_types::PrincipalId; +use ic_management_backend::endpoints; use ic_management_backend::proposal::ProposalAgent; use ic_management_backend::public_dashboard::query_ic_dashboard_list; use ic_management_backend::registry::{self, RegistryState}; @@ -13,17 +15,74 @@ use ic_management_types::requests::NodesRemoveRequest; use ic_management_types::{Artifact, Network, Node, NodeFeature, NodeProvidersResponse}; use itertools::Itertools; use log::{info, warn}; +use std::cell::RefCell; use std::collections::BTreeMap; +use std::sync::mpsc; +use std::thread; use tabled::builder::Builder; use tabled::settings::Style; pub struct Runner { pub ic_admin: ic_admin::IcAdminWrapper, - dashboard_backend_client: DashboardBackendClient, registry: RegistryState, + dashboard_backend_client: RefCell>, + backend_srv: RefCell>, } impl Runner { + pub async fn get_backend_client(&self) -> anyhow::Result { + if let Some(dashboard_backend_client) = &*self.dashboard_backend_client.borrow() { + return Ok(dashboard_backend_client.clone()); + }; + + // This will be executed just once creating the backend + let backend_port = local_unused_port(); + let backend_url = format!("http://localhost:{}/", backend_port); + let (tx, rx) = mpsc::channel(); + + let target_network_backend = self.registry.network(); + thread::spawn(move || { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + endpoints::run_backend(&target_network_backend, "127.0.0.1", backend_port, true, Some(tx)) + .await + .expect("failed") + }); + }); + let srv = rx.recv().unwrap(); + let dashboard_backend_client = DashboardBackendClient::new_with_backend_url(backend_url); + + self.dashboard_backend_client + .borrow_mut() + .get_or_insert_with(|| dashboard_backend_client.clone()); + self.backend_srv.borrow_mut().get_or_insert_with(|| srv.clone()); + + Ok(dashboard_backend_client) + } + + pub async fn stop_backend(&self) -> anyhow::Result<()> { + let backend_srv_opt = self.backend_srv.borrow().clone(); + if let Some(backend_srv) = backend_srv_opt { + backend_srv.stop(false).await; + } + Ok(()) + } + + pub async fn new(ic_admin: ic_admin::IcAdminWrapper, network: &Network) -> anyhow::Result { + let mut registry = registry::RegistryState::new(network, true).await; + let node_providers = query_ic_dashboard_list::(network, "v3/node-providers") + .await? + .node_providers; + registry.update_node_details(&node_providers).await?; + + Ok(Self { + ic_admin, + registry, + dashboard_backend_client: RefCell::new(None), + backend_srv: RefCell::new(None), + }) + } + pub async fn deploy(&self, subnet: &PrincipalId, version: &str, dry_run: bool) -> anyhow::Result<()> { self.ic_admin .propose_run( @@ -59,7 +118,7 @@ impl Runner { dry_run: bool, ) -> anyhow::Result<()> { let subnet = request.subnet; - let change = self.dashboard_backend_client.subnet_resize(request).await?; + let change = self.get_backend_client().await?.subnet_resize(request).await?; if verbose { if let Some(run_log) = &change.run_log { println!("{}\n", run_log.join("\n")); @@ -107,7 +166,7 @@ impl Runner { println!("{}", self.ic_admin.grep_subcommand_arguments("propose-to-create-subnet")); return Ok(()); } - let subnet_creation_data = self.dashboard_backend_client.subnet_create(request).await?; + let subnet_creation_data = self.get_backend_client().await?.subnet_create(request).await?; if verbose { if let Some(run_log) = &subnet_creation_data.run_log { println!("{}\n", run_log.join("\n")); @@ -116,7 +175,8 @@ impl Runner { println!("{}", subnet_creation_data); let replica_version = replica_version.unwrap_or( - self.dashboard_backend_client + self.get_backend_client() + .await? .get_nns_replica_version() .await .expect("Failed to get a GuestOS version of the NNS subnet"), @@ -146,7 +206,7 @@ impl Runner { verbose: bool, dry_run: bool, ) -> anyhow::Result<()> { - let change = self.dashboard_backend_client.membership_replace(request).await?; + let change = self.get_backend_client().await?.membership_replace(request).await?; if verbose { if let Some(run_log) = &change.run_log { println!("{}\n", run_log.join("\n")); @@ -163,7 +223,7 @@ impl Runner { async fn run_membership_change(&self, change: SubnetChangeResponse, options: ProposeOptions, dry_run: bool) -> anyhow::Result<()> { let subnet_id = change.subnet_id.ok_or_else(|| anyhow::anyhow!("subnet_id is required"))?; - let pending_action = self.dashboard_backend_client.subnet_pending_action(subnet_id).await?; + let pending_action = self.get_backend_client().await?.subnet_pending_action(subnet_id).await?; if let Some(proposal) = pending_action { return Err(anyhow::anyhow!(format!( "There is a pending proposal for this subnet: https://dashboard.internetcomputer.org/proposal/{}", @@ -186,44 +246,8 @@ impl Runner { Ok(()) } - pub async fn new_with_network_and_backend_port(ic_admin: ic_admin::IcAdminWrapper, network: &Network, backend_port: u16) -> anyhow::Result { - let backend_url = format!("http://localhost:{}/", backend_port); - - let dashboard_backend_client = DashboardBackendClient::new_with_backend_url(backend_url); - let mut registry = registry::RegistryState::new(network, true).await; - let node_providers = query_ic_dashboard_list::(network, "v3/node-providers") - .await? - .node_providers; - registry.update_node_details(&node_providers).await?; - - Ok(Self { - ic_admin, - dashboard_backend_client, - // TODO: Remove once DREL-118 completed. - registry, - }) - } - - pub async fn new(ic_admin: ic_admin::IcAdminWrapper, network: &Network) -> anyhow::Result { - // TODO: Remove once DREL-118 completed. - let backend_port = local_unused_port(); - let backend_url = format!("http://localhost:{}/", backend_port); - let dashboard_backend_client = DashboardBackendClient::new_with_backend_url(backend_url); - - let mut registry = registry::RegistryState::new(network, true).await; - let node_providers = query_ic_dashboard_list::(network, "v3/node-providers") - .await? - .node_providers; - registry.update_node_details(&node_providers).await?; - Ok(Self { - ic_admin, - dashboard_backend_client, - registry, - }) - } - pub async fn prepare_versions_to_retire(&self, release_artifact: &Artifact, edit_summary: bool) -> anyhow::Result<(String, Option>)> { - let retireable_versions = self.dashboard_backend_client.get_retireable_versions(release_artifact).await?; + let retireable_versions = self.get_backend_client().await?.get_retireable_versions(release_artifact).await?; let versions = if retireable_versions.is_empty() { Vec::new() @@ -374,9 +398,22 @@ impl Runner { } } } - pub async fn hostos_rollout(&self, nodes: Vec, version: &str, dry_run: bool, maybe_summary: Option) -> anyhow::Result<()> { + pub async fn hostos_rollout( + &self, + nodes: Vec, + version: &str, + dry_run: bool, + maybe_summary: Option, + as_automation: bool, + ) -> anyhow::Result<()> { + let ic_admin = if as_automation { + self.ic_admin.clone().as_automation() + } else { + self.ic_admin.clone() + }; + let title = format!("Set HostOS version: {version} on {} nodes", nodes.clone().len()); - self.ic_admin + ic_admin .propose_run( ic_admin::ProposeCommand::DeployHostosToSomeNodes { nodes: nodes.clone(), @@ -398,7 +435,7 @@ impl Runner { } pub async fn remove_nodes(&self, request: NodesRemoveRequest, dry_run: bool) -> anyhow::Result<()> { - let node_remove_response = self.dashboard_backend_client.remove_nodes(request).await?; + let node_remove_response = self.get_backend_client().await?.remove_nodes(request).await?; let mut node_removals = node_remove_response.removals; node_removals.sort_by_key(|nr| nr.reason.message()); @@ -452,7 +489,7 @@ impl Runner { _verbose: bool, simulate: bool, ) -> Result<(), anyhow::Error> { - let change = self.dashboard_backend_client.network_heal(request).await?; + let change = self.get_backend_client().await?.network_heal(request).await?; println!("{}", change); let errors = join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move {