diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b1f1ba75958..e2e18c119610 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,11 +32,16 @@
 - [#3591](https://github.com/ChainSafe/forest/pull/3591) Add
   `forest-tool car validate` command for checking non-filecoin invariants in CAR
   files.
+- [#3589](https://github.com/ChainSafe/forest/pull/3589) Add
+  `forest-tool archive diff` command for debugging state-root mismatches.
 
 ### Changed
 
 ### Removed
 
+- [#3589](https://github.com/ChainSafe/forest/pull/3589) Remove
+  `forest-cli state diff` command. Replaced by `forest-tool archive diff`.
+
 ### Fixed
 
 ## Forest v0.14.0 "Hakuna Matata"
diff --git a/documentation/src/developer_documentation/mainnet_compatibility.md b/documentation/src/developer_documentation/mainnet_compatibility.md
index 8ec8aab852ab..8261a2a0fbda 100644
--- a/documentation/src/developer_documentation/mainnet_compatibility.md
+++ b/documentation/src/developer_documentation/mainnet_compatibility.md
@@ -55,8 +55,18 @@ snapshots only contain stateroot data for the previous 2000 epochs. So, if you
 have a statediff at epoch X, download a snapshot for epoch X+100 and tell Forest
 to re-validate the snapshot from epoch X.
 
-For more detailed instructions, follow
-[this document](https://www.notion.so/chainsafe/Interop-debugging-6adabf9222d7449bbfeaacb1ec997cf8)
+Steps to print a state-diff:
+
+1. Note the epoch of the state-root mismatch. State-roots can only be checked
+   for the parents of a tipset, so the failing epoch may be 1 higher than you
+   think.
+2. Download a recent snapshot dated _before_ the failing epoch.
+3. Import the snapshot into Lotus and sync to HEAD.
+4. Export a new snapshot 100 epochs _after_ the failing epoch.
+5. Convert the `.car.zst` file to `.forest.car.zst` with:
+   `forest-tool snapshot compress {snapshot.car.zst}`
+6. Use the `forest-tool` binary to print the state-diff:
+   `forest-tool archive diff {snapshot.forest.car.zst} --epoch {failing_epoch}`
 
 ## FVM Traces
 
@@ -67,3 +77,8 @@ mismatches.
 To confirm: the execution traces format is not uniform across implementations,
 so it takes a certain amount of elbow grease to find the differences. Lotus is
 capable of spitting this out in JSON for nice UX
+
+## Dated resources
+
+For more (but dated) information, see
+[this document](https://www.notion.so/chainsafe/Interop-debugging-6adabf9222d7449bbfeaacb1ec997cf8).
diff --git a/src/cli/subcommands/state_cmd.rs b/src/cli/subcommands/state_cmd.rs
index fbd50bd86255..4044ee7c511e 100644
--- a/src/cli/subcommands/state_cmd.rs
+++ b/src/cli/subcommands/state_cmd.rs
@@ -2,15 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0, MIT
 
 use std::path::PathBuf;
-use std::sync::Arc;
 
-use crate::db::db_engine::db_root;
-use crate::db::db_engine::open_proxy_db;
 use crate::lotus_json::LotusJson;
 use crate::rpc_client::state_ops::state_fetch_root;
 use crate::shim::clock::ChainEpoch;
 use crate::shim::econ::TokenAmount;
-use crate::statediff::print_state_diff;
 use cid::Cid;
 use clap::Subcommand;
 use serde_tuple::{self, Deserialize_tuple, Serialize_tuple};
@@ -37,15 +33,6 @@ pub enum StateCommands {
         #[arg(short, long)]
         save_to_file: Option<PathBuf>,
     },
-    Diff {
-        /// The previous CID state root
-        pre: Cid,
-        /// The post CID state root
-        post: Cid,
-        /// The depth at which IPLD links are resolved
-        #[arg(short, long)]
-        depth: Option<u64>,
-    },
 }
 
 impl StateCommands {
@@ -59,18 +46,6 @@ impl StateCommands {
                         .map_err(handle_rpc_err)?
                 );
             }
-            Self::Diff { pre, post, depth } => {
-                let chain_path = config
-                    .client
-                    .data_dir
-                    .join(config.chain.network.to_string());
-                let blockstore =
-                    Arc::new(open_proxy_db(db_root(&chain_path)?, Default::default())?);
-
-                if let Err(err) = print_state_diff(&blockstore, &pre, &post, depth) {
-                    eprintln!("Failed to print state diff: {err}");
-                }
-            }
         }
         Ok(())
     }
diff --git a/src/statediff/mod.rs b/src/statediff/mod.rs
index 908c5c2d1e22..25a826af91d2 100644
--- a/src/statediff/mod.rs
+++ b/src/statediff/mod.rs
@@ -91,8 +91,14 @@ fn try_print_actor_states<BS: Blockstore>(
             if &other != actor {
                 let comma = ",";
                 let expected_pp = pp_actor_state(bs, &other, depth)?;
-                let expected = expected_pp.split(comma).collect::<Vec<&str>>();
-                let calculated = calc_pp.split(comma).collect::<Vec<&str>>();
+                let expected = expected_pp
+                    .split(comma)
+                    .map(|s| s.trim_start_matches('\n'))
+                    .collect::<Vec<&str>>();
+                let calculated = calc_pp
+                    .split(comma)
+                    .map(|s| s.trim_start_matches('\n'))
+                    .collect::<Vec<&str>>();
                 let diffs = TextDiff::from_slices(&expected, &calculated);
                 let stdout = stdout();
                 let mut handle = stdout.lock();
@@ -199,7 +205,6 @@ pub fn print_state_diff<BS>(
 where
     BS: Blockstore,
 {
-    eprintln!("StateDiff:\n  Expected: {expected_root}\n  Root: {root}");
     if let Err(e) = try_print_actor_states(bs, root, expected_root, depth) {
         println!("Could not resolve actor states: {e}\nUsing default resolution:");
         let expected = resolve_cids_recursive(bs, expected_root, depth)?;
diff --git a/src/tool/subcommands/archive_cmd.rs b/src/tool/subcommands/archive_cmd.rs
index d22d9c6d8122..936f9a0c725b 100644
--- a/src/tool/subcommands/archive_cmd.rs
+++ b/src/tool/subcommands/archive_cmd.rs
@@ -10,9 +10,14 @@ use crate::cid_collections::CidHashSet;
 use crate::cli_shared::{snapshot, snapshot::TrustedVendor};
 use crate::db::car::ManyCar;
 use crate::db::car::{AnyCar, RandomAccessFileReader};
+use crate::interpreter::VMTrace;
 use crate::ipld::{stream_graph, unordered_stream_graph};
 use crate::networks::{calibnet, mainnet, ChainConfig, NetworkChain};
+use crate::shim::address::CurrentNetwork;
 use crate::shim::clock::{ChainEpoch, EPOCHS_IN_DAY, EPOCH_DURATION_SECONDS};
+use crate::shim::fvm_shared_latest::address::Network;
+use crate::shim::machine::MultiEngine;
+use crate::state_manager::{apply_block_messages, NO_CALLBACK};
 use anyhow::{bail, Context as _};
 use chrono::NaiveDateTime;
 use clap::Subcommand;
@@ -81,6 +86,20 @@ pub enum ArchiveCommands {
         #[arg(long, default_value_t = false)]
         force: bool,
     },
+    /// Show the difference between the canonical and computed state of a
+    /// tipset.
+    Diff {
+        /// Snapshot input paths. Supports `.car`, `.car.zst`, and `.forest.car.zst`.
+        #[arg(required = true)]
+        snapshot_files: Vec<PathBuf>,
+        /// Selected epoch to validate.
+        #[arg(long)]
+        epoch: ChainEpoch,
+        /// Depth of diffing. Differences in trees below this depth will just be
+        /// shown as different branch IDs.
+        #[arg(long)]
+        depth: Option<u64>,
+    },
 }
 
 impl ArchiveCommands {
@@ -124,6 +143,11 @@ impl ArchiveCommands {
                 output_path,
                 force,
             } => merge_snapshots(snapshot_files, output_path, force).await,
+            Self::Diff {
+                snapshot_files,
+                epoch,
+                depth,
+            } => show_tipset_diff(snapshot_files, epoch, depth).await,
         }
     }
 }
@@ -440,6 +464,85 @@ async fn merge_snapshots(
     Ok(())
 }
 
+/// Recompute the state root produced by the tipset at `epoch` and compare it
+/// with the parent state root recorded in its child tipset. On a mismatch both
+/// roots are printed (red: recorded on chain, green: computed here), followed
+/// by a state diff whose `depth` is forwarded to `print_state_diff`.
+async fn show_tipset_diff(
+    snapshot_files: Vec<PathBuf>,
+    epoch: ChainEpoch,
+    depth: Option<u64>,
+) -> anyhow::Result<()> {
+    use colored::Colorize as _;
+
+    let store = Arc::new(ManyCar::try_from(snapshot_files)?);
+    let head = Arc::new(store.heaviest_tipset()?);
+
+    // The expected state root of a tipset is only recorded in its child, so
+    // the snapshot has to extend at least one epoch past the target.
+    let head_epoch = head.epoch();
+    if head_epoch <= epoch {
+        bail!(
+            "Highest epoch must be at least 1 greater than the target epoch. \
+             Highest epoch = {}, target epoch = {}.",
+            head_epoch,
+            epoch
+        );
+    }
+
+    // Network parameters are derived from the genesis block of the snapshot.
+    let genesis = head.genesis(&store)?;
+    let timestamp = genesis.timestamp();
+    let network = NetworkChain::from_genesis_or_devnet_placeholder(genesis.cid());
+    let chain_config = ChainConfig::from_chain(&network);
+    if chain_config.is_testnet() {
+        CurrentNetwork::set_global(Network::Testnet);
+    }
+    let beacon = Arc::new(chain_config.get_beacon_schedule(timestamp));
+
+    // `tipset` is the tipset to re-execute; `child_tipset` records the state
+    // root that is expected after executing it.
+    let chain_index = ChainIndex::new(Arc::clone(&store));
+    let tipset = chain_index.tipset_by_height(
+        epoch,
+        Arc::clone(&head),
+        ResolveNullTipset::TakeOlder,
+    )?;
+    let child_tipset = chain_index.tipset_by_height(
+        epoch + 1,
+        Arc::clone(&head),
+        ResolveNullTipset::TakeNewer,
+    )?;
+    let expected_root = child_tipset.parent_state();
+
+    // Re-apply the messages of `tipset` to obtain the computed state root.
+    let (computed_root, _) = apply_block_messages(
+        timestamp,
+        Arc::new(chain_index),
+        Arc::new(chain_config),
+        beacon,
+        &MultiEngine::default(),
+        tipset,
+        NO_CALLBACK,
+        VMTrace::NotTraced,
+    )?;
+
+    if expected_root == &computed_root {
+        println!("Computed state matches expected state.");
+        return Ok(());
+    }
+
+    // Mismatch: print both roots in diff style, then the detailed state diff.
+    let expected_line = format!("- Expected state hash: {expected_root}");
+    let computed_line = format!("+ Computed state hash: {computed_root}");
+    println!("{}", expected_line.red());
+    println!("{}", computed_line.green());
+
+    crate::statediff::print_state_diff(&store, &computed_root, expected_root, depth)?;
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;