From d77fb5cbbbb4c38cc9b3ae538243f7c243666ffc Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Wed, 22 May 2024 08:53:45 +0200 Subject: [PATCH] fix: result grouped by subdir instead of channel (#666) Also refactor to add a runtime error when duplicate package records are encountered. --- .../src/gateway/query.rs | 17 ++-- crates/rattler_solve/src/lib.rs | 80 ++++++++------- crates/rattler_solve/src/libsolv_c/input.rs | 90 +++++++++-------- crates/rattler_solve/src/libsolv_c/mod.rs | 42 ++++---- crates/rattler_solve/src/resolvo/mod.rs | 97 ++++++++++--------- crates/rattler_solve/tests/backends.rs | 38 ++++++-- 6 files changed, 211 insertions(+), 153 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 866fabfb9..a906b9a01 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -89,7 +89,6 @@ impl GatewayQuery { let channels_and_platforms = self .channels .iter() - .enumerate() .cartesian_product(self.platforms.into_iter()) .collect_vec(); @@ -97,10 +96,10 @@ impl GatewayQuery { // becomes available. let mut subdirs = Vec::with_capacity(channels_and_platforms.len()); let mut pending_subdirs = FuturesUnordered::new(); - for ((channel_idx, channel), platform) in channels_and_platforms { + for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. let barrier = Arc::new(BarrierCell::new()); - subdirs.push((channel_idx, barrier.clone())); + subdirs.push((subdir_idx, barrier.clone())); let inner = self.gateway.clone(); let reporter = self.reporter.clone(); @@ -136,14 +135,14 @@ impl GatewayQuery { let mut pending_records = FuturesUnordered::new(); // The resulting list of repodata records. - let mut result = vec![RepoData::default(); self.channels.len()]; + let mut result = vec![RepoData::default(); subdirs.len()]; // Loop until all pending package names have been fetched. loop { // Iterate over all pending package names and create futures to fetch them from all // subdirs. 
for (package_name, specs) in pending_package_specs.drain() { - for (channel_idx, subdir) in subdirs.iter().cloned() { + for (subdir_idx, subdir) in subdirs.iter().cloned() { let specs = specs.clone(); let package_name = package_name.clone(); let reporter = self.reporter.clone(); @@ -154,8 +153,8 @@ impl GatewayQuery { Subdir::Found(subdir) => subdir .get_or_fetch_package_records(&package_name, reporter) .await - .map(|records| (channel_idx, specs, records)), - Subdir::NotFound => Ok((channel_idx, specs, Arc::from(vec![]))), + .map(|records| (subdir_idx, specs, records)), + Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), } }); } @@ -170,7 +169,7 @@ impl GatewayQuery { // Handle any records that were fetched records = pending_records.select_next_some() => { - let (channel_idx, request_specs, records) = records?; + let (subdir_idx, request_specs, records) = records?; if self.recursive { // Extract the dependencies from the records and recursively add them to the @@ -193,7 +192,7 @@ impl GatewayQuery { // Add the records to the result if records.len() > 0 { - let result = &mut result[channel_idx]; + let result = &mut result[subdir_idx]; result.len += records.len(); result.shards.push(records); } diff --git a/crates/rattler_solve/src/lib.rs b/crates/rattler_solve/src/lib.rs index 082934333..b70f0d245 100644 --- a/crates/rattler_solve/src/lib.rs +++ b/crates/rattler_solve/src/lib.rs @@ -1,5 +1,6 @@ -//! `rattler_solve` is a crate that provides functionality to solve Conda environments. It currently -//! exposes the functionality through the [`SolverImpl::solve`] function. +//! `rattler_solve` is a crate that provides functionality to solve Conda +//! environments. It currently exposes the functionality through the +//! [`SolverImpl::solve`] function. #![deny(missing_docs)] @@ -8,17 +9,18 @@ pub mod libsolv_c; #[cfg(feature = "resolvo")] pub mod resolvo; +use std::fmt; + use chrono::{DateTime, Utc}; use rattler_conda_types::{GenericVirtualPackage, MatchSpec, RepoDataRecord}; -use std::fmt; /// Represents a solver implementation, capable of solving [`SolverTask`]s pub trait SolverImpl { /// The repo data associated to a channel and platform combination type RepoData<'a>: SolverRepoData<'a>; - /// Resolve the dependencies and return the [`RepoDataRecord`]s that should be present in the - /// environment. + /// Resolve the dependencies and return the [`RepoDataRecord`]s that should + /// be present in the environment. fn solve< 'a, R: IntoRepoData<'a, Self::RepoData<'a>>, @@ -36,14 +38,17 @@ pub enum SolveError { Unsolvable(Vec), /// The solver backend returned operations that we dont know how to install. - /// Each string is a somewhat user-friendly representation of which operation was not recognized - /// and can be used for error reporting + /// Each string is a somewhat user-friendly representation of which + /// operation was not recognized and can be used for error reporting UnsupportedOperations(Vec), /// Error when converting matchspec #[error(transparent)] ParseMatchSpecError(#[from] rattler_conda_types::ParseMatchSpecError), + /// Encountered duplicate records in the available packages. 
+ DuplicateRecords(String), + /// To support Resolvo cancellation Cancelled, } @@ -67,6 +72,9 @@ impl fmt::Display for SolveError { SolveError::Cancelled => { write!(f, "Solve operation has been cancelled") } + SolveError::DuplicateRecords(filename) => { + write!(f, "encountered duplicate records for {filename}") + } } } } @@ -74,8 +82,8 @@ impl fmt::Display for SolveError { /// Represents the channel priority option to use during solves. #[derive(Clone, Copy, PartialEq, Eq, Default)] pub enum ChannelPriority { - /// The channel that the package is first found in will be used as the only channel - /// for that package. + /// The channel that the package is first found in will be used as the only + /// channel for that package. #[default] Strict, @@ -83,32 +91,36 @@ pub enum ChannelPriority { // are only taken from lower-priority channels when this prevents unsatisfiable environment // errors, but this would need implementation in the solvers. // Flexible, - /// Packages can be retrieved from any channel as package version takes precedence. + /// Packages can be retrieved from any channel as package version takes + /// precedence. Disabled, } -/// Represents a dependency resolution task, to be solved by one of the backends (currently only -/// libsolv is supported) +/// Represents a dependency resolution task, to be solved by one of the backends +/// (currently only libsolv is supported) pub struct SolverTask { /// An iterator over all available packages pub available_packages: TAvailablePackagesIterator, /// Records of packages that are previously selected. /// - /// If the solver encounters multiple variants of a single package (identified by its name), it - /// will sort the records and select the best possible version. However, if there exists a - /// locked version it will prefer that variant instead. This is useful to reduce the number of + /// If the solver encounters multiple variants of a single package + /// (identified by its name), it will sort the records and select the + /// best possible version. However, if there exists a locked version it + /// will prefer that variant instead. This is useful to reduce the number of /// packages that are updated when installing new packages. /// - /// Usually you add the currently installed packages or packages from a lock-file here. + /// Usually you add the currently installed packages or packages from a + /// lock-file here. pub locked_packages: Vec, /// Records of packages that are previously selected and CANNOT be changed. /// - /// If the solver encounters multiple variants of a single package (identified by its name), it - /// will sort the records and select the best possible version. However, if there is a variant - /// available in the `pinned_packages` field it will always select that version no matter what - /// even if that means other packages have to be downgraded. + /// If the solver encounters multiple variants of a single package + /// (identified by its name), it will sort the records and select the + /// best possible version. However, if there is a variant available in + /// the `pinned_packages` field it will always select that version no matter + /// what even if that means other packages have to be downgraded. 
pub pinned_packages: Vec, /// Virtual packages considered active @@ -120,11 +132,12 @@ pub struct SolverTask { /// The timeout after which the solver should stop pub timeout: Option, - /// The channel priority to solve with, either [`ChannelPriority::Strict`] or - /// [`ChannelPriority::Disabled`] + /// The channel priority to solve with, either [`ChannelPriority::Strict`] + /// or [`ChannelPriority::Disabled`] pub channel_priority: ChannelPriority, - /// Exclude any package that has a timestamp newer than the specified timestamp. + /// Exclude any package that has a timestamp newer than the specified + /// timestamp. pub exclude_newer: Option>, /// The solve strategy. @@ -169,22 +182,23 @@ pub enum SolveStrategy { LowestVersionDirect, } -/// A representation of a collection of [`RepoDataRecord`] usable by a [`SolverImpl`] -/// implementation. +/// A representation of a collection of [`RepoDataRecord`] usable by a +/// [`SolverImpl`] implementation. /// -/// Some solvers might be able to cache the collection between different runs of the solver which -/// could potentially eliminate some overhead. This trait enables creating a representation of the -/// repodata that is most suitable for a specific backend. +/// Some solvers might be able to cache the collection between different runs of +/// the solver which could potentially eliminate some overhead. This trait +/// enables creating a representation of the repodata that is most suitable for +/// a specific backend. /// -/// Some solvers may add additional functionality to their specific implementation that enables -/// caching the repodata to disk in an efficient way (see [`crate::libsolv_c::RepoData`] for -/// an example). +/// Some solvers may add additional functionality to their specific +/// implementation that enables caching the repodata to disk in an efficient way +/// (see [`crate::libsolv_c::RepoData`] for an example). pub trait SolverRepoData<'a>: FromIterator<&'a RepoDataRecord> {} /// Defines the ability to convert a type into [`SolverRepoData`]. pub trait IntoRepoData<'a, S: SolverRepoData<'a>> { - /// Converts this instance into an instance of [`SolverRepoData`] which is consumable by a - /// specific [`SolverImpl`] implementation. + /// Converts this instance into an instance of [`SolverRepoData`] which is + /// consumable by a specific [`SolverImpl`] implementation. fn into(self) -> S; } diff --git a/crates/rattler_solve/src/libsolv_c/input.rs b/crates/rattler_solve/src/libsolv_c/input.rs index 799c2ef7c..0fb09c31f 100644 --- a/crates/rattler_solve/src/libsolv_c/input.rs +++ b/crates/rattler_solve/src/libsolv_c/input.rs @@ -1,5 +1,10 @@ -//! Contains business logic that loads information into libsolv in order to solve a conda -//! environment +//! Contains business logic that loads information into libsolv in order to +//! 
solve a conda environment + +use std::{cmp::Ordering, collections::HashMap}; + +use chrono::{DateTime, Utc}; +use rattler_conda_types::{package::ArchiveType, GenericVirtualPackage, RepoDataRecord}; use super::{ c_string, @@ -16,15 +21,14 @@ use super::{ solvable::SolvableId, }, }; -use chrono::{DateTime, Utc}; -use rattler_conda_types::{package::ArchiveType, GenericVirtualPackage, RepoDataRecord}; -use std::{cmp::Ordering, collections::HashMap}; +use crate::SolveError; #[cfg(not(target_family = "unix"))] /// Adds solvables to a repo from an in-memory .solv file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) pub fn add_solv_file(_pool: &Pool, _repo: &Repo<'_>, _solv_bytes: &LibcByteSlice) { unimplemented!("this platform does not support in-memory .solv files"); } @@ -32,8 +36,9 @@ pub fn add_solv_file(_pool: &Pool, _repo: &Repo<'_>, _solv_bytes: &LibcByteSlice #[cfg(target_family = "unix")] /// Adds solvables to a repo from an in-memory .solv file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) pub fn add_solv_file(pool: &Pool, repo: &Repo<'_>, solv_bytes: &LibcByteSlice) { // Add solv file from memory if available let mode = c_string("r"); @@ -50,12 +55,12 @@ pub fn add_repodata_records<'a>( repo: &Repo<'_>, repo_datas: impl IntoIterator, exclude_newer: Option<&DateTime>, -) -> Vec { +) -> Result, SolveError> { // Sanity check repo.ensure_belongs_to_pool(pool); - // Get all the IDs (these strings are internal to libsolv and always present, so we can - // unwrap them at will) + // Get all the IDs (these strings are internal to libsolv and always present, so + // we can unwrap them at will) let solvable_buildflavor_id = pool.find_interned_str(SOLVABLE_BUILDFLAVOR).unwrap(); let solvable_buildtime_id = pool.find_interned_str(SOLVABLE_BUILDTIME).unwrap(); let solvable_buildversion_id = pool.find_interned_str(SOLVABLE_BUILDVERSION).unwrap(); @@ -74,7 +79,8 @@ pub fn add_repodata_records<'a>( // Keeps a mapping from packages added to the repo to the type and solvable let mut package_to_type: HashMap<&str, (ArchiveType, SolvableId)> = HashMap::new(); - // Through `data` we can manipulate solvables (see the `Repodata` docs for details) + // Through `data` we can manipulate solvables (see the `Repodata` docs for + // details) let data = repo.add_repodata(); let mut solvable_ids = Vec::new(); @@ -87,7 +93,7 @@ pub fn add_repodata_records<'a>( // Create a solvable for the package let solvable_id = - match add_or_reuse_solvable(pool, repo, &data, &mut package_to_type, repo_data) { + match add_or_reuse_solvable(pool, repo, &data, &mut package_to_type, repo_data)? 
{ Some(id) => id, None => continue, }; @@ -96,7 +102,8 @@ pub fn add_repodata_records<'a>( // from the final transaction data.set_num(solvable_id, solvable_index_id, repo_data_index as u64); - // Safe because there are no other active references to any solvable (so no aliasing) + // Safe because there are no other active references to any solvable (so no + // aliasing) let solvable = unsafe { solvable_id.resolve_raw(pool).as_mut() }; let record = &repo_data.package_record; @@ -201,21 +208,22 @@ pub fn add_repodata_records<'a>( repo.internalize(); - solvable_ids + Ok(solvable_ids) } -/// When adding packages, we want to make sure that `.conda` packages have preference over `.tar.bz` -/// packages. For that reason, when adding a solvable we check first if a `.conda` version of the -/// package has already been added, in which case we forgo adding its `.tar.bz` version (and return -/// `None`). If no `.conda` version has been added, we create a new solvable (replacing any existing -/// solvable for the `.tar.bz` version of the package). +/// When adding packages, we want to make sure that `.conda` packages have +/// preference over `.tar.bz` packages. For that reason, when adding a solvable +/// we check first if a `.conda` version of the package has already been added, +/// in which case we forgo adding its `.tar.bz` version (and return `None`). If +/// no `.conda` version has been added, we create a new solvable (replacing any +/// existing solvable for the `.tar.bz` version of the package). fn add_or_reuse_solvable<'a>( pool: &Pool, repo: &Repo<'_>, data: &Repodata<'_>, package_to_type: &mut HashMap<&'a str, (ArchiveType, SolvableId)>, repo_data: &'a RepoDataRecord, -) -> Option { +) -> Result, SolveError> { // Sometimes we can reuse an existing solvable if let Some((filename, archive_type)) = ArchiveType::split_str(&repo_data.file_name) { if let Some(&(other_package_type, old_solvable_id)) = package_to_type.get(filename) { @@ -223,7 +231,7 @@ fn add_or_reuse_solvable<'a>( Ordering::Less => { // A previous package that we already stored is actually a package of a better // "type" so we'll just use that instead (.conda > .tar.bz) - return None; + return Ok(None); } Ordering::Greater => { // A previous package has a worse package "type", we'll reuse the handle but @@ -234,22 +242,22 @@ fn add_or_reuse_solvable<'a>( // Reset and reuse the old solvable reset_solvable(pool, repo, data, old_solvable_id); - return Some(old_solvable_id); + return Ok(Some(old_solvable_id)); } Ordering::Equal => { - unreachable!("found a duplicate package") + return Err(SolveError::DuplicateRecords(filename.to_string())); } } } else { let solvable_id = repo.add_solvable(); package_to_type.insert(filename, (archive_type, solvable_id)); - return Some(solvable_id); + return Ok(Some(solvable_id)); } } else { tracing::warn!("unknown package extension: {}", &repo_data.file_name); } - Some(repo.add_solvable()) + Ok(Some(repo.add_solvable())) } pub fn add_virtual_packages(pool: &Pool, repo: &Repo<'_>, packages: &[GenericVirtualPackage]) { @@ -261,7 +269,8 @@ pub fn add_virtual_packages(pool: &Pool, repo: &Repo<'_>, packages: &[GenericVir // Create a solvable for the package let solvable_id = repo.add_solvable(); - // Safe because there are no other references to this solvable_id (we just created it) + // Safe because there are no other references to this solvable_id (we just + // created it) let solvable = unsafe { solvable_id.resolve_raw(pool).as_mut() }; // Name and version @@ -286,34 +295,36 @@ fn reset_solvable(pool: 
&Pool, repo: &Repo<'_>, data: &Repodata<'_>, solvable_id pool.swap_solvables(blank_solvable, solvable_id); data.swap_attrs(blank_solvable, solvable_id); - // It is safe to free the blank solvable, because there are no other references to it - // than in this function + // It is safe to free the blank solvable, because there are no other references + // to it than in this function unsafe { repo.free_solvable(blank_solvable) }; } /// Caches the repodata as an in-memory `.solv` file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) #[cfg(not(target_family = "unix"))] -pub fn cache_repodata(_url: String, _data: &[RepoDataRecord]) -> LibcByteSlice { +pub fn cache_repodata(_url: String, _data: &[RepoDataRecord]) -> Result { unimplemented!("this function is only available on unix-like operating systems") } /// Caches the repodata as an in-memory `.solv` file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) #[cfg(target_family = "unix")] pub fn cache_repodata( url: String, data: &[RepoDataRecord], channel_priority: Option, -) -> LibcByteSlice { +) -> Result { // Add repodata to a new pool + repo let pool = Pool::default(); let repo = Repo::new(&pool, url, channel_priority.unwrap_or(0)); - add_repodata_records(&pool, &repo, data, None); + add_repodata_records(&pool, &repo, data, None)?; // Export repo to .solv in memory let mut stream_ptr = std::ptr::null_mut(); @@ -326,6 +337,7 @@ pub fn cache_repodata( let stream_ptr = std::ptr::NonNull::new(stream_ptr).expect("stream_ptr was null"); - // Safe because we know `stream_ptr` points to an array of bytes of length `stream_size` - unsafe { LibcByteSlice::from_raw_parts(stream_ptr.cast(), stream_size) } + // Safe because we know `stream_ptr` points to an array of bytes of length + // `stream_size` + Ok(unsafe { LibcByteSlice::from_raw_parts(stream_ptr.cast(), stream_size) }) } diff --git a/crates/rattler_solve/src/libsolv_c/mod.rs b/crates/rattler_solve/src/libsolv_c/mod.rs index 0cbe181b3..ae3dfd4eb 100644 --- a/crates/rattler_solve/src/libsolv_c/mod.rs +++ b/crates/rattler_solve/src/libsolv_c/mod.rs @@ -1,15 +1,16 @@ //! Provides an solver implementation based on the [`rattler_libsolv_c`] crate. 
-use crate::{ChannelPriority, IntoRepoData, SolveStrategy, SolverRepoData}; -use crate::{SolveError, SolverTask}; +use std::{ + collections::{HashMap, HashSet}, + ffi::CString, + mem::ManuallyDrop, +}; + pub use input::cache_repodata; use input::{add_repodata_records, add_solv_file, add_virtual_packages}; pub use libc_byte_slice::LibcByteSlice; use output::get_required_packages; use rattler_conda_types::RepoDataRecord; -use std::collections::{HashMap, HashSet}; -use std::ffi::CString; -use std::mem::ManuallyDrop; use wrapper::{ flags::SolverFlag, pool::{Pool, Verbosity}, @@ -17,13 +18,15 @@ use wrapper::{ solve_goal::SolveGoal, }; +use crate::{ChannelPriority, IntoRepoData, SolveError, SolveStrategy, SolverRepoData, SolverTask}; + mod input; mod libc_byte_slice; mod output; mod wrapper; -/// Represents the information required to load available packages into libsolv for a single channel -/// and platform combination +/// Represents the information required to load available packages into libsolv +/// for a single channel and platform combination #[derive(Clone)] pub struct RepoData<'a> { /// The actual records after parsing `repodata.json` @@ -55,8 +58,8 @@ impl<'a> RepoData<'a> { impl<'a> SolverRepoData<'a> for RepoData<'a> {} -/// Convenience method that converts a string reference to a `CString`, replacing NUL characters -/// with whitespace (`b' '`) +/// Convenience method that converts a string reference to a `CString`, +/// replacing NUL characters with whitespace (`b' '`) fn c_string>(str: T) -> CString { let bytes = str.as_ref().as_bytes(); @@ -72,7 +75,8 @@ fn c_string>(str: T) -> CString { // Trailing 0 vec.push(0); - // Safe because the string does is guaranteed to have no NUL bytes other than the trailing one + // Safe because the string does is guaranteed to have no NUL bytes other than + // the trailing one unsafe { CString::from_vec_with_nul_unchecked(vec) } } @@ -118,10 +122,11 @@ impl super::SolverImpl for Solver { .map(IntoRepoData::into) .collect(); - // Determine the channel priority for each channel in the repodata in the order in which - // the repodatas are passed, where the first channel will have the highest priority value - // and each successive channel will descend in priority value. If not strict, the highest - // priority value will be 0 and the channel priority map will not be populated as it will + // Determine the channel priority for each channel in the repodata in the order + // in which the repodatas are passed, where the first channel will have + // the highest priority value and each successive channel will descend + // in priority value. If not strict, the highest priority value will be + // 0 and the channel priority map will not be populated as it will // not be used. let mut highest_priority: i32 = 0; let channel_priority: HashMap = @@ -183,7 +188,7 @@ impl super::SolverImpl for Solver { &repo, repodata.records.iter().copied(), task.exclude_newer.as_ref(), - ); + )?; } // Keep our own info about repodata_records @@ -193,7 +198,7 @@ impl super::SolverImpl for Solver { // Create a special pool for records that are already installed or locked. 
let repo = Repo::new(&pool, "locked", highest_priority); - let installed_solvables = add_repodata_records(&pool, &repo, &task.locked_packages, None); + let installed_solvables = add_repodata_records(&pool, &repo, &task.locked_packages, None)?; // Also add the installed records to the repodata repo_mapping.insert(repo.id(), repo_mapping.len()); @@ -201,7 +206,7 @@ impl super::SolverImpl for Solver { // Create a special pool for records that are pinned and cannot be changed. let repo = Repo::new(&pool, "pinned", highest_priority); - let pinned_solvables = add_repodata_records(&pool, &repo, &task.pinned_packages, None); + let pinned_solvables = add_repodata_records(&pool, &repo, &task.pinned_packages, None)?; // Also add the installed records to the repodata repo_mapping.insert(repo.id(), repo_mapping.len()); @@ -261,9 +266,10 @@ impl super::SolverImpl for Solver { #[cfg(test)] mod test { - use super::*; use rstest::rstest; + use super::*; + #[rstest] #[case("", "")] #[case("a\0b\0c\0d\0", "a b c d ")] diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index f305aedd3..809cf3106 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -1,35 +1,36 @@ //! Provides an solver implementation based on the [`resolvo`] crate. -use crate::{ChannelPriority, IntoRepoData, SolveError, SolveStrategy, SolverRepoData, SolverTask}; -use chrono::{DateTime, Utc}; -use rattler_conda_types::package::ArchiveType; -use rattler_conda_types::{ - GenericVirtualPackage, MatchSpec, NamelessMatchSpec, PackageRecord, ParseMatchSpecError, - ParseStrictness, RepoDataRecord, -}; -use resolvo::{ - Candidates, Dependencies, DependencyProvider, KnownDependencies, NameId, Pool, SolvableDisplay, - SolvableId, Solver as LibSolvRsSolver, SolverCache, UnsolvableOrCancelled, VersionSet, - VersionSetId, -}; -use std::collections::HashSet; -use std::rc::Rc; use std::{ cell::RefCell, cmp::Ordering, - collections::HashMap, + collections::{HashMap, HashSet}, fmt::{Display, Formatter}, marker::PhantomData, ops::Deref, + rc::Rc, }; -use crate::resolvo::conda_util::CompareStrategy; +use chrono::{DateTime, Utc}; use itertools::Itertools; +use rattler_conda_types::{ + package::ArchiveType, GenericVirtualPackage, MatchSpec, NamelessMatchSpec, PackageRecord, + ParseMatchSpecError, ParseStrictness, RepoDataRecord, +}; +use resolvo::{ + Candidates, Dependencies, DependencyProvider, KnownDependencies, NameId, Pool, SolvableDisplay, + SolvableId, Solver as LibSolvRsSolver, SolverCache, UnsolvableOrCancelled, VersionSet, + VersionSetId, +}; + +use crate::{ + resolvo::conda_util::CompareStrategy, ChannelPriority, IntoRepoData, SolveError, SolveStrategy, + SolverRepoData, SolverTask, +}; mod conda_util; -/// Represents the information required to load available packages into libsolv for a single channel -/// and platform combination +/// Represents the information required to load available packages into libsolv +/// for a single channel and platform combination #[derive(Clone)] pub struct RepoData<'a> { /// The actual records after parsing `repodata.json` @@ -188,7 +189,7 @@ impl<'a> CondaDependencyProvider<'a> { channel_priority: ChannelPriority, exclude_newer: Option>, strategy: SolveStrategy, - ) -> Self { + ) -> Result { let pool = Rc::new(Pool::default()); let mut records: HashMap = HashMap::default(); @@ -218,14 +219,17 @@ impl<'a> CondaDependencyProvider<'a> { // Add additional records for repo_datas in repodata { - // Iterate over all records and dedup 
records that refer to the same package data but with - // different archive types. This can happen if you have two variants of the same package but - // with different extensions. We prefer `.conda` packages over `.tar.bz`. + // Iterate over all records and dedup records that refer to the same package + // data but with different archive types. This can happen if you + // have two variants of the same package but with different + // extensions. We prefer `.conda` packages over `.tar.bz`. // - // Its important to insert the records in the same order as how they were presented to this - // function to ensure that each solve is deterministic. Iterating over HashMaps is not - // deterministic at runtime so instead we store the values in a Vec as we iterate over the - // records. This guarentees that the order of records remains the same over runs. + // Its important to insert the records in the same order as how they were + // presented to this function to ensure that each solve is + // deterministic. Iterating over HashMaps is not deterministic at + // runtime so instead we store the values in a Vec as we iterate over the + // records. This guarentees that the order of records remains the same over + // runs. let mut ordered_repodata = Vec::with_capacity(repo_datas.records.len()); let mut package_to_type: HashMap<&str, (ArchiveType, usize, bool)> = HashMap::with_capacity(repo_datas.records.len()); @@ -252,27 +256,29 @@ impl<'a> CondaDependencyProvider<'a> { ordered_repodata[*idx] = record; *previous_excluded = false; } else if excluded && !*previous_excluded { - // The previous package would not have been excluded by the solver but - // this one will, so we'll keep the previous one regardless of the type. + // The previous package would not have been excluded + // by the solver but + // this one will, so we'll keep the previous one + // regardless of the type. } else { match archive_type.cmp(prev_archive_type) { Ordering::Greater => { - // A previous package has a worse package "type", we'll use the current record - // instead. + // A previous package has a worse package "type", we'll use the + // current record instead. *prev_archive_type = archive_type; ordered_repodata[*idx] = record; + *previous_excluded = excluded; } Ordering::Less => { - // A previous package that we already stored is actually a package of a better - // "type" so we'll just use that instead (.conda > .tar.bz) + // A previous package that we already stored + // is actually a package of a better + // "type" so we'll just use that instead + // (.conda > .tar.bz) } Ordering::Equal => { - if record != ordered_repodata[*idx] { - unreachable!( - "found duplicate record with different values for {}", - &record.file_name - ); - } + return Err(SolveError::DuplicateRecords( + record.file_name.clone(), + )); } } } @@ -314,7 +320,8 @@ impl<'a> CondaDependencyProvider<'a> { if let Some(spec_channel) = &spec.channel { if record.channel != spec_channel.base_url.to_string() { tracing::debug!("Ignoring {} from {} because it was not requested from that channel.", &record.package_record.name.as_normalized(), &record.channel); - // Add record to the excluded with reason of being in the non requested channel. + // Add record to the excluded with reason of being in the non + // requested channel. 
let message = format!( "candidate not in requested channel: '{}'", spec_channel @@ -332,7 +339,8 @@ impl<'a> CondaDependencyProvider<'a> { } // Enforce channel priority - // This function makes the assumption that the records are given in order of the channels. + // This function makes the assumption that the records are given in order of the + // channels. if let (Some(first_channel), ChannelPriority::Strict) = ( package_name_found_in_channel .get(&record.package_record.name.as_normalized().to_string()), @@ -382,7 +390,7 @@ impl<'a> CondaDependencyProvider<'a> { candidates.locked = Some(solvable); } - Self { + Ok(Self { pool, records, matchspec_to_highest_version: RefCell::default(), @@ -390,7 +398,7 @@ impl<'a> CondaDependencyProvider<'a> { stop_time, strategy, direct_dependencies, - } + }) } } @@ -473,7 +481,8 @@ impl<'a> DependencyProvider> for CondaDependencyProvider<'a> } } -/// Displays the different candidates by their version and sorted by their version +/// Displays the different candidates by their version and sorted by their +/// version pub struct CondaSolvableDisplay; impl SolvableDisplay> for CondaSolvableDisplay { @@ -522,7 +531,7 @@ impl super::SolverImpl for Solver { task.channel_priority, task.exclude_newer, task.strategy, - ); + )?; let pool = provider.pool.clone(); // Construct the requirements that the solver needs to satisfy. diff --git a/crates/rattler_solve/tests/backends.rs b/crates/rattler_solve/tests/backends.rs index 2df43d150..c37d4a93f 100644 --- a/crates/rattler_solve/tests/backends.rs +++ b/crates/rattler_solve/tests/backends.rs @@ -1,3 +1,5 @@ +use std::{str::FromStr, time::Instant}; + use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; use rattler_conda_types::{ @@ -6,8 +8,6 @@ use rattler_conda_types::{ }; use rattler_repodata_gateway::sparse::SparseRepoData; use rattler_solve::{ChannelPriority, SolveError, SolveStrategy, SolverImpl, SolverTask}; -use std::str::FromStr; -use std::time::Instant; use url::Url; fn channel_config() -> ChannelConfig { @@ -147,8 +147,8 @@ fn solve_real_world(specs: Vec<&str>) -> Vec { }) .collect::>(); - // The order of packages is nondeterministic, so we sort them to ensure we can compare them - // to a previous run + // The order of packages is nondeterministic, so we sort them to ensure we can + // compare them to a previous run pkgs.sort(); pkgs }; @@ -523,6 +523,22 @@ macro_rules! solver_backend_tests { "although there is a newer version available we expect an older version of foo because we exclude the newer version based on the timestamp"); assert_eq!(&info.file_name, "foo-3.0.2-py36h1af98f8_1.tar.bz2", "even though there is a conda version available we expect the tar.bz2 version because we exclude the .conda version based on the timestamp"); } + + #[test] + fn test_duplicate_record() { + use rattler_solve::SolverImpl; + + let mut records = super::read_repodata(&dummy_channel_json_path()); + records.push(records[0].clone()); + + let task = rattler_solve::SolverTask::from_iter([&records]); + + let result = <$T>::default().solve(task); + match result { + Err(rattler_solve::SolveError::DuplicateRecords(_)) => {}, + _ => panic!("expected a DuplicateRecord error"), + } + } }; } @@ -530,22 +546,23 @@ macro_rules! solver_backend_tests { mod libsolv_c { #![allow(unused_imports)] // For some reason windows thinks this is an unused import. 
+ use rattler_solve::{ChannelPriority, SolveStrategy}; + use super::{ dummy_channel_json_path, installed_package, solve, solve_real_world, FromStr, GenericVirtualPackage, SimpleSolveTask, SolveError, Version, }; - use rattler_solve::ChannelPriority; - use rattler_solve::SolveStrategy; solver_backend_tests!(rattler_solve::libsolv_c::Solver); #[test] #[cfg(target_family = "unix")] fn test_solve_with_cached_solv_file_install_new() { - use super::read_repodata; use rattler_conda_types::{Channel, ChannelConfig, MatchSpec}; use rattler_solve::{SolverImpl, SolverTask}; + use super::read_repodata; + let repo_data = read_repodata(&dummy_channel_json_path()); let cached_repo_data = rattler_solve::libsolv_c::cache_repodata( @@ -558,7 +575,8 @@ mod libsolv_c { .to_string(), &repo_data, None, - ); + ) + .unwrap(); let libsolv_repodata = rattler_solve::libsolv_c::RepoData { records: repo_data.iter().collect(), @@ -819,8 +837,8 @@ fn compare_solve(task: CompareTask<'_>) { }) .collect::>(); - // The order of packages is nondeterministic, so we sort them to ensure we can compare them - // to a previous run + // The order of packages is nondeterministic, so we sort them to ensure we can + // compare them to a previous run pkgs.sort(); pkgs };
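Below is a minimal caller-side sketch of how the new `SolveError::DuplicateRecords` variant introduced by this patch might be handled: instead of the backends hitting an `unreachable!` panic on duplicate filenames, the error now bubbles up to the caller. The task construction mirrors the new `test_duplicate_record` test above; the `records` argument is assumed to come from something like `read_repodata`, and the sketch assumes `solve` still returns `Result<Vec<RepoDataRecord>, SolveError>`, as the existing tests in this file suggest.

use rattler_conda_types::RepoDataRecord;
use rattler_solve::{resolvo::Solver, SolveError, SolverImpl, SolverTask};

fn solve_reporting_duplicates(
    records: &Vec<RepoDataRecord>,
) -> Option<Vec<RepoDataRecord>> {
    // Build a task from a single set of available packages, as the tests do.
    let task = SolverTask::from_iter([records]);
    match Solver::default().solve(task) {
        Ok(solution) => Some(solution),
        // New in this patch: duplicate records surface as a runtime error,
        // rendered by the Display impl as
        // "encountered duplicate records for <filename>".
        Err(SolveError::DuplicateRecords(filename)) => {
            eprintln!("duplicate repodata record: {filename}");
            None
        }
        Err(err) => {
            eprintln!("solve failed: {err}");
            None
        }
    }
}

The same handling applies to `rattler_solve::libsolv_c::Solver`, since both backends now return the variant instead of panicking. Note also that gateway query results are now one `RepoData` per (channel, platform) subdir rather than one per channel, so callers that previously indexed the result by channel index should index by subdir instead.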