From d77fb5cbbbb4c38cc9b3ae538243f7c243666ffc Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Wed, 22 May 2024 08:53:45 +0200 Subject: [PATCH] fix: result grouped by subdir instead of channel (#666) Also refactor to add a runtime error when duplicate package records are encountered. --- .../src/gateway/query.rs | 17 ++-- crates/rattler_solve/src/lib.rs | 80 ++++++++------- crates/rattler_solve/src/libsolv_c/input.rs | 90 +++++++++-------- crates/rattler_solve/src/libsolv_c/mod.rs | 42 ++++---- crates/rattler_solve/src/resolvo/mod.rs | 97 ++++++++++--------- crates/rattler_solve/tests/backends.rs | 38 ++++++-- 6 files changed, 211 insertions(+), 153 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 866fabfb9..a906b9a01 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -89,7 +89,6 @@ impl GatewayQuery { let channels_and_platforms = self .channels .iter() - .enumerate() .cartesian_product(self.platforms.into_iter()) .collect_vec(); @@ -97,10 +96,10 @@ impl GatewayQuery { // becomes available. let mut subdirs = Vec::with_capacity(channels_and_platforms.len()); let mut pending_subdirs = FuturesUnordered::new(); - for ((channel_idx, channel), platform) in channels_and_platforms { + for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. let barrier = Arc::new(BarrierCell::new()); - subdirs.push((channel_idx, barrier.clone())); + subdirs.push((subdir_idx, barrier.clone())); let inner = self.gateway.clone(); let reporter = self.reporter.clone(); @@ -136,14 +135,14 @@ impl GatewayQuery { let mut pending_records = FuturesUnordered::new(); // The resulting list of repodata records. - let mut result = vec![RepoData::default(); self.channels.len()]; + let mut result = vec![RepoData::default(); subdirs.len()]; // Loop until all pending package names have been fetched. loop { // Iterate over all pending package names and create futures to fetch them from all // subdirs. 
for (package_name, specs) in pending_package_specs.drain() { - for (channel_idx, subdir) in subdirs.iter().cloned() { + for (subdir_idx, subdir) in subdirs.iter().cloned() { let specs = specs.clone(); let package_name = package_name.clone(); let reporter = self.reporter.clone(); @@ -154,8 +153,8 @@ impl GatewayQuery { Subdir::Found(subdir) => subdir .get_or_fetch_package_records(&package_name, reporter) .await - .map(|records| (channel_idx, specs, records)), - Subdir::NotFound => Ok((channel_idx, specs, Arc::from(vec![]))), + .map(|records| (subdir_idx, specs, records)), + Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), } }); } @@ -170,7 +169,7 @@ impl GatewayQuery { // Handle any records that were fetched records = pending_records.select_next_some() => { - let (channel_idx, request_specs, records) = records?; + let (subdir_idx, request_specs, records) = records?; if self.recursive { // Extract the dependencies from the records and recursively add them to the @@ -193,7 +192,7 @@ impl GatewayQuery { // Add the records to the result if records.len() > 0 { - let result = &mut result[channel_idx]; + let result = &mut result[subdir_idx]; result.len += records.len(); result.shards.push(records); } diff --git a/crates/rattler_solve/src/lib.rs b/crates/rattler_solve/src/lib.rs index 082934333..b70f0d245 100644 --- a/crates/rattler_solve/src/lib.rs +++ b/crates/rattler_solve/src/lib.rs @@ -1,5 +1,6 @@ -//! `rattler_solve` is a crate that provides functionality to solve Conda environments. It currently -//! exposes the functionality through the [`SolverImpl::solve`] function. +//! `rattler_solve` is a crate that provides functionality to solve Conda +//! environments. It currently exposes the functionality through the +//! [`SolverImpl::solve`] function. #![deny(missing_docs)] @@ -8,17 +9,18 @@ pub mod libsolv_c; #[cfg(feature = "resolvo")] pub mod resolvo; +use std::fmt; + use chrono::{DateTime, Utc}; use rattler_conda_types::{GenericVirtualPackage, MatchSpec, RepoDataRecord}; -use std::fmt; /// Represents a solver implementation, capable of solving [`SolverTask`]s pub trait SolverImpl { /// The repo data associated to a channel and platform combination type RepoData<'a>: SolverRepoData<'a>; - /// Resolve the dependencies and return the [`RepoDataRecord`]s that should be present in the - /// environment. + /// Resolve the dependencies and return the [`RepoDataRecord`]s that should + /// be present in the environment. fn solve< 'a, R: IntoRepoData<'a, Self::RepoData<'a>>, @@ -36,14 +38,17 @@ pub enum SolveError { Unsolvable(Vec), /// The solver backend returned operations that we dont know how to install. - /// Each string is a somewhat user-friendly representation of which operation was not recognized - /// and can be used for error reporting + /// Each string is a somewhat user-friendly representation of which + /// operation was not recognized and can be used for error reporting UnsupportedOperations(Vec), /// Error when converting matchspec #[error(transparent)] ParseMatchSpecError(#[from] rattler_conda_types::ParseMatchSpecError), + /// Encountered duplicate records in the available packages. 
+ DuplicateRecords(String), + /// To support Resolvo cancellation Cancelled, } @@ -67,6 +72,9 @@ impl fmt::Display for SolveError { SolveError::Cancelled => { write!(f, "Solve operation has been cancelled") } + SolveError::DuplicateRecords(filename) => { + write!(f, "encountered duplicate records for {filename}") + } } } } @@ -74,8 +82,8 @@ impl fmt::Display for SolveError { /// Represents the channel priority option to use during solves. #[derive(Clone, Copy, PartialEq, Eq, Default)] pub enum ChannelPriority { - /// The channel that the package is first found in will be used as the only channel - /// for that package. + /// The channel that the package is first found in will be used as the only + /// channel for that package. #[default] Strict, @@ -83,32 +91,36 @@ pub enum ChannelPriority { // are only taken from lower-priority channels when this prevents unsatisfiable environment // errors, but this would need implementation in the solvers. // Flexible, - /// Packages can be retrieved from any channel as package version takes precedence. + /// Packages can be retrieved from any channel as package version takes + /// precedence. Disabled, } -/// Represents a dependency resolution task, to be solved by one of the backends (currently only -/// libsolv is supported) +/// Represents a dependency resolution task, to be solved by one of the backends +/// (currently only libsolv is supported) pub struct SolverTask { /// An iterator over all available packages pub available_packages: TAvailablePackagesIterator, /// Records of packages that are previously selected. /// - /// If the solver encounters multiple variants of a single package (identified by its name), it - /// will sort the records and select the best possible version. However, if there exists a - /// locked version it will prefer that variant instead. This is useful to reduce the number of + /// If the solver encounters multiple variants of a single package + /// (identified by its name), it will sort the records and select the + /// best possible version. However, if there exists a locked version it + /// will prefer that variant instead. This is useful to reduce the number of /// packages that are updated when installing new packages. /// - /// Usually you add the currently installed packages or packages from a lock-file here. + /// Usually you add the currently installed packages or packages from a + /// lock-file here. pub locked_packages: Vec, /// Records of packages that are previously selected and CANNOT be changed. /// - /// If the solver encounters multiple variants of a single package (identified by its name), it - /// will sort the records and select the best possible version. However, if there is a variant - /// available in the `pinned_packages` field it will always select that version no matter what - /// even if that means other packages have to be downgraded. + /// If the solver encounters multiple variants of a single package + /// (identified by its name), it will sort the records and select the + /// best possible version. However, if there is a variant available in + /// the `pinned_packages` field it will always select that version no matter + /// what even if that means other packages have to be downgraded. 
pub pinned_packages: Vec, /// Virtual packages considered active @@ -120,11 +132,12 @@ pub struct SolverTask { /// The timeout after which the solver should stop pub timeout: Option, - /// The channel priority to solve with, either [`ChannelPriority::Strict`] or - /// [`ChannelPriority::Disabled`] + /// The channel priority to solve with, either [`ChannelPriority::Strict`] + /// or [`ChannelPriority::Disabled`] pub channel_priority: ChannelPriority, - /// Exclude any package that has a timestamp newer than the specified timestamp. + /// Exclude any package that has a timestamp newer than the specified + /// timestamp. pub exclude_newer: Option>, /// The solve strategy. @@ -169,22 +182,23 @@ pub enum SolveStrategy { LowestVersionDirect, } -/// A representation of a collection of [`RepoDataRecord`] usable by a [`SolverImpl`] -/// implementation. +/// A representation of a collection of [`RepoDataRecord`] usable by a +/// [`SolverImpl`] implementation. /// -/// Some solvers might be able to cache the collection between different runs of the solver which -/// could potentially eliminate some overhead. This trait enables creating a representation of the -/// repodata that is most suitable for a specific backend. +/// Some solvers might be able to cache the collection between different runs of +/// the solver which could potentially eliminate some overhead. This trait +/// enables creating a representation of the repodata that is most suitable for +/// a specific backend. /// -/// Some solvers may add additional functionality to their specific implementation that enables -/// caching the repodata to disk in an efficient way (see [`crate::libsolv_c::RepoData`] for -/// an example). +/// Some solvers may add additional functionality to their specific +/// implementation that enables caching the repodata to disk in an efficient way +/// (see [`crate::libsolv_c::RepoData`] for an example). pub trait SolverRepoData<'a>: FromIterator<&'a RepoDataRecord> {} /// Defines the ability to convert a type into [`SolverRepoData`]. pub trait IntoRepoData<'a, S: SolverRepoData<'a>> { - /// Converts this instance into an instance of [`SolverRepoData`] which is consumable by a - /// specific [`SolverImpl`] implementation. + /// Converts this instance into an instance of [`SolverRepoData`] which is + /// consumable by a specific [`SolverImpl`] implementation. fn into(self) -> S; } diff --git a/crates/rattler_solve/src/libsolv_c/input.rs b/crates/rattler_solve/src/libsolv_c/input.rs index 799c2ef7c..0fb09c31f 100644 --- a/crates/rattler_solve/src/libsolv_c/input.rs +++ b/crates/rattler_solve/src/libsolv_c/input.rs @@ -1,5 +1,10 @@ -//! Contains business logic that loads information into libsolv in order to solve a conda -//! environment +//! Contains business logic that loads information into libsolv in order to +//! 
solve a conda environment + +use std::{cmp::Ordering, collections::HashMap}; + +use chrono::{DateTime, Utc}; +use rattler_conda_types::{package::ArchiveType, GenericVirtualPackage, RepoDataRecord}; use super::{ c_string, @@ -16,15 +21,14 @@ use super::{ solvable::SolvableId, }, }; -use chrono::{DateTime, Utc}; -use rattler_conda_types::{package::ArchiveType, GenericVirtualPackage, RepoDataRecord}; -use std::{cmp::Ordering, collections::HashMap}; +use crate::SolveError; #[cfg(not(target_family = "unix"))] /// Adds solvables to a repo from an in-memory .solv file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) pub fn add_solv_file(_pool: &Pool, _repo: &Repo<'_>, _solv_bytes: &LibcByteSlice) { unimplemented!("this platform does not support in-memory .solv files"); } @@ -32,8 +36,9 @@ pub fn add_solv_file(_pool: &Pool, _repo: &Repo<'_>, _solv_bytes: &LibcByteSlice #[cfg(target_family = "unix")] /// Adds solvables to a repo from an in-memory .solv file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) pub fn add_solv_file(pool: &Pool, repo: &Repo<'_>, solv_bytes: &LibcByteSlice) { // Add solv file from memory if available let mode = c_string("r"); @@ -50,12 +55,12 @@ pub fn add_repodata_records<'a>( repo: &Repo<'_>, repo_datas: impl IntoIterator, exclude_newer: Option<&DateTime>, -) -> Vec { +) -> Result, SolveError> { // Sanity check repo.ensure_belongs_to_pool(pool); - // Get all the IDs (these strings are internal to libsolv and always present, so we can - // unwrap them at will) + // Get all the IDs (these strings are internal to libsolv and always present, so + // we can unwrap them at will) let solvable_buildflavor_id = pool.find_interned_str(SOLVABLE_BUILDFLAVOR).unwrap(); let solvable_buildtime_id = pool.find_interned_str(SOLVABLE_BUILDTIME).unwrap(); let solvable_buildversion_id = pool.find_interned_str(SOLVABLE_BUILDVERSION).unwrap(); @@ -74,7 +79,8 @@ pub fn add_repodata_records<'a>( // Keeps a mapping from packages added to the repo to the type and solvable let mut package_to_type: HashMap<&str, (ArchiveType, SolvableId)> = HashMap::new(); - // Through `data` we can manipulate solvables (see the `Repodata` docs for details) + // Through `data` we can manipulate solvables (see the `Repodata` docs for + // details) let data = repo.add_repodata(); let mut solvable_ids = Vec::new(); @@ -87,7 +93,7 @@ pub fn add_repodata_records<'a>( // Create a solvable for the package let solvable_id = - match add_or_reuse_solvable(pool, repo, &data, &mut package_to_type, repo_data) { + match add_or_reuse_solvable(pool, repo, &data, &mut package_to_type, repo_data)? 
{ Some(id) => id, None => continue, }; @@ -96,7 +102,8 @@ pub fn add_repodata_records<'a>( // from the final transaction data.set_num(solvable_id, solvable_index_id, repo_data_index as u64); - // Safe because there are no other active references to any solvable (so no aliasing) + // Safe because there are no other active references to any solvable (so no + // aliasing) let solvable = unsafe { solvable_id.resolve_raw(pool).as_mut() }; let record = &repo_data.package_record; @@ -201,21 +208,22 @@ pub fn add_repodata_records<'a>( repo.internalize(); - solvable_ids + Ok(solvable_ids) } -/// When adding packages, we want to make sure that `.conda` packages have preference over `.tar.bz` -/// packages. For that reason, when adding a solvable we check first if a `.conda` version of the -/// package has already been added, in which case we forgo adding its `.tar.bz` version (and return -/// `None`). If no `.conda` version has been added, we create a new solvable (replacing any existing -/// solvable for the `.tar.bz` version of the package). +/// When adding packages, we want to make sure that `.conda` packages have +/// preference over `.tar.bz` packages. For that reason, when adding a solvable +/// we check first if a `.conda` version of the package has already been added, +/// in which case we forgo adding its `.tar.bz` version (and return `None`). If +/// no `.conda` version has been added, we create a new solvable (replacing any +/// existing solvable for the `.tar.bz` version of the package). fn add_or_reuse_solvable<'a>( pool: &Pool, repo: &Repo<'_>, data: &Repodata<'_>, package_to_type: &mut HashMap<&'a str, (ArchiveType, SolvableId)>, repo_data: &'a RepoDataRecord, -) -> Option { +) -> Result, SolveError> { // Sometimes we can reuse an existing solvable if let Some((filename, archive_type)) = ArchiveType::split_str(&repo_data.file_name) { if let Some(&(other_package_type, old_solvable_id)) = package_to_type.get(filename) { @@ -223,7 +231,7 @@ fn add_or_reuse_solvable<'a>( Ordering::Less => { // A previous package that we already stored is actually a package of a better // "type" so we'll just use that instead (.conda > .tar.bz) - return None; + return Ok(None); } Ordering::Greater => { // A previous package has a worse package "type", we'll reuse the handle but @@ -234,22 +242,22 @@ fn add_or_reuse_solvable<'a>( // Reset and reuse the old solvable reset_solvable(pool, repo, data, old_solvable_id); - return Some(old_solvable_id); + return Ok(Some(old_solvable_id)); } Ordering::Equal => { - unreachable!("found a duplicate package") + return Err(SolveError::DuplicateRecords(filename.to_string())); } } } else { let solvable_id = repo.add_solvable(); package_to_type.insert(filename, (archive_type, solvable_id)); - return Some(solvable_id); + return Ok(Some(solvable_id)); } } else { tracing::warn!("unknown package extension: {}", &repo_data.file_name); } - Some(repo.add_solvable()) + Ok(Some(repo.add_solvable())) } pub fn add_virtual_packages(pool: &Pool, repo: &Repo<'_>, packages: &[GenericVirtualPackage]) { @@ -261,7 +269,8 @@ pub fn add_virtual_packages(pool: &Pool, repo: &Repo<'_>, packages: &[GenericVir // Create a solvable for the package let solvable_id = repo.add_solvable(); - // Safe because there are no other references to this solvable_id (we just created it) + // Safe because there are no other references to this solvable_id (we just + // created it) let solvable = unsafe { solvable_id.resolve_raw(pool).as_mut() }; // Name and version @@ -286,34 +295,36 @@ fn reset_solvable(pool: 
&Pool, repo: &Repo<'_>, data: &Repodata<'_>, solvable_id pool.swap_solvables(blank_solvable, solvable_id); data.swap_attrs(blank_solvable, solvable_id); - // It is safe to free the blank solvable, because there are no other references to it - // than in this function + // It is safe to free the blank solvable, because there are no other references + // to it than in this function unsafe { repo.free_solvable(blank_solvable) }; } /// Caches the repodata as an in-memory `.solv` file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) #[cfg(not(target_family = "unix"))] -pub fn cache_repodata(_url: String, _data: &[RepoDataRecord]) -> LibcByteSlice { +pub fn cache_repodata(_url: String, _data: &[RepoDataRecord]) -> Result { unimplemented!("this function is only available on unix-like operating systems") } /// Caches the repodata as an in-memory `.solv` file /// -/// Note: this function relies on primitives that are only available on unix-like operating systems, -/// and will panic if called from another platform (e.g. Windows) +/// Note: this function relies on primitives that are only available on +/// unix-like operating systems, and will panic if called from another platform +/// (e.g. Windows) #[cfg(target_family = "unix")] pub fn cache_repodata( url: String, data: &[RepoDataRecord], channel_priority: Option, -) -> LibcByteSlice { +) -> Result { // Add repodata to a new pool + repo let pool = Pool::default(); let repo = Repo::new(&pool, url, channel_priority.unwrap_or(0)); - add_repodata_records(&pool, &repo, data, None); + add_repodata_records(&pool, &repo, data, None)?; // Export repo to .solv in memory let mut stream_ptr = std::ptr::null_mut(); @@ -326,6 +337,7 @@ pub fn cache_repodata( let stream_ptr = std::ptr::NonNull::new(stream_ptr).expect("stream_ptr was null"); - // Safe because we know `stream_ptr` points to an array of bytes of length `stream_size` - unsafe { LibcByteSlice::from_raw_parts(stream_ptr.cast(), stream_size) } + // Safe because we know `stream_ptr` points to an array of bytes of length + // `stream_size` + Ok(unsafe { LibcByteSlice::from_raw_parts(stream_ptr.cast(), stream_size) }) } diff --git a/crates/rattler_solve/src/libsolv_c/mod.rs b/crates/rattler_solve/src/libsolv_c/mod.rs index 0cbe181b3..ae3dfd4eb 100644 --- a/crates/rattler_solve/src/libsolv_c/mod.rs +++ b/crates/rattler_solve/src/libsolv_c/mod.rs @@ -1,15 +1,16 @@ //! Provides an solver implementation based on the [`rattler_libsolv_c`] crate. 
-use crate::{ChannelPriority, IntoRepoData, SolveStrategy, SolverRepoData}; -use crate::{SolveError, SolverTask}; +use std::{ + collections::{HashMap, HashSet}, + ffi::CString, + mem::ManuallyDrop, +}; + pub use input::cache_repodata; use input::{add_repodata_records, add_solv_file, add_virtual_packages}; pub use libc_byte_slice::LibcByteSlice; use output::get_required_packages; use rattler_conda_types::RepoDataRecord; -use std::collections::{HashMap, HashSet}; -use std::ffi::CString; -use std::mem::ManuallyDrop; use wrapper::{ flags::SolverFlag, pool::{Pool, Verbosity}, @@ -17,13 +18,15 @@ use wrapper::{ solve_goal::SolveGoal, }; +use crate::{ChannelPriority, IntoRepoData, SolveError, SolveStrategy, SolverRepoData, SolverTask}; + mod input; mod libc_byte_slice; mod output; mod wrapper; -/// Represents the information required to load available packages into libsolv for a single channel -/// and platform combination +/// Represents the information required to load available packages into libsolv +/// for a single channel and platform combination #[derive(Clone)] pub struct RepoData<'a> { /// The actual records after parsing `repodata.json` @@ -55,8 +58,8 @@ impl<'a> RepoData<'a> { impl<'a> SolverRepoData<'a> for RepoData<'a> {} -/// Convenience method that converts a string reference to a `CString`, replacing NUL characters -/// with whitespace (`b' '`) +/// Convenience method that converts a string reference to a `CString`, +/// replacing NUL characters with whitespace (`b' '`) fn c_string>(str: T) -> CString { let bytes = str.as_ref().as_bytes(); @@ -72,7 +75,8 @@ fn c_string>(str: T) -> CString { // Trailing 0 vec.push(0); - // Safe because the string does is guaranteed to have no NUL bytes other than the trailing one + // Safe because the string does is guaranteed to have no NUL bytes other than + // the trailing one unsafe { CString::from_vec_with_nul_unchecked(vec) } } @@ -118,10 +122,11 @@ impl super::SolverImpl for Solver { .map(IntoRepoData::into) .collect(); - // Determine the channel priority for each channel in the repodata in the order in which - // the repodatas are passed, where the first channel will have the highest priority value - // and each successive channel will descend in priority value. If not strict, the highest - // priority value will be 0 and the channel priority map will not be populated as it will + // Determine the channel priority for each channel in the repodata in the order + // in which the repodatas are passed, where the first channel will have + // the highest priority value and each successive channel will descend + // in priority value. If not strict, the highest priority value will be + // 0 and the channel priority map will not be populated as it will // not be used. let mut highest_priority: i32 = 0; let channel_priority: HashMap = @@ -183,7 +188,7 @@ impl super::SolverImpl for Solver { &repo, repodata.records.iter().copied(), task.exclude_newer.as_ref(), - ); + )?; } // Keep our own info about repodata_records @@ -193,7 +198,7 @@ impl super::SolverImpl for Solver { // Create a special pool for records that are already installed or locked. 
let repo = Repo::new(&pool, "locked", highest_priority); - let installed_solvables = add_repodata_records(&pool, &repo, &task.locked_packages, None); + let installed_solvables = add_repodata_records(&pool, &repo, &task.locked_packages, None)?; // Also add the installed records to the repodata repo_mapping.insert(repo.id(), repo_mapping.len()); @@ -201,7 +206,7 @@ impl super::SolverImpl for Solver { // Create a special pool for records that are pinned and cannot be changed. let repo = Repo::new(&pool, "pinned", highest_priority); - let pinned_solvables = add_repodata_records(&pool, &repo, &task.pinned_packages, None); + let pinned_solvables = add_repodata_records(&pool, &repo, &task.pinned_packages, None)?; // Also add the installed records to the repodata repo_mapping.insert(repo.id(), repo_mapping.len()); @@ -261,9 +266,10 @@ impl super::SolverImpl for Solver { #[cfg(test)] mod test { - use super::*; use rstest::rstest; + use super::*; + #[rstest] #[case("", "")] #[case("a\0b\0c\0d\0", "a b c d ")] diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index f305aedd3..809cf3106 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -1,35 +1,36 @@ //! Provides an solver implementation based on the [`resolvo`] crate. -use crate::{ChannelPriority, IntoRepoData, SolveError, SolveStrategy, SolverRepoData, SolverTask}; -use chrono::{DateTime, Utc}; -use rattler_conda_types::package::ArchiveType; -use rattler_conda_types::{ - GenericVirtualPackage, MatchSpec, NamelessMatchSpec, PackageRecord, ParseMatchSpecError, - ParseStrictness, RepoDataRecord, -}; -use resolvo::{ - Candidates, Dependencies, DependencyProvider, KnownDependencies, NameId, Pool, SolvableDisplay, - SolvableId, Solver as LibSolvRsSolver, SolverCache, UnsolvableOrCancelled, VersionSet, - VersionSetId, -}; -use std::collections::HashSet; -use std::rc::Rc; use std::{ cell::RefCell, cmp::Ordering, - collections::HashMap, + collections::{HashMap, HashSet}, fmt::{Display, Formatter}, marker::PhantomData, ops::Deref, + rc::Rc, }; -use crate::resolvo::conda_util::CompareStrategy; +use chrono::{DateTime, Utc}; use itertools::Itertools; +use rattler_conda_types::{ + package::ArchiveType, GenericVirtualPackage, MatchSpec, NamelessMatchSpec, PackageRecord, + ParseMatchSpecError, ParseStrictness, RepoDataRecord, +}; +use resolvo::{ + Candidates, Dependencies, DependencyProvider, KnownDependencies, NameId, Pool, SolvableDisplay, + SolvableId, Solver as LibSolvRsSolver, SolverCache, UnsolvableOrCancelled, VersionSet, + VersionSetId, +}; + +use crate::{ + resolvo::conda_util::CompareStrategy, ChannelPriority, IntoRepoData, SolveError, SolveStrategy, + SolverRepoData, SolverTask, +}; mod conda_util; -/// Represents the information required to load available packages into libsolv for a single channel -/// and platform combination +/// Represents the information required to load available packages into libsolv +/// for a single channel and platform combination #[derive(Clone)] pub struct RepoData<'a> { /// The actual records after parsing `repodata.json` @@ -188,7 +189,7 @@ impl<'a> CondaDependencyProvider<'a> { channel_priority: ChannelPriority, exclude_newer: Option>, strategy: SolveStrategy, - ) -> Self { + ) -> Result { let pool = Rc::new(Pool::default()); let mut records: HashMap = HashMap::default(); @@ -218,14 +219,17 @@ impl<'a> CondaDependencyProvider<'a> { // Add additional records for repo_datas in repodata { - // Iterate over all records and dedup 
records that refer to the same package data but with - // different archive types. This can happen if you have two variants of the same package but - // with different extensions. We prefer `.conda` packages over `.tar.bz`. + // Iterate over all records and dedup records that refer to the same package + // data but with different archive types. This can happen if you + // have two variants of the same package but with different + // extensions. We prefer `.conda` packages over `.tar.bz`. // - // Its important to insert the records in the same order as how they were presented to this - // function to ensure that each solve is deterministic. Iterating over HashMaps is not - // deterministic at runtime so instead we store the values in a Vec as we iterate over the - // records. This guarentees that the order of records remains the same over runs. + // Its important to insert the records in the same order as how they were + // presented to this function to ensure that each solve is + // deterministic. Iterating over HashMaps is not deterministic at + // runtime so instead we store the values in a Vec as we iterate over the + // records. This guarentees that the order of records remains the same over + // runs. let mut ordered_repodata = Vec::with_capacity(repo_datas.records.len()); let mut package_to_type: HashMap<&str, (ArchiveType, usize, bool)> = HashMap::with_capacity(repo_datas.records.len()); @@ -252,27 +256,29 @@ impl<'a> CondaDependencyProvider<'a> { ordered_repodata[*idx] = record; *previous_excluded = false; } else if excluded && !*previous_excluded { - // The previous package would not have been excluded by the solver but - // this one will, so we'll keep the previous one regardless of the type. + // The previous package would not have been excluded + // by the solver but + // this one will, so we'll keep the previous one + // regardless of the type. } else { match archive_type.cmp(prev_archive_type) { Ordering::Greater => { - // A previous package has a worse package "type", we'll use the current record - // instead. + // A previous package has a worse package "type", we'll use the + // current record instead. *prev_archive_type = archive_type; ordered_repodata[*idx] = record; + *previous_excluded = excluded; } Ordering::Less => { - // A previous package that we already stored is actually a package of a better - // "type" so we'll just use that instead (.conda > .tar.bz) + // A previous package that we already stored + // is actually a package of a better + // "type" so we'll just use that instead + // (.conda > .tar.bz) } Ordering::Equal => { - if record != ordered_repodata[*idx] { - unreachable!( - "found duplicate record with different values for {}", - &record.file_name - ); - } + return Err(SolveError::DuplicateRecords( + record.file_name.clone(), + )); } } } @@ -314,7 +320,8 @@ impl<'a> CondaDependencyProvider<'a> { if let Some(spec_channel) = &spec.channel { if record.channel != spec_channel.base_url.to_string() { tracing::debug!("Ignoring {} from {} because it was not requested from that channel.", &record.package_record.name.as_normalized(), &record.channel); - // Add record to the excluded with reason of being in the non requested channel. + // Add record to the excluded with reason of being in the non + // requested channel. 
let message = format!( "candidate not in requested channel: '{}'", spec_channel @@ -332,7 +339,8 @@ impl<'a> CondaDependencyProvider<'a> { } // Enforce channel priority - // This function makes the assumption that the records are given in order of the channels. + // This function makes the assumption that the records are given in order of the + // channels. if let (Some(first_channel), ChannelPriority::Strict) = ( package_name_found_in_channel .get(&record.package_record.name.as_normalized().to_string()), @@ -382,7 +390,7 @@ impl<'a> CondaDependencyProvider<'a> { candidates.locked = Some(solvable); } - Self { + Ok(Self { pool, records, matchspec_to_highest_version: RefCell::default(), @@ -390,7 +398,7 @@ impl<'a> CondaDependencyProvider<'a> { stop_time, strategy, direct_dependencies, - } + }) } } @@ -473,7 +481,8 @@ impl<'a> DependencyProvider> for CondaDependencyProvider<'a> } } -/// Displays the different candidates by their version and sorted by their version +/// Displays the different candidates by their version and sorted by their +/// version pub struct CondaSolvableDisplay; impl SolvableDisplay> for CondaSolvableDisplay { @@ -522,7 +531,7 @@ impl super::SolverImpl for Solver { task.channel_priority, task.exclude_newer, task.strategy, - ); + )?; let pool = provider.pool.clone(); // Construct the requirements that the solver needs to satisfy. diff --git a/crates/rattler_solve/tests/backends.rs b/crates/rattler_solve/tests/backends.rs index 2df43d150..c37d4a93f 100644 --- a/crates/rattler_solve/tests/backends.rs +++ b/crates/rattler_solve/tests/backends.rs @@ -1,3 +1,5 @@ +use std::{str::FromStr, time::Instant}; + use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; use rattler_conda_types::{ @@ -6,8 +8,6 @@ use rattler_conda_types::{ }; use rattler_repodata_gateway::sparse::SparseRepoData; use rattler_solve::{ChannelPriority, SolveError, SolveStrategy, SolverImpl, SolverTask}; -use std::str::FromStr; -use std::time::Instant; use url::Url; fn channel_config() -> ChannelConfig { @@ -147,8 +147,8 @@ fn solve_real_world(specs: Vec<&str>) -> Vec { }) .collect::>(); - // The order of packages is nondeterministic, so we sort them to ensure we can compare them - // to a previous run + // The order of packages is nondeterministic, so we sort them to ensure we can + // compare them to a previous run pkgs.sort(); pkgs }; @@ -523,6 +523,22 @@ macro_rules! solver_backend_tests { "although there is a newer version available we expect an older version of foo because we exclude the newer version based on the timestamp"); assert_eq!(&info.file_name, "foo-3.0.2-py36h1af98f8_1.tar.bz2", "even though there is a conda version available we expect the tar.bz2 version because we exclude the .conda version based on the timestamp"); } + + #[test] + fn test_duplicate_record() { + use rattler_solve::SolverImpl; + + let mut records = super::read_repodata(&dummy_channel_json_path()); + records.push(records[0].clone()); + + let task = rattler_solve::SolverTask::from_iter([&records]); + + let result = <$T>::default().solve(task); + match result { + Err(rattler_solve::SolveError::DuplicateRecords(_)) => {}, + _ => panic!("expected a DuplicateRecord error"), + } + } }; } @@ -530,22 +546,23 @@ macro_rules! solver_backend_tests { mod libsolv_c { #![allow(unused_imports)] // For some reason windows thinks this is an unused import. 
+ use rattler_solve::{ChannelPriority, SolveStrategy}; + use super::{ dummy_channel_json_path, installed_package, solve, solve_real_world, FromStr, GenericVirtualPackage, SimpleSolveTask, SolveError, Version, }; - use rattler_solve::ChannelPriority; - use rattler_solve::SolveStrategy; solver_backend_tests!(rattler_solve::libsolv_c::Solver); #[test] #[cfg(target_family = "unix")] fn test_solve_with_cached_solv_file_install_new() { - use super::read_repodata; use rattler_conda_types::{Channel, ChannelConfig, MatchSpec}; use rattler_solve::{SolverImpl, SolverTask}; + use super::read_repodata; + let repo_data = read_repodata(&dummy_channel_json_path()); let cached_repo_data = rattler_solve::libsolv_c::cache_repodata( @@ -558,7 +575,8 @@ mod libsolv_c { .to_string(), &repo_data, None, - ); + ) + .unwrap(); let libsolv_repodata = rattler_solve::libsolv_c::RepoData { records: repo_data.iter().collect(), @@ -819,8 +837,8 @@ fn compare_solve(task: CompareTask<'_>) { }) .collect::>(); - // The order of packages is nondeterministic, so we sort them to ensure we can compare them - // to a previous run + // The order of packages is nondeterministic, so we sort them to ensure we can + // compare them to a previous run pkgs.sort(); pkgs };
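Below is a minimal caller-side sketch of how the new `SolveError::DuplicateRecords` variant introduced by this patch might be handled: instead of the backends hitting an `unreachable!` panic on duplicate filenames, the error now bubbles up to the caller. The task construction mirrors the new `test_duplicate_record` test above; the `records` argument is assumed to come from something like `read_repodata`, and the sketch assumes `solve` still returns `Result<Vec<RepoDataRecord>, SolveError>`, as the existing tests in this file suggest.

use rattler_conda_types::RepoDataRecord;
use rattler_solve::{resolvo::Solver, SolveError, SolverImpl, SolverTask};

fn solve_reporting_duplicates(
    records: &Vec<RepoDataRecord>,
) -> Option<Vec<RepoDataRecord>> {
    // Build a task from a single set of available packages, as the tests do.
    let task = SolverTask::from_iter([records]);
    match Solver::default().solve(task) {
        Ok(solution) => Some(solution),
        // New in this patch: duplicate records surface as a runtime error,
        // rendered by the Display impl as
        // "encountered duplicate records for <filename>".
        Err(SolveError::DuplicateRecords(filename)) => {
            eprintln!("duplicate repodata record: {filename}");
            None
        }
        Err(err) => {
            eprintln!("solve failed: {err}");
            None
        }
    }
}

The same handling applies to `rattler_solve::libsolv_c::Solver`, since both backends now return the variant instead of panicking. Note also that gateway query results are now one `RepoData` per (channel, platform) subdir rather than one per channel, so callers that previously indexed the result by channel index should index by subdir instead.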