Skip to content

Commit

Permalink
fix: result grouped by subdir instead of channel (#666)
Browse files Browse the repository at this point in the history
Also refactor to add a runtime error when duplicate package records are encountered.
  • Loading branch information
baszalmstra authored May 22, 2024
1 parent 0e24e50 commit d77fb5c
Show file tree
Hide file tree
Showing 6 changed files with 211 additions and 153 deletions.
17 changes: 8 additions & 9 deletions crates/rattler_repodata_gateway/src/gateway/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,17 @@ impl GatewayQuery {
let channels_and_platforms = self
.channels
.iter()
.enumerate()
.cartesian_product(self.platforms.into_iter())
.collect_vec();

// Create barrier cells for each subdirectory. This can be used to wait until the subdir
// becomes available.
let mut subdirs = Vec::with_capacity(channels_and_platforms.len());
let mut pending_subdirs = FuturesUnordered::new();
for ((channel_idx, channel), platform) in channels_and_platforms {
for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() {
// Create a barrier so work that needs this subdir can await it.
let barrier = Arc::new(BarrierCell::new());
subdirs.push((channel_idx, barrier.clone()));
subdirs.push((subdir_idx, barrier.clone()));

let inner = self.gateway.clone();
let reporter = self.reporter.clone();
Expand Down Expand Up @@ -136,14 +135,14 @@ impl GatewayQuery {
let mut pending_records = FuturesUnordered::new();

// The resulting list of repodata records.
let mut result = vec![RepoData::default(); self.channels.len()];
let mut result = vec![RepoData::default(); subdirs.len()];

// Loop until all pending package names have been fetched.
loop {
// Iterate over all pending package names and create futures to fetch them from all
// subdirs.
for (package_name, specs) in pending_package_specs.drain() {
for (channel_idx, subdir) in subdirs.iter().cloned() {
for (subdir_idx, subdir) in subdirs.iter().cloned() {
let specs = specs.clone();
let package_name = package_name.clone();
let reporter = self.reporter.clone();
Expand All @@ -154,8 +153,8 @@ impl GatewayQuery {
Subdir::Found(subdir) => subdir
.get_or_fetch_package_records(&package_name, reporter)
.await
.map(|records| (channel_idx, specs, records)),
Subdir::NotFound => Ok((channel_idx, specs, Arc::from(vec![]))),
.map(|records| (subdir_idx, specs, records)),
Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))),
}
});
}
Expand All @@ -170,7 +169,7 @@ impl GatewayQuery {

// Handle any records that were fetched
records = pending_records.select_next_some() => {
let (channel_idx, request_specs, records) = records?;
let (subdir_idx, request_specs, records) = records?;

if self.recursive {
// Extract the dependencies from the records and recursively add them to the
Expand All @@ -193,7 +192,7 @@ impl GatewayQuery {

// Add the records to the result
if records.len() > 0 {
let result = &mut result[channel_idx];
let result = &mut result[subdir_idx];
result.len += records.len();
result.shards.push(records);
}
Expand Down
80 changes: 47 additions & 33 deletions crates/rattler_solve/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! `rattler_solve` is a crate that provides functionality to solve Conda environments. It currently
//! exposes the functionality through the [`SolverImpl::solve`] function.
//! `rattler_solve` is a crate that provides functionality to solve Conda
//! environments. It currently exposes the functionality through the
//! [`SolverImpl::solve`] function.
#![deny(missing_docs)]

Expand All @@ -8,17 +9,18 @@ pub mod libsolv_c;
#[cfg(feature = "resolvo")]
pub mod resolvo;

use std::fmt;

use chrono::{DateTime, Utc};
use rattler_conda_types::{GenericVirtualPackage, MatchSpec, RepoDataRecord};
use std::fmt;

/// Represents a solver implementation, capable of solving [`SolverTask`]s
pub trait SolverImpl {
/// The repo data associated to a channel and platform combination
type RepoData<'a>: SolverRepoData<'a>;

/// Resolve the dependencies and return the [`RepoDataRecord`]s that should be present in the
/// environment.
/// Resolve the dependencies and return the [`RepoDataRecord`]s that should
/// be present in the environment.
fn solve<
'a,
R: IntoRepoData<'a, Self::RepoData<'a>>,
Expand All @@ -36,14 +38,17 @@ pub enum SolveError {
Unsolvable(Vec<String>),

/// The solver backend returned operations that we don't know how to install.
/// Each string is a somewhat user-friendly representation of which operation was not recognized
/// and can be used for error reporting
/// Each string is a somewhat user-friendly representation of which
/// operation was not recognized and can be used for error reporting
UnsupportedOperations(Vec<String>),

/// Error when converting matchspec
#[error(transparent)]
ParseMatchSpecError(#[from] rattler_conda_types::ParseMatchSpecError),

/// Encountered duplicate records in the available packages.
DuplicateRecords(String),

/// To support Resolvo cancellation
Cancelled,
}
Expand All @@ -67,48 +72,55 @@ impl fmt::Display for SolveError {
SolveError::Cancelled => {
write!(f, "Solve operation has been cancelled")
}
SolveError::DuplicateRecords(filename) => {
write!(f, "encountered duplicate records for {filename}")
}
}
}
}

/// Represents the channel priority option to use during solves.
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub enum ChannelPriority {
/// The channel that the package is first found in will be used as the only channel
/// for that package.
/// The channel that the package is first found in will be used as the only
/// channel for that package.
#[default]
Strict,

// Conda also has "Flexible" as an option, where packages present in multiple channels
// are only taken from lower-priority channels when this prevents unsatisfiable environment
// errors, but this would need implementation in the solvers.
// Flexible,
/// Packages can be retrieved from any channel as package version takes precedence.
/// Packages can be retrieved from any channel as package version takes
/// precedence.
Disabled,
}

/// Represents a dependency resolution task, to be solved by one of the backends (currently only
/// libsolv is supported)
/// Represents a dependency resolution task, to be solved by one of the backends
/// (currently only libsolv is supported)
pub struct SolverTask<TAvailablePackagesIterator> {
/// An iterator over all available packages
pub available_packages: TAvailablePackagesIterator,

/// Records of packages that are previously selected.
///
/// If the solver encounters multiple variants of a single package (identified by its name), it
/// will sort the records and select the best possible version. However, if there exists a
/// locked version it will prefer that variant instead. This is useful to reduce the number of
/// If the solver encounters multiple variants of a single package
/// (identified by its name), it will sort the records and select the
/// best possible version. However, if there exists a locked version it
/// will prefer that variant instead. This is useful to reduce the number of
/// packages that are updated when installing new packages.
///
/// Usually you add the currently installed packages or packages from a lock-file here.
/// Usually you add the currently installed packages or packages from a
/// lock-file here.
pub locked_packages: Vec<RepoDataRecord>,

/// Records of packages that are previously selected and CANNOT be changed.
///
/// If the solver encounters multiple variants of a single package (identified by its name), it
/// will sort the records and select the best possible version. However, if there is a variant
/// available in the `pinned_packages` field it will always select that version no matter what
/// even if that means other packages have to be downgraded.
/// If the solver encounters multiple variants of a single package
/// (identified by its name), it will sort the records and select the
/// best possible version. However, if there is a variant available in
/// the `pinned_packages` field it will always select that version no matter
/// what even if that means other packages have to be downgraded.
pub pinned_packages: Vec<RepoDataRecord>,

/// Virtual packages considered active
Expand All @@ -120,11 +132,12 @@ pub struct SolverTask<TAvailablePackagesIterator> {
/// The timeout after which the solver should stop
pub timeout: Option<std::time::Duration>,

/// The channel priority to solve with, either [`ChannelPriority::Strict`] or
/// [`ChannelPriority::Disabled`]
/// The channel priority to solve with, either [`ChannelPriority::Strict`]
/// or [`ChannelPriority::Disabled`]
pub channel_priority: ChannelPriority,

/// Exclude any package that has a timestamp newer than the specified timestamp.
/// Exclude any package that has a timestamp newer than the specified
/// timestamp.
pub exclude_newer: Option<DateTime<Utc>>,

/// The solve strategy.
Expand Down Expand Up @@ -169,22 +182,23 @@ pub enum SolveStrategy {
LowestVersionDirect,
}

/// A representation of a collection of [`RepoDataRecord`] usable by a [`SolverImpl`]
/// implementation.
/// A representation of a collection of [`RepoDataRecord`] usable by a
/// [`SolverImpl`] implementation.
///
/// Some solvers might be able to cache the collection between different runs of the solver which
/// could potentially eliminate some overhead. This trait enables creating a representation of the
/// repodata that is most suitable for a specific backend.
/// Some solvers might be able to cache the collection between different runs of
/// the solver which could potentially eliminate some overhead. This trait
/// enables creating a representation of the repodata that is most suitable for
/// a specific backend.
///
/// Some solvers may add additional functionality to their specific implementation that enables
/// caching the repodata to disk in an efficient way (see [`crate::libsolv_c::RepoData`] for
/// an example).
/// Some solvers may add additional functionality to their specific
/// implementation that enables caching the repodata to disk in an efficient way
/// (see [`crate::libsolv_c::RepoData`] for an example).
pub trait SolverRepoData<'a>: FromIterator<&'a RepoDataRecord> {}

/// Defines the ability to convert a type into [`SolverRepoData`].
pub trait IntoRepoData<'a, S: SolverRepoData<'a>> {
/// Converts this instance into an instance of [`SolverRepoData`] which is consumable by a
/// specific [`SolverImpl`] implementation.
/// Converts this instance into an instance of [`SolverRepoData`] which is
/// consumable by a specific [`SolverImpl`] implementation.
fn into(self) -> S;
}

Expand Down
Loading

0 comments on commit d77fb5c

Please sign in to comment.