feat: implement base_url cep (#322)
* feat: implement base_url cep

* feat: add tests to verify the computed file path

* fix: base_url relative to test_data

* fix: fmt
baszalmstra authored Sep 11, 2023
1 parent efb3fdb commit 7573d02
Showing 9 changed files with 179 additions and 19 deletions.
2 changes: 2 additions & 0 deletions crates/rattler_conda_types/Cargo.toml
@@ -43,6 +43,8 @@ rstest = "0.18.2"
assert_matches = "1.5.0"
hex-literal = "0.4.1"
criterion = { version = "0.5", features = ["html_reports"] }
pathdiff = "0.2.1"
dunce = "1.0.4"

[[bench]]
name = "parse"
4 changes: 3 additions & 1 deletion crates/rattler_conda_types/src/lib.rs
@@ -36,7 +36,9 @@ pub use package_name::{InvalidPackageNameError, PackageName};
pub use platform::{Arch, ParseArchError, ParsePlatformError, Platform};
pub use prefix_record::PrefixRecord;
pub use repo_data::patches::{PackageRecordPatch, PatchInstructions, RepoDataPatch};
pub use repo_data::{ChannelInfo, ConvertSubdirError, PackageRecord, RepoData};
pub use repo_data::{
compute_package_url, ChannelInfo, ConvertSubdirError, PackageRecord, RepoData,
};
pub use repo_data_record::RepoDataRecord;
pub use run_export::RunExportKind;
pub use version::{
135 changes: 128 additions & 7 deletions crates/rattler_conda_types/src/repo_data/mod.rs
@@ -4,6 +4,7 @@
pub mod patches;
mod topological_sort;

use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::{Display, Formatter};
use std::path::Path;
@@ -14,6 +15,7 @@ use rattler_digest::{serde::SerializableHash, Md5Hash, Sha256Hash};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, skip_serializing_none, OneOrMany};
use thiserror::Error;
use url::Url;

use rattler_macros::sorted;

@@ -57,6 +59,10 @@ pub struct RepoData {
pub struct ChannelInfo {
/// The channel's subdirectory
pub subdir: String,

/// The base URL for all package URLs. Can be an absolute URL or a path relative to the repodata location.
#[serde(skip_serializing_if = "Option::is_none")]
pub base_url: Option<String>,
}
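// Illustration (not part of this diff): in repodata.json the new field surfaces as, e.g.,
//   "info": { "subdir": "linux-64", "base_url": "../linux-64" }
// where base_url may just as well be an absolute URL such as a mirror address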

/// A single record in the Conda repodata. A single record refers to a single binary distribution
@@ -173,17 +179,29 @@ impl RepoData {
Ok(serde_json::from_str(&contents)?)
}

/// Returns the `base_url` specified in the repodata.
pub fn base_url(&self) -> Option<&str> {
self.info.as_ref().and_then(|i| i.base_url.as_deref())
}

/// Builds a [`Vec<RepoDataRecord>`] from the packages in a [`RepoData`] given the source of the
/// data.
pub fn into_repo_data_records(self, channel: &Channel) -> Vec<RepoDataRecord> {
let mut records = Vec::with_capacity(self.packages.len() + self.conda_packages.len());
let channel_name = channel.canonical_name();
let base_url = self.base_url().map(ToOwned::to_owned);

// Build a RepoDataRecord for every package, resolving its url against the base_url
for (filename, package_record) in self.packages.into_iter().chain(self.conda_packages) {
records.push(RepoDataRecord {
url: channel
.base_url()
.join(&format!("{}/{}", &package_record.subdir, &filename))
.expect("failed to build a url from channel and package record"),
url: compute_package_url(
&channel
.base_url()
.join(&package_record.subdir)
.expect("cannot join channel base_url and subdir"),
base_url.as_deref(),
&filename,
),
channel: channel_name.clone(),
package_record,
file_name: filename,
@@ -193,6 +211,50 @@
}
}

/// Computes the URL for a package, resolving the optional `base_url` (absolute URL, root-relative path, or relative path) against `repo_data_base_url`.
pub fn compute_package_url(
repo_data_base_url: &Url,
base_url: Option<&str>,
filename: &str,
) -> Url {
let mut absolute_url = match base_url {
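// No base_url in the repodata: fall back to the repodata location (channel base url + subdir)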
None => repo_data_base_url.clone(),
Some(base_url) => match Url::parse(base_url) {
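// A relative path that does not start with '/': resolve it against the repodata location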
Err(url::ParseError::RelativeUrlWithoutBase) if !base_url.starts_with('/') => {
add_trailing_slash(repo_data_base_url)
.join(base_url)
.expect("failed to join base_url with channel")
}
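// A root-relative path (starts with '/'): keep the host but replace the entire path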
Err(url::ParseError::RelativeUrlWithoutBase) => {
let mut url = repo_data_base_url.clone();
url.set_path(base_url);
url
}
Err(e) => unreachable!("{e}"),
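// A fully qualified URL: use it as-is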
Ok(base_url) => base_url,
},
};

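// Make sure the directory URL ends with '/' so joining the filename appends it instead of replacing the last segment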
let path = absolute_url.path();
if !path.ends_with('/') {
absolute_url.set_path(&format!("{path}/"))
}
absolute_url
.join(filename)
.expect("failed to join base_url and filename")
}

fn add_trailing_slash(url: &Url) -> Cow<Url> {
let path = url.path();
if !path.ends_with('/') {
let mut url = url.clone();
url.set_path(&format!("{path}/"));
Cow::Owned(url)
} else {
Cow::Borrowed(url)
}
}

impl PackageRecord {
/// A simple helper method that constructs a `PackageRecord` with the bare minimum values.
pub fn new(name: PackageName, version: impl Into<VersionWithSource>, build: String) -> Self {
@@ -351,10 +413,10 @@ fn sort_set_alphabetically<S: serde::Serializer>(

#[cfg(test)]
mod test {
use crate::repo_data::determine_subdir;
use crate::repo_data::{compute_package_url, determine_subdir};
use fxhash::FxHashSet;

use crate::RepoData;
use crate::{Channel, ChannelConfig, RepoData};

// isl-0.12.2-1.tar.bz2
// gmp-5.1.2-6.tar.bz2
@@ -371,7 +433,7 @@
#[test]
fn test_serialize() {
let repodata = RepoData {
version: Some(1),
version: Some(2),
info: Default::default(),
packages: Default::default(),
conda_packages: Default::default(),
@@ -397,4 +459,63 @@
let json = serde_json::to_string_pretty(&repodata).unwrap();
insta::assert_snapshot!(json);
}

#[test]
fn test_base_url_packages() {
// load test data
let test_data_path = dunce::canonicalize(
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data"),
)
.unwrap();
let data_path = test_data_path.join("channels/dummy/linux-64/repodata.json");
let repodata = RepoData::from_path(&data_path).unwrap();

let channel = Channel::from_str(
url::Url::from_directory_path(data_path.parent().unwrap().parent().unwrap())
.unwrap()
.as_str(),
&ChannelConfig::default(),
)
.unwrap();

let file_urls = repodata
.into_repo_data_records(&channel)
.into_iter()
.map(|r| {
pathdiff::diff_paths(r.url.to_file_path().unwrap(), &test_data_path)
.unwrap()
.to_string_lossy()
.replace('\\', "/")
})
.collect::<Vec<_>>();

// serialize to yaml
insta::assert_yaml_snapshot!(file_urls);
}

#[test]
fn test_base_url() {
let channel = Channel::from_str("conda-forge", &ChannelConfig::default()).unwrap();
let base_url = channel.base_url().join("linux-64/").unwrap();
assert_eq!(
compute_package_url(&base_url, None, "bla.conda").to_string(),
"https://conda.anaconda.org/conda-forge/linux-64/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("https://host.some.org"), "bla.conda",).to_string(),
"https://host.some.org/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("/root"), "bla.conda").to_string(),
"https://conda.anaconda.org/root/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("foo/bar"), "bla.conda").to_string(),
"https://conda.anaconda.org/conda-forge/linux-64/foo/bar/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("../../root"), "bla.conda").to_string(),
"https://conda.anaconda.org/root/bla.conda"
);
}
}
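Not part of the diff, but for orientation: a minimal sketch of how the new resolution surfaces through the existing public API. The repodata path below is hypothetical; Channel::from_str, RepoData::from_path, and into_repo_data_records are used as in the tests above.

use rattler_conda_types::{Channel, ChannelConfig, RepoData};
use std::path::Path;

fn print_package_urls() {
    // Load a repodata.json from disk (hypothetical path)
    let repodata = RepoData::from_path(Path::new("linux-64/repodata.json"))
        .expect("failed to read repodata.json");
    let channel = Channel::from_str("conda-forge", &ChannelConfig::default()).unwrap();

    // If the repodata carries `info.base_url`, every record's url is resolved against it
    // via `compute_package_url`; otherwise the channel url plus subdir is used, as before
    for record in repodata.into_repo_data_records(&channel) {
        println!("{} -> {}", record.file_name, record.url);
    }
}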
@@ -0,0 +1,11 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 493
expression: file_urls
---
- channels/dummy/linux-64/foo-3.0.2-py36h1af98f8_1.tar.bz2
- "channels/dummy/linux-64/baz-1.0-unix_py36h1af98f8_2\u0000.tar.bz2"
- channels/dummy/linux-64/foo-4.0.2-py36h1af98f8_2.tar.bz2
- channels/dummy/linux-64/bar-1.0-unix_py36h1af98f8_2.tar.bz2
- channels/dummy/linux-64/foo-3.0.2-py36h1af98f8_1.conda

@@ -1,5 +1,6 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 438
expression: repodata
---
info: ~
@@ -13,5 +14,5 @@ removed:
- quux
- qux
- xyz
repodata_version: 1
repodata_version: 2

@@ -1,10 +1,12 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 460
expression: json
---
{
"info": {
"subdir": "linux-64"
"subdir": "linux-64",
"base_url": "../linux-64"
},
"packages": {
"bar-1.0-unix_py36h1af98f8_2.tar.bz2": {
@@ -83,5 +85,5 @@ expression: json
}
},
"packages.conda": {},
"repodata_version": 1
"repodata_version": 2
}
@@ -1,9 +1,11 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 448
expression: repodata
---
info:
subdir: linux-64
base_url: "../linux-64"
packages:
bar-1.0-unix_py36h1af98f8_2.tar.bz2:
build: unix_py36h1af98f8_2
@@ -73,5 +75,5 @@ packages:
timestamp: 1605110689658
version: 4.0.2
packages.conda: {}
repodata_version: 1
repodata_version: 2

29 changes: 24 additions & 5 deletions crates/rattler_repodata_gateway/src/sparse/mod.rs
@@ -3,7 +3,9 @@
use futures::{stream, StreamExt, TryFutureExt, TryStreamExt};
use itertools::Itertools;
use rattler_conda_types::{Channel, PackageName, PackageRecord, RepoDataRecord};
use rattler_conda_types::{
compute_package_url, Channel, ChannelInfo, PackageName, PackageRecord, RepoDataRecord,
};
use serde::{
de::{Error, MapAccess, Visitor},
Deserialize, Deserializer,
@@ -90,16 +92,19 @@ impl SparseRepoData {
/// Returns all the records for the specified package name.
pub fn load_records(&self, package_name: &PackageName) -> io::Result<Vec<RepoDataRecord>> {
let repo_data = self.inner.borrow_repo_data();
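// Take the optional base_url from the repodata's info section, if present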
let base_url = repo_data.info.as_ref().and_then(|i| i.base_url.as_deref());
let mut records = parse_records(
package_name,
&repo_data.packages,
base_url,
&self.channel,
&self.subdir,
self.patch_record_fn,
)?;
let mut conda_records = parse_records(
package_name,
&repo_data.conda_packages,
base_url,
&self.channel,
&self.subdir,
self.patch_record_fn,
@@ -133,18 +138,24 @@ impl SparseRepoData {
while let Some(next_package) = pending.pop_front() {
for (i, repo_data) in repo_data.iter().enumerate() {
let repo_data_packages = repo_data.inner.borrow_repo_data();
let base_url = repo_data_packages
.info
.as_ref()
.and_then(|i| i.base_url.as_deref());

// Get all records from the repodata
let mut records = parse_records(
&next_package,
&repo_data_packages.packages,
base_url,
&repo_data.channel,
&repo_data.subdir,
patch_function,
)?;
let mut conda_records = parse_records(
&next_package,
&repo_data_packages.conda_packages,
base_url,
&repo_data.channel,
&repo_data.subdir,
patch_function,
@@ -180,6 +191,9 @@ impl SparseRepoData {
/// A serde compatible struct that only sparsely parses a repodata.json file.
#[derive(Deserialize)]
struct LazyRepoData<'i> {
/// The channel information contained in the repodata.json file
info: Option<ChannelInfo>,

/// The tar.bz2 packages contained in the repodata.json file
#[serde(borrow)]
#[serde(deserialize_with = "deserialize_filename_and_raw_record")]
@@ -196,6 +210,7 @@ struct LazyRepoData<'i> {
fn parse_records<'i>(
package_name: &PackageName,
packages: &[(PackageFilename<'i>, &'i RawValue)],
base_url: Option<&str>,
channel: &Channel,
subdir: &str,
patch_function: Option<fn(&mut PackageRecord)>,
@@ -212,10 +227,14 @@ fn parse_records<'i>(
package_record.subdir = subdir.to_owned();
}
result.push(RepoDataRecord {
url: channel
.base_url()
.join(&format!("{}/{}", &package_record.subdir, &key.filename))
.expect("failed to build a url from channel and package record"),
url: compute_package_url(
&channel
.base_url
.join(&format!("{}/", &package_record.subdir))
.expect("failed determine repo_base_url"),
base_url,
key.filename,
),
channel: channel_name.clone(),
package_record,
file_name: key.filename.to_owned(),
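Not part of the diff: a sketch of the sparse path, assuming a SparseRepoData has already been constructed for a channel subdir (construction omitted) and that the crate exposes it as rattler_repodata_gateway::sparse::SparseRepoData. load_records now threads the repodata's base_url, if any, into compute_package_url when building each record's url.

use rattler_conda_types::{PackageName, RepoDataRecord};
use rattler_repodata_gateway::sparse::SparseRepoData;

fn records_for(
    sparse: &SparseRepoData,
    name: &PackageName,
) -> std::io::Result<Vec<RepoDataRecord>> {
    // Parses only the entries for `name`; each record's url honors info.base_url when present
    sparse.load_records(name)
}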
4 changes: 2 additions & 2 deletions test-data/channels/dummy/linux-64/repodata.json
Git LFS file not shown
