Skip to content

Commit

Permalink
Apply review suggestions, reduce amount downloaded
Browse files Browse the repository at this point in the history
Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com>
  • Loading branch information
jakub-nt committed Nov 13, 2024
1 parent b516b19 commit e6bc204
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 101 deletions.
2 changes: 1 addition & 1 deletion cfbs/masterfiles/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def initialize_vcf():
def versions_checksums_files(
files_dir_path, version, versions_dict, checksums_dict, files_dict
):
for root, dirs, files in os.walk(files_dir_path):
for root, _, files in os.walk(files_dir_path):
for name in files:
full_relpath = os.path.join(root, name)
tarball_relpath = os.path.relpath(full_relpath, files_dir_path)
Expand Down
4 changes: 2 additions & 2 deletions cfbs/masterfiles/check_download_matches_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ def check_download_matches_git(versions):
download_version_dict = download_versions_dict["versions"][version]["files"]
git_version_dict = git_versions_dict["versions"][version]["files"]

for diff in list(dictdiffer.diff(download_version_dict, git_version_dict)):
with open("differences/difference-" + version + ".txt", "w") as f:
with open("differences/difference-" + version + ".txt", "w") as f:
for diff in dictdiffer.diff(download_version_dict, git_version_dict):
print(diff, file=f)
18 changes: 10 additions & 8 deletions cfbs/masterfiles/check_tarball_checksums.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
import os

from cfbs.utils import file_sha256, immediate_files


def check_tarball_checksums(dir_path, downloaded_versions, reported_checksums):
does_match = True

print("Verifying checksums...")

for version in downloaded_versions:
print(version)
if version in ("3.10.0", "3.9.2"):
# 3.10.0 lists a .tar.gz, not a .pkg.tar.gz
# 3.9.2 lists no masterfiles
continue

version_path = dir_path / version
version_path = os.path.join(dir_path, version)

versions_files = immediate_files(version_path)
# the tarball should be the only file in the version's directory
tarball_name = versions_files[0]

tarball_path = version_path / tarball_name
tarball_path = os.path.join(version_path, tarball_name)

tarball_checksum = file_sha256(tarball_path)

if version in ("3.10.0", "3.9.2"):
# 3.10.0 lists a .tar.gz, not a .pkg.tar.gz
# 3.9.2 lists no masterfiles
continue

reported_checksum = reported_checksums[version]

if tarball_checksum != reported_checksum:
Expand Down
135 changes: 74 additions & 61 deletions cfbs/masterfiles/download_all_versions.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,99 @@
from pathlib import Path
import os
import shutil
import urllib.request

from cfbs.utils import get_json

DOWNLOAD = True
from cfbs.utils import fetch_url, get_json, mkdir

ENTERPRISE_URL = "https://cfengine.com/release-data/enterprise/releases.json"
COMMUNITY_URL = "https://cfengine.com/release-data/community/releases.json"

# TODO
# def download_all_versions_community():
# data = get_json(COMMUNITY_URL)
# # "masterfiles is at a different index" in 3.10.1 happens only for Enterprise, not Community

ENTERPRISE_DOWNLOAD_PATH = "enterprise"

def download_all_versions_enterprise():
data = get_json(ENTERPRISE_URL)

urls_dict = {}
def get_download_urls_enterprise():
download_urls = {}
reported_checksums = {}

for releases_data in data["releases"]:
version = releases_data["version"]
release_url = releases_data["URL"]
data = get_json(ENTERPRISE_URL)

subdata = get_json(release_url)
for release_data in data["releases"]:
version = release_data["version"]

if version == "3.10.0":
# for 3.10.0, for some reason, the masterfiles download link points to the .tar.gz tarball, rather than the .pkg.tar.gz tarball
# download the .pkg.tar.gz from an unlisted analogous URL instead
download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.10.0.pkg.tar.gz"
download_urls[version] = download_url
continue
if version == "3.9.2":
# for 3.9.2, no masterfiles are listed, but an unlisted analogous URL exists
download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.9.2.pkg.tar.gz"
download_urls[version] = download_url
continue

release_url = release_data["URL"]
subdata = get_json(release_url)
artifacts_data = subdata["artifacts"]

if "Additional Assets" not in artifacts_data:
# happens for 3.9.0b1, 3.8.0b1, 3.6.1, 3.6.0
download_url = None
continue

else:
# for 3.10.0, for some reason, the masterfiles download link points to the .tar.gz tarball, rather than the .pkg.tar.gz tarball
# here, download the .pkg.tar.gz from a hidden analogous URL instead
if version == "3.10.0":
download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.10.0.pkg.tar.gz"
else:
# there's precisely one version (3.10.1) for which masterfiles is at a different index
if version == "3.10.1":
artifacts_data = artifacts_data["Additional Assets"][1]
else:
artifacts_data = artifacts_data["Additional Assets"][0]

if artifacts_data["Title"] != "Masterfiles ready-to-install tarball":
# happens for 3.10.1, 3.9.2, 3.9.0, 3.8.2, 3.8.1, 3.8.0, 3.6.2--3.7.4
# 3.10.1: see above
# 3.9.2: no masterfiles listed, but an analogous hidden URL exists
# 3.9.0 and others: no masterfiles listed, and analogous hidden URLs seemingly do not exist
if version == "3.9.2":
download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.9.2.pkg.tar.gz"
else:
download_url = None
else:
download_url = artifacts_data["URL"]
reported_checksums[version] = artifacts_data["SHA256"]

if download_url is not None:
urls_dict[version] = download_url
assets_data = artifacts_data["Additional Assets"]
masterfiles_data = None

for asset in assets_data:
if asset["Title"] == "Masterfiles ready-to-install tarball":
masterfiles_data = asset

if masterfiles_data is None:
# happens for 3.9.2, 3.9.0, 3.8.2, 3.8.1, 3.8.0, 3.7.4--3.6.2
# 3.9.2: see above
# 3.9.0 and below: no masterfiles listed, and analogous unlisted URLs seemingly do not exist
continue

download_urls[version] = masterfiles_data["URL"]
reported_checksums[version] = masterfiles_data["SHA256"]

return download_urls, reported_checksums


def download_versions_from_urls(output_path, download_urls):
downloaded_versions = []
if DOWNLOAD:
root_path = Path("./enterprise")
Path.mkdir(root_path, exist_ok=True)

for version, url in urls_dict.items():
# ignore master and .x versions
if url.startswith("http://buildcache"):
continue
mkdir(output_path)

for version, url in download_urls.items():
# ignore master and .x versions
if url.startswith("http://buildcache"):
continue

print("Downloading from", url)
downloaded_versions.append(version)

version_path = os.path.join(output_path, version)
mkdir(version_path)

filename = url.split("/")[-1]
tarball_path = os.path.join(version_path, filename)
fetch_url(url, tarball_path)

downloaded_versions.append(version)
print(url)
tarball_dir_path = os.path.join(version_path, "tarball")
shutil.unpack_archive(tarball_path, tarball_dir_path)

version_path = root_path / version
Path.mkdir(version_path, exist_ok=True)
return output_path, downloaded_versions

filename = url.split("/")[-1]
tarball_path = version_path / filename
urllib.request.urlretrieve(url, tarball_path)

shutil.unpack_archive(tarball_path, version_path / "tarball")
# TODO
# def download_all_versions_community():
# data = get_json(COMMUNITY_URL)


def download_all_versions_enterprise():
download_urls, reported_checksums = get_download_urls_enterprise()

output_path, downloaded_versions = download_versions_from_urls(
ENTERPRISE_DOWNLOAD_PATH, download_urls
)

# for local verification of the reported (Enterprise) (.pkg.tar.gz) checksums
return downloaded_versions, reported_checksums
return output_path, downloaded_versions, reported_checksums
14 changes: 6 additions & 8 deletions cfbs/masterfiles/generate_release_information.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# TODO document `cfbs generate-release-information`
# it generates the .json data files in the cwd
import sys
from pathlib import Path

from cfbs.masterfiles.download_all_versions import download_all_versions_enterprise
from cfbs.masterfiles.check_tarball_checksums import check_tarball_checksums
Expand All @@ -11,24 +10,23 @@
# commented out for now as this adds an extra dependency in its current state (dictdiffer)
# from cfbs.masterfiles.check_download_matches_git import check_download_matches_git

ENTERPRISE_PATH = Path("./enterprise")


def generate_release_information():
downloaded_versions, reported_checksums = download_all_versions_enterprise()
print("Downloading Enterprise masterfiles...")
output_path, downloaded_versions, reported_checksums = (
download_all_versions_enterprise()
)
# TODO Community coverage:
# downloaded_versions, reported_checksums = download_all_versions_community()

# Enterprise 3.9.2 is downloaded but there is no reported checksum, so both args are necessary
if check_tarball_checksums(
ENTERPRISE_PATH, downloaded_versions, reported_checksums
):
if check_tarball_checksums(output_path, downloaded_versions, reported_checksums):
print("Every checksum matches")
else:
print("Checksums differ!")
sys.exit(1)

generate_vcf_download(ENTERPRISE_PATH, downloaded_versions)
generate_vcf_download(output_path, downloaded_versions)
generate_vcf_git_checkout(downloaded_versions)

# TODO automatic analysis of the difference between downloadable MPF data and git MPF data
Expand Down
4 changes: 3 additions & 1 deletion cfbs/masterfiles/generate_vcf_download.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from cfbs.utils import write_json
from cfbs.masterfiles.analyze import (
finalize_vcf,
Expand All @@ -14,7 +16,7 @@ def generate_vcf_download(dir_path, downloaded_versions):
versions_dict, checksums_dict, files_dict = initialize_vcf()

for version in downloaded_versions:
files_dir_path = dir_path / version / "tarball"
files_dir_path = os.path.join(dir_path, version, "tarball")

versions_dict, checksums_dict, files_dict = versions_checksums_files(
files_dir_path, version, versions_dict, checksums_dict, files_dict
Expand Down
45 changes: 25 additions & 20 deletions cfbs/masterfiles/generate_vcf_git_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,19 @@ def generate_vcf_git_checkout(interesting_tags=None):
required_commands = ["git", "make", "automake", "autoconf"]
check_required_commands(required_commands)

# clone the MPF repo every time the script is run, in case there are updates
if os.path.isdir(MPF_PATH):
shutil.rmtree(MPF_PATH)

subprocess.run(
["git", "clone", MPF_URL],
cwd=DIR_PATH,
check=True,
)
# get the current version of the MPF repo
if not os.path.isdir(MPF_PATH):
subprocess.run(
["git", "clone", "--no-checkout", MPF_URL],
cwd=DIR_PATH,
check=True,
)
else:
subprocess.run(
["git", "fetch", "--all"],
cwd=DIR_PATH,
check=True,
)

result = subprocess.run(
["git", "tag"], cwd=MPF_PATH, capture_output=True, check=True
Expand All @@ -58,19 +62,11 @@ def generate_vcf_git_checkout(interesting_tags=None):
versions_dict, checksums_dict, files_dict = initialize_vcf()

for tag in interesting_tags:
print(tag)
print("Checkouting tag", tag)

# checkout the version tag
subprocess.run(
["git", "checkout", "--force", tag],
cwd=MPF_PATH,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
# a clean is necessary to remove all the undesired files
# checkout the version
subprocess.run(
["git", "clean", "-dffx"],
["git", "checkout", tag],
cwd=MPF_PATH,
check=True,
stdout=subprocess.DEVNULL,
Expand All @@ -92,6 +88,15 @@ def generate_vcf_git_checkout(interesting_tags=None):
MPF_PATH, tag, versions_dict, checksums_dict, files_dict
)

# clean the files to prevent spillage to other versions
subprocess.run(
["git", "clean", "-dffx"],
cwd=MPF_PATH,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)

versions_dict, checksums_dict, files_dict = finalize_vcf(
versions_dict, checksums_dict, files_dict
)
Expand Down

0 comments on commit e6bc204

Please sign in to comment.