Skip to content

Commit

Permalink
feat: make cloning parallel and mock build process (#150)
Browse files Browse the repository at this point in the history
Run repository clones in parallel and mock the build process in tests to speed them up.
  • Loading branch information
nichmor authored Jul 2, 2024
1 parent 1baf81f commit 18f34ba
Show file tree
Hide file tree
Showing 11 changed files with 1,563 additions and 302 deletions.
1,620 changes: 1,368 additions & 252 deletions pixi.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pyinstrument = ">=4.6.2,<4.7"
[feature.fmt.tasks]
# Format recipes using ruff
fmt = "ruff format"
check-fmt = "ruff check --fix"


# Default dependencies
Expand All @@ -78,6 +79,7 @@ pydantic = "*"
[pypi-dependencies]
repror = { path = ".", editable = true }
conda-recipe-manager = { git = "https://github.com/conda-incubator/conda-recipe-manager.git" }
pytest-depends = "*"


[environments]
Expand Down
6 changes: 4 additions & 2 deletions src/repror/cli/build_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
from rich import print


def recipes_for_names(recipe_names: Optional[list[str]]) -> list[Recipe | RemoteRecipe]:
def recipes_for_names(
recipe_names: Optional[list[str]], config_path: str = "config.yaml"
) -> list[Recipe | RemoteRecipe]:
"""
Get recipes objects for the given names. If no names are given, return all recipes
"""
all_recipes = load_all_recipes()
all_recipes = load_all_recipes(config_path)
if recipe_names:
recipes_to_build = []
all_recipes_names = [recipe.name for recipe in all_recipes]
Expand Down
21 changes: 16 additions & 5 deletions src/repror/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from rich.live import Live

from repror.internals.config import load_config
from repror.internals.db import get_rebuild_data
from repror.internals.db import get_rebuild_data, setup_engine
from repror.internals.print import print
from repror.internals import patch_database
from repror.internals.rattler_build import rattler_build_hash
Expand Down Expand Up @@ -39,6 +39,7 @@ def main(
skip_setup_rattler_build: bool = False,
in_memory_sql: bool = False,
no_output: bool = False,
config_path: str = "config.yaml",
):
"""
\bRepror is a tool to:
Expand All @@ -47,17 +48,23 @@ def main(
- Rewrite the reproducible-builds README.md file with update statistics
"""
global_options.no_output = no_output
global_options.config_path = config_path
if skip_setup_rattler_build:
print("[dim yellow]Will skip setting up rattler-build[/dim yellow]")
global_options.skip_setup_rattler_build = True
if in_memory_sql:
print("[yellow]Will use in-memory SQLite database[/yellow]")
global_options.in_memory_sql = True
setup_engine(in_memory_sql)


@app.command()
def generate_recipes(
    all_: Annotated[
        bool, typer.Option("--all", help="Generate all recipe names")
    ] = False,
):
    """Generate list of recipes from the configuration file. By default it will print only the ones that are not built yet."""
    # NOTE(review): the rendered diff interleaved the old and new definitions of
    # this command; this is the post-commit version with the "--all" option.
    generate.generate_recipes(rattler_build_hash=rattler_build_hash(), all_=all_)


Expand Down Expand Up @@ -95,7 +102,9 @@ def build_recipe(
_check_local_rattler_build()
else:
os.environ["RATTLER_BUILD_BIN"] = str(rattler_build_exe)
recipes_to_build = build.recipes_for_names(recipe_names)
recipes_to_build = build.recipes_for_names(
recipe_names, global_options.config_path
)

build.build_recipes(recipes_to_build, Path(tmp_dir), force, patch, actions_url)
if run_rebuild:
Expand Down Expand Up @@ -128,7 +137,9 @@ def rebuild_recipe(
_check_local_rattler_build()
else:
os.environ["RATTLER_BUILD_BIN"] = str(rattler_build_exe)
recipes_to_rebuild = build.recipes_for_names(recipe_names)
recipes_to_rebuild = build.recipes_for_names(
recipe_names, global_options.config_path
)
rebuild.rebuild_recipe(
recipes_to_rebuild, Path(tmp_dir), force, patch, actions_url
)
Expand Down
8 changes: 5 additions & 3 deletions src/repror/internals/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ def calculate_hash(conda_file: Path):


def find_conda_file(build_folder: Path) -> Path:
    """Find a ``.conda`` package file in the build folder.

    Searches ``build_folder`` recursively and returns the first match (which
    currently is not *the* correct way to pick among multiple packages).

    Args:
        build_folder: Directory to search recursively for ``*.conda`` files.

    Returns:
        Path to the first ``*.conda`` file found.

    Raises:
        FileNotFoundError: If no ``*.conda`` file exists under ``build_folder``.
    """
    conda_files = glob.glob(str(build_folder) + "/**/*.conda", recursive=True)
    if conda_files:
        return Path(conda_files[0])

    raise FileNotFoundError(f"No conda file found in the build folder {build_folder}")


def find_all_conda_files(build_folder: Path):
Expand Down
70 changes: 49 additions & 21 deletions src/repror/internals/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from collections import defaultdict
from functools import lru_cache
from pathlib import Path
import tempfile
from typing import Optional
import yaml
from pydantic import BaseModel, Field

# from repror.internals.git import checkout_branch_or_commit, clone_repo
from multiprocessing.pool import ThreadPool
from repror.internals.db import Recipe as RecipeDB, RemoteRecipe, get_recipe, save
from repror.internals.recipe import (
get_recipe_name,
Expand All @@ -28,6 +29,9 @@ class RemoteRepository(BaseModel):
rev: str
recipes: list[LocalRecipe]

    def __hash__(self):
        # Hash by repository identity (url + rev) only, ignoring the recipe
        # list, so RemoteRepository instances can be used as dict/set keys
        # (e.g. to group recipes per repository before fetching).
        return hash((self.url, self.rev))


class RattlerBuildConfig(BaseModel):
rev: str
Expand All @@ -54,34 +58,58 @@ def save_config(data: ConfigYaml, config_path: str = "config.yaml"):
yaml.safe_dump(data_as_dict, file)


def load_remote_recipes(
    repo: RemoteRepository, recipes: list[LocalRecipe], clone_dir: Path
) -> list[RemoteRecipe]:
    """Fetch recipe configurations for *recipes* from the remote *repo*.

    For each recipe, loads its config from the repository (cloned into
    *clone_dir*), derives its name and a content hash, and builds a
    ``RemoteRecipe`` record. The records are returned, NOT saved — persisting
    them is left to the caller.

    Args:
        repo: Remote repository (url + rev) the recipes live in.
        recipes: Recipes to fetch from this repository.
        clone_dir: Directory the repository is cloned into for reading configs.

    Returns:
        One ``RemoteRecipe`` per entry in *recipes*, in the same order.
    """
    remote_recipes = []
    for recipe in recipes:
        # Caller only passes recipes that were missing from the database.
        logger.debug(f"Recipe {recipe.path} not found in the database, adding it")
        remote_config, raw_config = load_remote_recipe_config(
            repo.url, repo.rev, recipe.path, Path(clone_dir)
        )
        recipe_name = get_recipe_name(remote_config)
        # NOTE(review): hashes files under the *relative* recipe path —
        # presumably resolved against the working directory; confirm this
        # points at the intended checkout.
        recipe_content_hash = recipe_files_hash(Path(recipe.path).parent)
        stored_recipe = RemoteRecipe(
            name=recipe_name,
            url=repo.url,
            path=str(recipe.path),
            raw_config=raw_config,
            rev=repo.rev,
            content_hash=recipe_content_hash,
        )
        remote_recipes.append(stored_recipe)
    return remote_recipes


def load_all_recipes(config_path: str = "config.yaml") -> list[RecipeDB | RemoteRecipe]:
config = load_config(config_path)
recipes = []
with tempfile.TemporaryDirectory() as clone_dir:
# iterate over existing recipes
# this is done to avoid not-so-intuitive setup of the :memory: database
# with the StaticPool for the Session
# it also seems to throw flush errors when same session is used for multiple threads
# so I thought that it would be better to separate fetching and saving
recipes_to_fetch = defaultdict(list)
for repo in config.repositories:
for recipe in repo.recipes:
stored_recipe = get_recipe(repo.url, recipe.path, repo.rev)
if not stored_recipe:
logger.debug(
f"Recipe {recipe.path} not found in the database, adding it"
)
remote_config, raw_config = load_remote_recipe_config(
repo.url, repo.rev, recipe.path, Path(clone_dir)
)
recipe_name = get_recipe_name(remote_config)
recipe_content_hash = recipe_files_hash(Path(recipe.path).parent)
stored_recipe = RemoteRecipe(
name=recipe_name,
url=repo.url,
path=str(recipe.path),
raw_config=raw_config,
rev=repo.rev,
content_hash=recipe_content_hash,
)
save(stored_recipe)
else:
if stored_recipe:
logger.debug(f"Recipe {recipe.path} found in the database")
recipes.append(stored_recipe)
recipes.append(stored_recipe)
else:
recipes_to_fetch[repo].append(recipe)

with ThreadPool() as pool:
remote_recipes = pool.starmap(
load_remote_recipes,
[
(repo, recipes, clone_dir)
for repo, recipes in recipes_to_fetch.items()
],
)
[save(recipe) for recipe in recipes for recipes in remote_recipes]
[recipes.extend(recipe_list) for recipe_list in remote_recipes]

for local in config.local:
local_config = load_recipe_config(local.path)
Expand Down
20 changes: 8 additions & 12 deletions src/repror/internals/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pydantic import BaseModel
from sqlalchemy import func, text
from typing import Sequence
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlmodel import (
Field,
Relationship,
Expand All @@ -27,15 +27,15 @@

from repror.internals.recipe import clone_remote_recipe
from .print import print
from .options import global_options


# Suppress SQLAlchemy INFO logs
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)

# Module-level engine; initialized lazily by setup_engine().
engine = None
# Create a session factory that binds to SQLModelSession, wrapped in a
# scoped_session so each thread gets its own session registry (the cloning
# work runs in a ThreadPool).
session_factory = sessionmaker(class_=SqlModelSession, expire_on_commit=False)
__Session = scoped_session(session_factory)

# Name of the production database
PROD_DB = "repro.db"
Expand All @@ -55,7 +55,7 @@ class BuildState(str, Enum):

def create_db_and_tables():
    """Create the database and tables, if they don't exist.

    Requires ``setup_engine`` to have been called first so the module-level
    ``engine`` is initialized.
    """
    global engine
    assert engine  # This should not fail
    SQLModel.metadata.create_all(engine)

Expand All @@ -66,10 +66,11 @@ def setup_engine(in_memory: bool = False):
global engine, __Session
if engine:
# Engine is already set, skip initialization
print("Engine already set, skipping initialization")
return

if in_memory:
engine = create_engine("sqlite:///:memory:", echo=False)
engine = create_engine("sqlite:///:memory:")
else:
# Get the name of the database from an environment variable
# or use the default name which is a local database
Expand Down Expand Up @@ -100,15 +101,10 @@ def setup_local_db() -> sessionmaker[SqlModelSession]:
return session


def __set_engine() -> None:
global engine
if not engine:
setup_engine(global_options.in_memory_sql)


def get_session() -> SqlModelSession:
    """Get a new session from the scoped session registry.

    Requires ``setup_engine`` to have been called first so the module-level
    ``engine`` is initialized.
    """
    global engine
    assert engine
    return __Session()


Expand Down
2 changes: 2 additions & 0 deletions src/repror/internals/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class GlobalOptions:
# This is useful when we want to run a command and not show the output
# when generating recipe names that are used to start dynamic jobs
no_output: bool = False
# What config file to use for loading
config_path: str = "config.yaml"


global_options = GlobalOptions()
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ def setup_recipe_directory(tmp_path: Path):
return recipe_folder


@pytest.fixture
def test_config_yaml_path():
    """Return the path to the small test configuration YAML (tests/data/test_config.yaml)."""
    return Path(__file__).parent / "data" / "test_config.yaml"


@pytest.fixture(scope="session")
def in_memory_session():
return setup_local_db()
Expand Down
11 changes: 11 additions & 0 deletions tests/data/test_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# A smaller subset of config.yaml used for testing
local:
- path: recipes/boltons/recipe.yaml
rattler-build:
rev: a47f907eaacc345dcd9441fe08d43057b18e2e83
url: https://github.com/prefix-dev/rattler-build.git
repositories:
- recipes:
- path: examples/rich/recipe.yaml
rev: c609b1d5ea67ebd095c2077d113d07694cf55fdb
url: https://github.com/prefix-dev/rattler-build.git
Loading

0 comments on commit 18f34ba

Please sign in to comment.