Skip to content

Commit

Permalink
ignore some keys when checking for project changes
Browse files Browse the repository at this point in the history
  • Loading branch information
maximlt committed Oct 22, 2024
1 parent 020dd53 commit 393e003
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 21 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ doc/gallery/*
jupyter_execute/
builtdocs/

# Ignore output of the git diff
.diff

# Ignore output of test_small_data_setup
*/tmp_catalog.yml

Expand Down
110 changes: 92 additions & 18 deletions dodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import collections
import contextlib
import datetime
import functools
import glob
import imghdr
import itertools
Expand Down Expand Up @@ -40,6 +41,21 @@
'template',
]

PROJECT_CONFIG_IGNORE_KEYS = [
# Website only
'description',
'examples_config.created',
'examples_config.maintainers',
'examples_config.labels',
'examples_config.title',
# TODO: add categories

# TODO: Ideally, we could ignore these two to test/build the project
# but consider them to re-deploy a project.
# 'examples_config.deployments',
# 'commands',
]

README_TEMPLATE = 'readme_template.md'

# But it's included on the dev site if this env var is set.
Expand Down Expand Up @@ -264,12 +280,58 @@ def last_commit_date(name, root='.', verbose=True):
return last_committer_date


def print_changes_in_dir(filepath='.diff'):
def remove_ignored_keys(mapping, ignored_keys):
"""
>>> d = {'a': 1, 'b': {'c': 3, 'd': 4}, 'e': 5}
>>> remove_ignored_keys(d, ['a', 'b.d'])
{'b': {'c': 3}, 'e': 5}
"""
for key in ignored_keys:
mapping = remove_nested_key(mapping, key)
return mapping


def remove_nested_key(mapping, ref):
expanded = ref.split('.')
obj = mapping
for i, key in enumerate(expanded, 1):
if key in obj:
if i == len(expanded):
# last
del obj[key]
else:
obj = obj[key]
return mapping


def yaml_file_changed(file_path, git_cmd_tmpl, ignore_keys=PROJECT_CONFIG_IGNORE_KEYS):
"""
Check whether the content of a yaml file changed between the current branch
and some git reference, optionally ignoring some keys.
"""
from yaml import safe_load

with open(file_path, 'r') as f:
current_yaml = safe_load(f)

previous_version = subprocess.run(
git_cmd_tmpl.format(file_path=file_path),
stdout=subprocess.PIPE,
shell=True,
)
previous_yaml = safe_load(previous_version.stdout.decode())

current_yaml = remove_ignored_keys(current_yaml, ignore_keys)
previous_yaml = remove_ignored_keys(previous_yaml, ignore_keys)

return current_yaml != previous_yaml


def print_changes_in_dir(paths: list[str], project_file_changed):
"""Dumps as JSON a dict of the changed projects and removed projects.
New projects are in the changed list.
"""
paths = pathlib.Path(filepath).read_text().splitlines()
paths = [pathlib.Path(p) for p in paths]
all_projects = set(all_project_names(root=''))
changed_dirs = []
Expand All @@ -283,7 +345,11 @@ def print_changes_in_dir(filepath='.diff'):
if root in DEFAULT_EXCLUDE:
continue
if root in all_projects:
changed_dirs.append(root)
if path.name == 'anaconda-project.yml':
if project_file_changed(path):
changed_dirs.append(root)
else:
changed_dirs.append(root)
else:
removed_dirs.append(root)

Expand Down Expand Up @@ -965,27 +1031,35 @@ def task_util_list_changed_dirs_with_main():
"""
Print the projects that changed compared to main
"""
return {
'actions': [
'git fetch origin main',
'git diff --merge-base --name-only origin/main > .diff',
print_changes_in_dir,
],
'teardown': [lambda: pathlib.Path('.diff').unlink(missing_ok=True)]
}
def list_changed_dirs_with_main():
subprocess.run(
['git', 'fetch', 'origin', 'main']
)
result = subprocess.run(
['git', 'diff', '--merge-base', '--name-only', 'origin/main'],
stdout=subprocess.PIPE
)
files = result.stdout.decode().splitlines()
tmpl = 'git show $(git merge-base origin/main HEAD):{file_path}'
print_changes_in_dir(files, functools.partial(yaml_file_changed, git_cmd_tmpl=tmpl))

return {'actions': [list_changed_dirs_with_main]}


def task_util_list_changed_dirs_with_last_commit():
"""
Print the projects that changed compared to the last commit.
"""
return {
'actions': [
'git diff HEAD^ HEAD --name-only > .diff',
print_changes_in_dir,
],
'teardown': [lambda: pathlib.Path('.diff').unlink(missing_ok=True)]
}
def list_changed_dirs_with_main():
result = subprocess.run(
['git', 'diff', '--merge-base', '--name-only', 'origin/main'],
stdout=subprocess.PIPE
)
files = result.stdout.decode().splitlines()
tmpl = 'git show HEAD^:{file_path}'
print_changes_in_dir(files, functools.partial(yaml_file_changed, git_cmd_tmpl=tmpl))

return {'actions': [list_changed_dirs_with_main]}


def task_util_list_comma_separated_projects():
Expand Down

0 comments on commit 393e003

Please sign in to comment.