From 9087529d133aa07d5ee4865b944085fb9630ceef Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 17 Jul 2023 14:52:48 -0700 Subject: [PATCH 01/27] implement Flow __len__, __contains__, __iter__, __repr__ and arithmetic operators --- src/jobflow/core/flow.py | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 2be987a3..1cab7c18 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -144,6 +144,63 @@ def __init__( self.add_jobs(jobs) self.output = output + def __len__(self) -> int: + """Get the number of jobs or subflows in the flow.""" + return len(self.jobs) + + def __getitem__(self, idx: int) -> Flow | jobflow.Job: + """Get the job or subflow at the given index.""" + return self.jobs[idx] + + def __setitem__(self, idx: int, value: Flow | jobflow.Job) -> None: + """Set the job or subflow at the given index.""" + self.jobs[idx] = value + + def __iter__(self) -> list[Flow | jobflow.Job]: + """Iterate through the jobs in the flow.""" + return iter(self.jobs) + + def __contains__(self, item: Flow | jobflow.Job) -> bool: + """Check if the flow contains a job or subflow.""" + return item in self.jobs + + def __add__(self, other: Flow | jobflow.Job) -> Flow: + """Add a job or subflow to the flow.""" + return Flow([self, other]) + + def __sub__(self, other: Flow | jobflow.Job) -> Flow: + """Remove a job or subflow from the flow.""" + return Flow([job for job in self.jobs if job != other]) + + def __repr__(self) -> str: + """Get a string representation of the flow.""" + name, uuid = self.name, self.uuid + job_reprs = "\n".join(f" {idx}. {job}" for idx, job in enumerate(self.jobs, 1)) + return ( + f"Flow({name=}, {uuid=})\nconsisting of {len(self.jobs)} jobs:\n{job_reprs}" + ) + + def __eq__(self, other: Flow | jobflow.Job) -> bool: + """Check if the flow is equal to another flow.""" + if not isinstance(other, Flow): + return False + return self.uuid == other.uuid + + def __hash__(self) -> int: + """Get the hash of the flow.""" + return hash(self.uuid) + + def __copy__(self) -> Flow: + """Get a copy of the flow.""" + return Flow( + jobs=self.jobs, + output=self.output, + name=self.name, + order=self.order, + uuid=self.uuid, + hosts=self.hosts, + ) + @property def jobs(self) -> tuple[Flow | jobflow.Job, ...]: """ From 5f105d453c47dd349179ae2bb575c28612d6e9bd Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 17 Jul 2023 14:53:24 -0700 Subject: [PATCH 02/27] implement Job implement __contains__, __hash__, __repr__, __eq__ --- src/jobflow/core/job.py | 60 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index b3ceda00..8532fc6f 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -179,7 +179,6 @@ def decorator(func): @wraps(func) def get_job(*args, **kwargs) -> Job: - f = func if len(args) > 0: # see if the first argument has a function with the same name as @@ -367,6 +366,63 @@ def __init__( f"inputs to your Job." ) + def __repr__(self): + """Get a string representation of the job.""" + name, uuid = self.name, self.uuid + return f"Job({name=}, {uuid=})" + + def __contains__(self, item: Hashable) -> bool: + """ + Check if the job contains a reference to a given UUID. + + Parameters + ---------- + item + A UUID. + + Returns + ------- + bool + Whether the job contains a reference to the UUID. + """ + return item in self.input_uuids + + def __eq__(self, other: Job) -> bool: + """ + Check if two jobs are equal. + + Parameters + ---------- + other + Another job. + + Returns + ------- + bool + Whether the jobs are equal. + """ + return self.uuid == other.uuid + + def __hash__(self) -> int: + """Get the hash of the job.""" + return hash(self.uuid) + + def __getitem__(self, item: str | int) -> OutputReference: + """ + Get an output reference from the job. + + Parameters + ---------- + item + The key of the output reference. + + Returns + ------- + OutputReference + An output reference. + """ + return self.output[item] + @property def input_references(self) -> tuple[jobflow.OutputReference, ...]: """ @@ -574,7 +630,6 @@ def run(self, store: jobflow.JobStore) -> Response: passed_config = None if passed_config: - if response.addition is not None: pass_manager_config(response.addition, passed_config) @@ -1323,7 +1378,6 @@ def pass_manager_config( all_jobs: list[Job] = [] def get_jobs(arg): - if isinstance(arg, Job): all_jobs.append(arg) elif isinstance(arg, (list, tuple)): From e0c15555d31d07f09513eeab5aee4a38f43f69df Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Tue, 18 Jul 2023 15:17:50 -0700 Subject: [PATCH 03/27] increase Flow.__repr__ indentation for every level of subflow --- src/jobflow/core/flow.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 1cab7c18..95d67274 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -172,12 +172,17 @@ def __sub__(self, other: Flow | jobflow.Job) -> Flow: """Remove a job or subflow from the flow.""" return Flow([job for job in self.jobs if job != other]) - def __repr__(self) -> str: + def __repr__(self, level=0, index=None) -> str: """Get a string representation of the flow.""" + indent = " " * level name, uuid = self.name, self.uuid - job_reprs = "\n".join(f" {idx}. {job}" for idx, job in enumerate(self.jobs, 1)) + flow_index = f"{index}." if index is not None else "" + job_reprs = "\n".join( + f"{indent}{flow_index}{idx}. {job.__repr__(level + 1, f'{flow_index}{idx}') if isinstance(job, Flow) else job}" + for idx, job in enumerate(self.jobs, 1) + ) return ( - f"Flow({name=}, {uuid=})\nconsisting of {len(self.jobs)} jobs:\n{job_reprs}" + f"Flow({name=}, {uuid=}) consisting of {len(self.jobs)} jobs:\n{job_reprs}" ) def __eq__(self, other: Flow | jobflow.Job) -> bool: From f9d3ece6f57e78a43f2981517a0780487e560e57 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Tue, 18 Jul 2023 15:20:12 -0700 Subject: [PATCH 04/27] drop 'consisting of n jobs' from repr --- src/jobflow/core/flow.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 95d67274..40ad9909 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -181,9 +181,7 @@ def __repr__(self, level=0, index=None) -> str: f"{indent}{flow_index}{idx}. {job.__repr__(level + 1, f'{flow_index}{idx}') if isinstance(job, Flow) else job}" for idx, job in enumerate(self.jobs, 1) ) - return ( - f"Flow({name=}, {uuid=}) consisting of {len(self.jobs)} jobs:\n{job_reprs}" - ) + return f"Flow({name=}, {uuid=})\n{job_reprs}" def __eq__(self, other: Flow | jobflow.Job) -> bool: """Check if the flow is equal to another flow.""" From 771869dc0f8b1195f0928a9e56472392a60e2bca Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:06:55 -0700 Subject: [PATCH 05/27] add Flow.jobs setter --- .pre-commit-config.yaml | 4 ++-- src/jobflow/core/flow.py | 29 ++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c8fa10a8..351b01d0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,8 +2,8 @@ default_language_version: python: python3 exclude: '^src/atomate2/vasp/schemas/calc_types/' repos: -- repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.250 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.280 hooks: - id: ruff args: [--fix] diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 40ad9909..183ebde8 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -8,6 +8,7 @@ from monty.json import MSONable +import jobflow from jobflow.core.reference import find_and_get_references from jobflow.utils import ValueEnum, contains_flow_or_job, suuid @@ -16,7 +17,7 @@ from networkx import DiGraph - import jobflow + from jobflow import Job __all__ = ["JobOrder", "Flow", "get_flow"] @@ -205,7 +206,7 @@ def __copy__(self) -> Flow: ) @property - def jobs(self) -> tuple[Flow | jobflow.Job, ...]: + def jobs(self) -> tuple[Flow | Job, ...]: """ Get the Jobs in the Flow. @@ -216,6 +217,20 @@ def jobs(self) -> tuple[Flow | jobflow.Job, ...]: """ return self._jobs + @jobs.setter + def jobs(self, jobs: list[Flow | Job] | Job | Flow): + """ + Set the Jobs in the Flow. + + Parameters + ---------- + jobs + The list of Jobs/Flows of the Flow. + """ + if isinstance(jobs, (Flow, jobflow.Job)): + jobs = [jobs] + self._jobs = tuple(jobs) + @property def output(self) -> Any: """ @@ -626,9 +641,9 @@ def update_metadata( def update_config( self, config: jobflow.JobConfig | dict, - name_filter: str = None, - function_filter: Callable = None, - attributes: list[str] | str = None, + name_filter: str | None = None, + function_filter: Callable | None = None, + attributes: list[str] | str | None = None, dynamic: bool = True, ): """ @@ -726,7 +741,7 @@ def add_hosts_uuids( for j in self.jobs: j.add_hosts_uuids(hosts_uuids, prepend=prepend) - def add_jobs(self, jobs: list[Flow | jobflow.Job] | jobflow.Job | Flow): + def add_jobs(self, jobs: list[Flow | Job] | Job | Flow): """ Add Jobs or Flows to the Flow. @@ -803,7 +818,7 @@ def remove_jobs(self, indices: int | list[int]): def get_flow( - flow: Flow | jobflow.Job | list[jobflow.Job], + flow: Flow | Job | list[jobflow.Job], ) -> Flow: """ Check dependencies and return flow object. From a92252efe6f44dc80bff54523536dfbcdc0ae383 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:10:15 -0700 Subject: [PATCH 06/27] __setitem__ raise TypeError if value is not a Job or Flow --- src/jobflow/core/flow.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 183ebde8..3424b713 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -149,13 +149,21 @@ def __len__(self) -> int: """Get the number of jobs or subflows in the flow.""" return len(self.jobs) - def __getitem__(self, idx: int) -> Flow | jobflow.Job: - """Get the job or subflow at the given index.""" + def __getitem__(self, idx: int | slice) -> Flow | Job | list[Flow | Job]: + """Get the job or subflow at the given index/slice.""" return self.jobs[idx] - def __setitem__(self, idx: int, value: Flow | jobflow.Job) -> None: + def __setitem__( + self, idx: int | slice, value: Flow | Job | list[Flow | Job] + ) -> None: """Set the job or subflow at the given index.""" - self.jobs[idx] = value + if not isinstance(value, (Flow, jobflow.Job)): + raise TypeError( + f"Flow can only contain Job or Flow objects, not {type(value)}" + ) + jobs = list(self.jobs) + jobs[idx] = value + self.jobs = jobs def __iter__(self) -> list[Flow | jobflow.Job]: """Iterate through the jobs in the flow.""" From 11a02ce118798c606ab628eeb7d7a6717e2331de Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:10:50 -0700 Subject: [PATCH 07/27] Flow.__sub__ raise ValueError if the value is not in the flow --- src/jobflow/core/flow.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 3424b713..5b5d6e9a 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -121,11 +121,11 @@ class Flow(MSONable): def __init__( self, - jobs: list[Flow | jobflow.Job] | jobflow.Job | Flow, + jobs: list[Flow | Job] | Job | Flow, output: Any | None = None, name: str = "Flow", order: JobOrder = JobOrder.AUTO, - uuid: str = None, + uuid: str | None = None, hosts: list[str] | None = None, ): from jobflow.core.job import Job @@ -165,21 +165,25 @@ def __setitem__( jobs[idx] = value self.jobs = jobs - def __iter__(self) -> list[Flow | jobflow.Job]: + def __iter__(self) -> list[Flow | Job]: """Iterate through the jobs in the flow.""" return iter(self.jobs) - def __contains__(self, item: Flow | jobflow.Job) -> bool: + def __contains__(self, item: Flow | Job) -> bool: """Check if the flow contains a job or subflow.""" return item in self.jobs - def __add__(self, other: Flow | jobflow.Job) -> Flow: + def __add__(self, other: Flow | Job) -> Flow: """Add a job or subflow to the flow.""" return Flow([self, other]) - def __sub__(self, other: Flow | jobflow.Job) -> Flow: + def __sub__(self, other: Flow | Job) -> Flow: """Remove a job or subflow from the flow.""" - return Flow([job for job in self.jobs if job != other]) + if other not in self.jobs: + raise ValueError(f"{other!r} not found in flow") + new_flow = self.__copy__() + new_flow.jobs = [job for job in new_flow.jobs if job != other] + return new_flow def __repr__(self, level=0, index=None) -> str: """Get a string representation of the flow.""" From c3ca46514a4dd1f2a2c2f3e2bf495e8e44b3f304 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:11:36 -0700 Subject: [PATCH 08/27] fix line too long --- src/jobflow/core/flow.py | 7 ++++--- src/jobflow/core/job.py | 19 ++++++++----------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 5b5d6e9a..3175ca78 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -191,12 +191,13 @@ def __repr__(self, level=0, index=None) -> str: name, uuid = self.name, self.uuid flow_index = f"{index}." if index is not None else "" job_reprs = "\n".join( - f"{indent}{flow_index}{idx}. {job.__repr__(level + 1, f'{flow_index}{idx}') if isinstance(job, Flow) else job}" - for idx, job in enumerate(self.jobs, 1) + f"{indent}{flow_index}{i}. " + f"{j.__repr__(level + 1, f'{flow_index}{i}') if isinstance(j, Flow) else j}" + for i, j in enumerate(self.jobs, 1) ) return f"Flow({name=}, {uuid=})\n{job_reprs}" - def __eq__(self, other: Flow | jobflow.Job) -> bool: + def __eq__(self, other: Flow | Job) -> bool: """Check if the flow is equal to another flow.""" if not isinstance(other, Flow): return False diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index 8532fc6f..442781fe 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -194,10 +194,7 @@ def get_job(*args, **kwargs) -> Job: args = args[1:] return Job( - function=f, - function_args=args, - function_kwargs=kwargs, - **job_kwargs, + function=f, function_args=args, function_kwargs=kwargs, **job_kwargs ) get_job.original = func @@ -306,13 +303,13 @@ class Job(MSONable): def __init__( self, function: Callable, - function_args: tuple[Any, ...] = None, - function_kwargs: dict[str, Any] = None, + function_args: tuple[Any, ...] | None = None, + function_kwargs: dict[str, Any] | None = None, output_schema: type[BaseModel] | None = None, - uuid: str = None, + uuid: str | None = None, index: int = 1, name: str | None = None, - metadata: dict[str, Any] = None, + metadata: dict[str, Any] | None = None, config: JobConfig = None, hosts: list[str] | None = None, metadata_updates: list[dict[str, Any]] | None = None, @@ -992,9 +989,9 @@ def update_metadata( def update_config( self, config: JobConfig | dict, - name_filter: str = None, - function_filter: Callable = None, - attributes: list[str] | str = None, + name_filter: str | None = None, + function_filter: Callable | None = None, + attributes: list[str] | str | None = None, dynamic: bool = True, ): """ From 2fb76d72bc32a378463e4b1626d7badf2fca56c8 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:11:58 -0700 Subject: [PATCH 09/27] add test_flow_magic_methods() --- tests/core/test_flow.py | 81 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index f454d662..6242e378 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -831,3 +831,84 @@ def test_update_config(): assert flow.jobs[0].config.resolve_references assert flow.jobs[1].config.manager_config == {"a": "b"} assert flow.jobs[1].config.resolve_references + + +def test_flow_magic_methods(): + from jobflow import Flow + + # prepare test jobs and flows + job1, job2, job3, job4, job5, job6 = map(get_test_job, range(6)) + + flow1 = Flow([job1]) + flow2 = Flow([job2, job3]) + + # test __len__ + assert len(flow1) == 1 + assert len(flow2) == 2 + + # test __getitem__ + assert flow2[0] == job2 + assert flow2[1] == job3 + + # test __setitem__ + flow2[0] = job4 + assert flow2[0] == job4 + + # test __iter__ + for job in flow2: + assert job in [job4, job3] + + # test __contains__ + assert job1 in flow1 + assert job4 in flow2 + assert job3 in flow2 + + # test __add__ + flow3 = flow1 + job5 + assert len(flow3) == 2 + assert job5 in flow3 + assert flow1 in flow3 + + # test __sub__ + flow4 = flow3 - job5 + assert len(flow4) == 1 == len(flow1) + assert job5 not in flow4 + + # test __eq__ and __hash__ + assert flow1 == flow1 + assert flow1 != flow2 + assert hash(flow1) != hash(flow2) + + # test __copy__ + flow_copy = flow1.__copy__() + assert flow_copy == flow1 + assert id(flow_copy) != id(flow1) + + # test __getitem__ with out of range index + with pytest.raises(IndexError): + _ = flow1[10] + + # test __setitem__ with out of range index + with pytest.raises(IndexError): + flow1[10] = job4 + + # test __contains__ with job not in flow + assert job5 not in flow1 + + # test __add__ with non-job item + with pytest.raises(TypeError): + _ = job6 + "not a job" + + # test __sub__ with non-job item + with pytest.raises(TypeError): + _ = job6 - "not a job" + + # test __sub__ with job not in flow + with pytest.raises( + ValueError, match=r"Job\(name='add', uuid='.+'\) not found in flow" + ): + _ = flow1 - job5 + + # test __eq__ with non-flow item + assert flow1 != "not a flow" + From d38c8e00f44ee607ab59a2449fc94b6cb887dd31 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:12:35 -0700 Subject: [PATCH 10/27] add test_flow_magic_methods_edge_cases() still unfinished --- tests/core/test_flow.py | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index 6242e378..920f98a6 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -9,7 +9,7 @@ def div(a, b=2): return a / b -def get_test_job(): +def get_test_job(*args): from jobflow import Job return Job(add, function_args=(1, 2)) @@ -504,7 +504,6 @@ def test_update_kwargs(): def test_update_maker_kwargs(): - # test no filter flow = get_maker_flow() flow.update_maker_kwargs({"b": 10}) @@ -912,3 +911,37 @@ def test_flow_magic_methods(): # test __eq__ with non-flow item assert flow1 != "not a flow" + +def test_flow_magic_methods_edge_cases(): + from jobflow import Flow + + # prepare test jobs and flows + job1, job2, job3, job4, job5 = map(get_test_job, range(5)) + _empty_flow = Flow([]) + _subflow = Flow([job5]) + + flow1 = Flow([job1, job2, job3, job4]) + + # test negative indexing with __getitem__ and __setitem__ + assert flow1[-1] == job4 + flow1[-1] = job5 + assert flow1[-1] == job5 + + # TODO these tests are currently failing + # test slicing with __getitem__ and __setitem__ + # assert flow1[1:3] == [job2, job3] + # flow1[1:3] = [job4, job5] + # assert flow1[1:3] == [job4, job5] + + # test __add__ and __sub__ with an empty flow + # assert len(flow1 + empty_flow) == len(flow1) + # assert len(flow1 - empty_flow) == len(flow1) + + # # test __add__ and __sub__ with job already in the flow + # assert len(flow1 + job5) == len(flow1) + # flow1 - job5 + # assert job5 not in flow1 + + # # test __contains__ with a flow object + # assert subflow in flow1 + From 387da32dc88ab8910f0c6d75a9baac611162669c Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 28 Jul 2023 20:13:03 -0700 Subject: [PATCH 11/27] add test_flow_repr() --- tests/core/test_flow.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index 920f98a6..b07b0a51 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -945,3 +945,36 @@ def test_flow_magic_methods_edge_cases(): # # test __contains__ with a flow object # assert subflow in flow1 + +def test_flow_repr(): + from jobflow import Flow + + # prepare jobs and flows + job1, job2, job3, job4, job5, job6, job7 = map(get_test_job, range(7)) + + flow1 = Flow([job1]) + flow2 = Flow([job2, job3]) + sub_flow1 = Flow([job6, job7]) + flow3 = Flow([job4, job5, sub_flow1]) + flow4 = Flow([flow1, flow2, flow3]) + + flow_repr = repr(flow4).splitlines() + + lines = ( + "Flow(name='Flow', uuid='", + "1. Flow(name='Flow', uuid='", + " 1.1. Job(name='add', uuid='", + "2. Flow(name='Flow', uuid='", + " 2.1. Job(name='add', uuid='", + " 2.2. Job(name='add', uuid='", + "3. Flow(name='Flow', uuid='", + " 3.1. Job(name='add', uuid='", + " 3.2. Job(name='add', uuid='", + " 3.3. Flow(name='Flow', uuid='", + " 3.3.1. Job(name='add', uuid='", + " 3.3.2. Job(name='add', uuid='", + ) + + assert len(lines) == len(flow_repr) + for expected, line in zip(lines, flow_repr): + assert line.startswith(expected), f"{line=} doesn't start with {expected=}" From 47a03472ec60f88c39254040407615f9f9c58e13 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 29 Jul 2023 13:46:43 -0700 Subject: [PATCH 12/27] self.__class__.__name__ -> type(self).__name__ --- pyproject.toml | 47 ++++++++++++++++++----------------- src/jobflow/core/job.py | 6 ++--- src/jobflow/core/reference.py | 14 ++++------- tests/core/test_job.py | 10 ++++---- tests/managers/test_local.py | 8 +++--- 5 files changed, 41 insertions(+), 44 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 485498a4..2badcade 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,54 +11,54 @@ license = { text = "modified BSD" } authors = [{ name = "Alex Ganose", email = "alexganose@gmail.com" }] dynamic = ["version"] classifiers = [ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", "Intended Audience :: System Administrators", - "Intended Audience :: Information Technology", "Operating System :: OS Independent", - "Topic :: Other/Nonlisted Topic", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Database :: Front-Ends", + "Topic :: Other/Nonlisted Topic", "Topic :: Scientific/Engineering", ] requires-python = ">=3.8" dependencies = [ + "PyYAML", + "maggma>=0.38.1", "monty>=2021.5.9", - "pydash", "networkx", - "maggma>=0.38.1", "pydantic", - "PyYAML", + "pydash", ] [project.optional-dependencies] docs = [ - "sphinx==7.1.1", - "furo==2023.7.26", - "myst_parser==2.0.0", + "autodoc_pydantic==1.9.0", + "furo==2023.5.20", "ipython==8.14.0", + "myst_parser==2.0.0", "nbsphinx==0.9.2", - "autodoc_pydantic==1.9.0", "sphinx-copybutton==0.5.2", + "sphinx==6.2.0", ] dev = ["pre-commit>=2.12.1"] -tests = ["pytest==7.4.0", "pytest-cov==4.1.0"] +tests = ["pytest-cov==4.1.0", "pytest==7.4.0"] vis = ["matplotlib", "pydot"] fireworks = ["FireWorks"] strict = [ + "FireWorks==2.0.3", + "PyYAML==6.0", + "maggma==0.51.18", + "matplotlib==3.7.2", "monty==2023.5.8", + "moto==4.1.12", "networkx==3.1", - "pydash==7.0.6", - "maggma==0.51.24", "pydantic==1.10.9", - "PyYAML==6.0.1", - "FireWorks==2.0.3", - "matplotlib==3.7.2", + "pydash==7.0.5", "pydot==1.4.2", - "moto==4.1.14", "typing-extensions==4.7.1", ] @@ -91,9 +91,9 @@ no_strict_optional = true [tool.pytest.ini_options] filterwarnings = [ "ignore:.*POTCAR.*:UserWarning", - "ignore:.*magmom.*:UserWarning", - "ignore:.*is not gzipped.*:UserWarning", "ignore:.*input structure.*:UserWarning", + "ignore:.*is not gzipped.*:UserWarning", + "ignore:.*magmom.*:UserWarning", "ignore::DeprecationWarning", ] @@ -109,9 +109,9 @@ source = ["src/"] skip_covered = true show_missing = true exclude_lines = [ + '^\s*@overload( |$)', '^\s*assert False(,|$)', 'if typing.TYPE_CHECKING:', - '^\s*@overload( |$)', ] [tool.ruff] @@ -134,6 +134,7 @@ select = [ "W", # pycodestyle "YTT", # flake8-2020 ] +ignore = ["B028"] pydocstyle.convention = "numpy" isort.known-first-party = ["jobflow"] diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index 442781fe..d95f5d81 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -13,7 +13,7 @@ from jobflow.utils.uuid import suuid if typing.TYPE_CHECKING: - from typing import Any, Callable, Hashable + from typing import Any, Callable, Hashable, Sequence from networkx import DiGraph from pydantic import BaseModel @@ -528,7 +528,7 @@ def host(self): """ return self.hosts[0] if self.hosts else None - def set_uuid(self, uuid: str): + def set_uuid(self, uuid: str) -> None: """ Set the UUID of the job. @@ -1133,7 +1133,7 @@ def __setattr__(self, key, value): else: super().__setattr__(key, value) - def add_hosts_uuids(self, hosts_uuids: str | list[str], prepend: bool = False): + def add_hosts_uuids(self, hosts_uuids: str | Sequence[str], prepend: bool = False): """ Add a list of UUIDs to the internal list of hosts. diff --git a/src/jobflow/core/reference.py b/src/jobflow/core/reference.py index ea5d6e4a..591a2706 100644 --- a/src/jobflow/core/reference.py +++ b/src/jobflow/core/reference.py @@ -264,7 +264,7 @@ def __repr__(self) -> str: else: attribute_str = "" - return f"OutputReference({str(self.uuid)}{attribute_str})" + return f"OutputReference({self.uuid!s}{attribute_str})" def __hash__(self) -> int: """Return a hash of the reference.""" @@ -277,10 +277,8 @@ def __eq__(self, other: Any) -> bool: self.uuid == other.uuid and len(self.attributes) == len(other.attributes) and all( - [ - a[0] == b[0] and a[1] == b[1] - for a, b in zip(self.attributes, other.attributes) - ] + a[0] == b[0] and a[1] == b[1] + for a, b in zip(self.attributes, other.attributes) ) ) return False @@ -288,9 +286,7 @@ def __eq__(self, other: Any) -> bool: @property def attributes_formatted(self): """Get a formatted description of the attributes.""" - return [ - f".{x[1]}" if x[0] == "a" else f"[{repr(x[1])}]" for x in self.attributes - ] + return [f".{x[1]}" if x[0] == "a" else f"[{x[1]!r}]" for x in self.attributes] def as_dict(self): """Serialize the reference as a dict.""" @@ -298,7 +294,7 @@ def as_dict(self): schema_dict = MontyEncoder().default(schema) if schema is not None else None data = { "@module": self.__class__.__module__, - "@class": self.__class__.__name__, + "@class": type(self).__name__, "@version": None, "uuid": self.uuid, "attributes": self.attributes, diff --git a/tests/core/test_job.py b/tests/core/test_job.py index 3273dc75..cce8e768 100644 --- a/tests/core/test_job.py +++ b/tests/core/test_job.py @@ -545,7 +545,7 @@ class MySchema(BaseModel): response = Response.from_job_returns( {"number": "5", "name": "Ian"}, output_schema=MySchema ) - assert response.output.__class__.__name__ == "MySchema" + assert type(response.output).__name__ == "MySchema" assert response.output.number == 5 assert response.output.name == "Ian" @@ -886,22 +886,22 @@ def add_schema_replace(a, b): test_job = add_schema(5, 6) response = test_job.run(memory_jobstore) - assert response.output.__class__.__name__ == "AddSchema" + assert type(response.output).__name__ == "AddSchema" assert response.output.result == 11 test_job = add_schema_dict(5, 6) response = test_job.run(memory_jobstore) - assert response.output.__class__.__name__ == "AddSchema" + assert type(response.output).__name__ == "AddSchema" assert response.output.result == 11 test_job = add_schema_response(5, 6) response = test_job.run(memory_jobstore) - assert response.output.__class__.__name__ == "AddSchema" + assert type(response.output).__name__ == "AddSchema" assert response.output.result == 11 test_job = add_schema_response_dict(5, 6) response = test_job.run(memory_jobstore) - assert response.output.__class__.__name__ == "AddSchema" + assert type(response.output).__name__ == "AddSchema" assert response.output.result == 11 test_job = add_schema_replace(5, 6) diff --git a/tests/managers/test_local.py b/tests/managers/test_local.py index ff78b99e..77ba9a93 100644 --- a/tests/managers/test_local.py +++ b/tests/managers/test_local.py @@ -125,7 +125,7 @@ def test_addition_flow(memory_jobstore, clean_dir, addition_flow): # run with log responses = run_locally(flow, store=memory_jobstore) - uuid2 = [u for u in responses if u != uuid1][0] + uuid2 = next(u for u in responses if u != uuid1) # check responses has been filled assert len(responses) == 2 @@ -150,7 +150,7 @@ def test_detour_flow(memory_jobstore, clean_dir, detour_flow): # run with log responses = run_locally(flow, store=memory_jobstore) - uuid2 = [u for u in responses if u != uuid1 and u != uuid3][0] + uuid2 = next(u for u in responses if u != uuid1 and u != uuid3) # check responses has been filled assert len(responses) == 3 @@ -218,7 +218,7 @@ def test_replace_flow_nested(memory_jobstore, clean_dir, replace_flow_nested): assert len(responses[uuid1]) == 2 assert responses[uuid1][1].output == 11 assert responses[uuid1][1].replace is not None - assert responses[uuid1][2].output["first"].__class__.__name__ == "OutputReference" + assert type(responses[uuid1][2].output["first"]).__name__ == "OutputReference" assert responses[uuid2][1].output == "12345_end" # check store has the activity output @@ -345,7 +345,7 @@ def test_detour_stop_flow(memory_jobstore, clean_dir, detour_stop_flow): # run with log responses = run_locally(flow, store=memory_jobstore) - uuid2 = [u for u in responses if u != uuid1 and u != uuid3][0] + uuid2 = next(u for u in responses if u != uuid1 and u != uuid3) # check responses has been filled assert len(responses) == 2 From 763033ed5e25210426217f42f411e543e0815608 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 29 Jul 2023 13:47:46 -0700 Subject: [PATCH 13/27] use broader typing.Sequence for type hints --- src/jobflow/core/flow.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 3175ca78..5a7a2d9e 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -3,17 +3,18 @@ from __future__ import annotations import logging -import typing import warnings +from typing import TYPE_CHECKING +from git import Sequence from monty.json import MSONable import jobflow from jobflow.core.reference import find_and_get_references from jobflow.utils import ValueEnum, contains_flow_or_job, suuid -if typing.TYPE_CHECKING: - from typing import Any, Callable +if TYPE_CHECKING: + from typing import Any, Callable, Iterator from networkx import DiGraph @@ -121,7 +122,7 @@ class Flow(MSONable): def __init__( self, - jobs: list[Flow | Job] | Job | Flow, + jobs: Sequence[Flow | Job] | Job | Flow, output: Any | None = None, name: str = "Flow", order: JobOrder = JobOrder.AUTO, @@ -149,15 +150,15 @@ def __len__(self) -> int: """Get the number of jobs or subflows in the flow.""" return len(self.jobs) - def __getitem__(self, idx: int | slice) -> Flow | Job | list[Flow | Job]: - """Get the job or subflow at the given index/slice.""" + def __getitem__(self, idx: int | slice) -> Flow | Job | tuple[Flow | Job]: + """Get the job(s) or subflow(s) at the given index/slice.""" return self.jobs[idx] def __setitem__( - self, idx: int | slice, value: Flow | Job | list[Flow | Job] + self, idx: int | slice, value: Flow | Job | Sequence[Flow | Job] ) -> None: - """Set the job or subflow at the given index.""" - if not isinstance(value, (Flow, jobflow.Job)): + """Set the job(s) or subflow(s) at the given index/slice.""" + if not isinstance(value, (Flow, jobflow.Job, Sequence)): raise TypeError( f"Flow can only contain Job or Flow objects, not {type(value)}" ) @@ -165,7 +166,7 @@ def __setitem__( jobs[idx] = value self.jobs = jobs - def __iter__(self) -> list[Flow | Job]: + def __iter__(self) -> Iterator[Flow | Job]: """Iterate through the jobs in the flow.""" return iter(self.jobs) @@ -231,7 +232,7 @@ def jobs(self) -> tuple[Flow | Job, ...]: return self._jobs @jobs.setter - def jobs(self, jobs: list[Flow | Job] | Job | Flow): + def jobs(self, jobs: Sequence[Flow | Job] | Job | Flow): """ Set the Jobs in the Flow. @@ -754,7 +755,7 @@ def add_hosts_uuids( for j in self.jobs: j.add_hosts_uuids(hosts_uuids, prepend=prepend) - def add_jobs(self, jobs: list[Flow | Job] | Job | Flow): + def add_jobs(self, jobs: Job | Flow | Sequence[Flow | Job]) -> None: """ Add Jobs or Flows to the Flow. @@ -774,7 +775,7 @@ def add_jobs(self, jobs: list[Flow | Job] | Job | Flow): for job in jobs: if job.host is not None and job.host != self.uuid: raise ValueError( - f"{job.__class__.__name__} {job.name} ({job.uuid}) already belongs " + f"{type(job).__name__} {job.name} ({job.uuid}) already belongs " f"to another flow." ) if job.uuid in job_ids: From e5aaf0621f68a5221bdb0f063f4708dec4af8db1 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 29 Jul 2023 13:52:29 -0700 Subject: [PATCH 14/27] rewrite __copy__ to __deepcopy__ Shallow copy doesn't make sense; jobs aren't allowed to belong to multiple flows --- src/jobflow/core/flow.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 5a7a2d9e..81b6e2f0 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -2,6 +2,7 @@ from __future__ import annotations +import copy import logging import warnings from typing import TYPE_CHECKING @@ -174,15 +175,19 @@ def __contains__(self, item: Flow | Job) -> bool: """Check if the flow contains a job or subflow.""" return item in self.jobs - def __add__(self, other: Flow | Job) -> Flow: + def __add__(self, other: Job | Flow | Sequence[Flow | Job]) -> Flow: """Add a job or subflow to the flow.""" - return Flow([self, other]) + if not isinstance(other, (Flow, jobflow.Job, Sequence)): + return NotImplemented + new_flow = self.__deepcopy__() + new_flow.add_jobs(other) + return new_flow def __sub__(self, other: Flow | Job) -> Flow: """Remove a job or subflow from the flow.""" if other not in self.jobs: raise ValueError(f"{other!r} not found in flow") - new_flow = self.__copy__() + new_flow = self.__deepcopy__() new_flow.jobs = [job for job in new_flow.jobs if job != other] return new_flow @@ -208,16 +213,21 @@ def __hash__(self) -> int: """Get the hash of the flow.""" return hash(self.uuid) - def __copy__(self) -> Flow: - """Get a copy of the flow.""" - return Flow( - jobs=self.jobs, - output=self.output, - name=self.name, - order=self.order, - uuid=self.uuid, - hosts=self.hosts, - ) + def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Flow: + """Get a deep copy of the flow. + + Shallow copy doesn't make sense; jobs aren't allowed to belong to multiple flows + """ + kwds = self.as_dict() + for key in ("jobs", "@class", "@module", "@version"): + kwds.pop(key) + jobs = copy.deepcopy(self.jobs, memo) + new_flow = Flow(jobs=[], **kwds) + # reassign host + for job in jobs: + job.hosts = [new_flow.uuid] + new_flow.jobs = jobs + return new_flow @property def jobs(self) -> tuple[Flow | Job, ...]: From 1cf1b51f630f49a8f145778fdb4fc4928d5b4e65 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 29 Jul 2023 13:53:01 -0700 Subject: [PATCH 15/27] finish test_flow_magic_methods_edge_cases() --- tests/core/test_flow.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index b07b0a51..44387469 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -866,7 +866,6 @@ def test_flow_magic_methods(): flow3 = flow1 + job5 assert len(flow3) == 2 assert job5 in flow3 - assert flow1 in flow3 # test __sub__ flow4 = flow3 - job5 @@ -878,8 +877,8 @@ def test_flow_magic_methods(): assert flow1 != flow2 assert hash(flow1) != hash(flow2) - # test __copy__ - flow_copy = flow1.__copy__() + # test __deepcopy__ + flow_copy = flow1.__deepcopy__() assert flow_copy == flow1 assert id(flow_copy) != id(flow1) @@ -893,6 +892,7 @@ def test_flow_magic_methods(): # test __contains__ with job not in flow assert job5 not in flow1 + assert flow2 not in flow1 # test __add__ with non-job item with pytest.raises(TypeError): @@ -916,10 +916,9 @@ def test_flow_magic_methods_edge_cases(): from jobflow import Flow # prepare test jobs and flows - job1, job2, job3, job4, job5 = map(get_test_job, range(5)) - _empty_flow = Flow([]) - _subflow = Flow([job5]) - + job1, job2, job3, job4, job5, job6 = map(get_test_job, range(6)) + Flow([job6]) + empty_flow = Flow([]) flow1 = Flow([job1, job2, job3, job4]) # test negative indexing with __getitem__ and __setitem__ @@ -927,23 +926,22 @@ def test_flow_magic_methods_edge_cases(): flow1[-1] = job5 assert flow1[-1] == job5 - # TODO these tests are currently failing # test slicing with __getitem__ and __setitem__ - # assert flow1[1:3] == [job2, job3] - # flow1[1:3] = [job4, job5] - # assert flow1[1:3] == [job4, job5] + assert flow1[1:3] == (job2, job3) + flow1[1:3] = (job4, job5) + assert flow1[1:3] == (job4, job5) - # test __add__ and __sub__ with an empty flow - # assert len(flow1 + empty_flow) == len(flow1) - # assert len(flow1 - empty_flow) == len(flow1) + # adding an empty flow still increases len by 1 + assert len(flow1 + empty_flow) == len(flow1) + 1 - # # test __add__ and __sub__ with job already in the flow - # assert len(flow1 + job5) == len(flow1) - # flow1 - job5 - # assert job5 not in flow1 + # test __add__ and __sub__ with job already in the flow + with pytest.raises( + ValueError, match="jobs array contains multiple jobs/flows with the same uuid" + ): + _ = flow1 + job1 - # # test __contains__ with a flow object - # assert subflow in flow1 + with pytest.raises(ValueError, match="Job .+ already belongs to another flow"): + _ = flow1 + job6 def test_flow_repr(): From 2094ecdf2f3cd0061f8aeb143a080dfd1c72ad2d Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 09:49:41 -0700 Subject: [PATCH 16/27] fix mypy --- src/jobflow/core/flow.py | 10 +++++----- src/jobflow/core/job.py | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index ec0027f2..9d0aa7f4 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -151,7 +151,7 @@ def __len__(self) -> int: """Get the number of jobs or subflows in the flow.""" return len(self.jobs) - def __getitem__(self, idx: int | slice) -> Flow | Job | tuple[Flow | Job]: + def __getitem__(self, idx: int | slice) -> Flow | Job | tuple[Flow | Job, ...]: """Get the job(s) or subflow(s) at the given index/slice.""" return self.jobs[idx] @@ -165,7 +165,7 @@ def __setitem__( ) jobs = list(self.jobs) jobs[idx] = value - self.jobs = jobs + self.jobs = tuple(jobs) def __iter__(self) -> Iterator[Flow | Job]: """Iterate through the jobs in the flow.""" @@ -188,7 +188,7 @@ def __sub__(self, other: Flow | Job) -> Flow: if other not in self.jobs: raise ValueError(f"{other!r} not found in flow") new_flow = self.__deepcopy__() - new_flow.jobs = [job for job in new_flow.jobs if job != other] + new_flow.jobs = tuple([job for job in new_flow.jobs if job != other]) return new_flow def __repr__(self, level=0, index=None) -> str: @@ -203,10 +203,10 @@ def __repr__(self, level=0, index=None) -> str: ) return f"Flow({name=}, {uuid=})\n{job_reprs}" - def __eq__(self, other: Flow | Job) -> bool: + def __eq__(self, other: object) -> bool: """Check if the flow is equal to another flow.""" if not isinstance(other, Flow): - return False + return NotImplemented return self.uuid == other.uuid def __hash__(self) -> int: diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index 03c2f0d0..fe4bf1e6 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -384,7 +384,7 @@ def __contains__(self, item: Hashable) -> bool: """ return item in self.input_uuids - def __eq__(self, other: Job) -> bool: + def __eq__(self, other: object) -> bool: """ Check if two jobs are equal. @@ -398,6 +398,8 @@ def __eq__(self, other: Job) -> bool: bool Whether the jobs are equal. """ + if not isinstance(other, Job): + return NotImplemented return self.uuid == other.uuid def __hash__(self) -> int: @@ -1149,7 +1151,7 @@ def add_hosts_uuids(self, hosts_uuids: str | Sequence[str], prepend: bool = Fals Insert the UUIDs at the beginning of the list rather than extending it. """ if not isinstance(hosts_uuids, (list, tuple)): - hosts_uuids = [hosts_uuids] + hosts_uuids = [hosts_uuids] # type: ignore if prepend: self.hosts[0:0] = hosts_uuids else: From 8fd09252d33bde6113de7f620d4d6e200af8f517 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 09:56:30 -0700 Subject: [PATCH 17/27] fix bad import auto-complete --- src/jobflow/core/flow.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 9d0aa7f4..de90d477 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -5,9 +5,8 @@ import copy import logging import warnings -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Sequence -from git import Sequence from monty.json import MSONable import jobflow @@ -164,7 +163,7 @@ def __setitem__( f"Flow can only contain Job or Flow objects, not {type(value)}" ) jobs = list(self.jobs) - jobs[idx] = value + jobs[idx] = value # type: ignore[index, assignment] self.jobs = tuple(jobs) def __iter__(self) -> Iterator[Flow | Job]: @@ -778,7 +777,7 @@ def add_jobs(self, jobs: Job | Flow | Sequence[Flow | Job]) -> None: A list of Jobs and Flows. """ if not isinstance(jobs, (tuple, list)): - jobs = [jobs] + jobs = [jobs] # type: ignore[list-item] job_ids = set(self.all_uuids) hosts = [self.uuid, *self.hosts] From 7a19168c428ebdd53c36b14d3a0bf5d0ce197dae Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 09:58:32 -0700 Subject: [PATCH 18/27] make Job.__eq__ more strict: require self.__dict__ == other.__dict__ for equality --- src/jobflow/core/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index fe4bf1e6..c24fa071 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -400,7 +400,7 @@ def __eq__(self, other: object) -> bool: """ if not isinstance(other, Job): return NotImplemented - return self.uuid == other.uuid + return self.__dict__ == other.__dict__ def __hash__(self) -> int: """Get the hash of the job.""" From 59584cadcba9fec0030dd989b32e4895179f01c2 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 10:03:01 -0700 Subject: [PATCH 19/27] revert accidental auto-fix of implicit optional --- src/jobflow/core/flow.py | 8 ++++---- src/jobflow/core/job.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index de90d477..e68c7307 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -212,7 +212,7 @@ def __hash__(self) -> int: """Get the hash of the flow.""" return hash(self.uuid) - def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Flow: + def __deepcopy__(self, memo: dict[int, Any] = None) -> Flow: """Get a deep copy of the flow. Shallow copy doesn't make sense; jobs aren't allowed to belong to multiple flows @@ -664,9 +664,9 @@ def update_metadata( def update_config( self, config: jobflow.JobConfig | dict, - name_filter: str | None = None, - function_filter: Callable | None = None, - attributes: list[str] | str | None = None, + name_filter: str = None, + function_filter: Callable = None, + attributes: list[str] | str = None, dynamic: bool = True, ): """ diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index c24fa071..f0d0d51b 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -991,9 +991,9 @@ def update_metadata( def update_config( self, config: JobConfig | dict, - name_filter: str | None = None, - function_filter: Callable | None = None, - attributes: list[str] | str | None = None, + name_filter: str = None, + function_filter: Callable = None, + attributes: list[str] | str = None, dynamic: bool = True, ): """ From 9940df02c8c28296d7c5ef4ca4fd91441d43941c Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 10:25:50 -0700 Subject: [PATCH 20/27] add test_job_magic_methods() --- tests/core/test_job.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/core/test_job.py b/tests/core/test_job.py index cce8e768..5a4812b0 100644 --- a/tests/core/test_job.py +++ b/tests/core/test_job.py @@ -1261,3 +1261,39 @@ def use_maker(maker): response = test_job.run(memory_jobstore) assert response.replace.jobs[0].config == new_config assert response.replace.jobs[0].config_updates[0]["config"] == new_config + + +def test_job_magic_methods(): + import os + + from jobflow import Job + + # prepare test jobs + job1 = Job(function=sum, function_args=(1, 2)) + job2 = Job(function=os.path.join, function_args=("folder", "filename.txt")) + job3 = Job(function=sum, function_args=(1, 2)) + + # test __repr__ + assert repr(job1) == f"Job(name='sum', uuid='{job1.uuid}')" + assert repr(job2) == f"Job(name='join', uuid='{job2.uuid}')" + assert repr(job3) == f"Job(name='sum', uuid='{job3.uuid}')" + assert repr(job1) != repr(job3) + + # test __contains__ (using some fake UUID) + # initial job.input_references is empty so can't test positive case + assert "fake-uuid" not in job1 + + # test __eq__ + assert job1 == job1 + assert job2 == job2 + assert job1 != job2 + assert job1 != job3 # Different UUIDs + + # test __hash__ + assert hash(job1) != hash(job2) != hash(job3) + + # Test __init__ with True for multiple additional stores + with pytest.raises( + ValueError, match="Cannot select True for multiple additional stores" + ): + _ = Job(function=sum, function_args=(1, 2), store1=True, store2=True) From e26a9242b059282d69e291377040ce4363c307e2 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 10:36:41 -0700 Subject: [PATCH 21/27] coverage ignore if TYPE_CHECKING --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a4946947..b1458520 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ show_missing = true exclude_lines = [ '^\s*@overload( |$)', '^\s*assert False(,|$)', + 'if TYPE_CHECKING:', 'if typing.TYPE_CHECKING:', ] From 955c6b81622c59f3ca7bbc2224ad9a1c7b07f22d Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 10:45:58 -0700 Subject: [PATCH 22/27] move test error msg for multiple stores to test_store_inputs() --- tests/core/test_job.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/core/test_job.py b/tests/core/test_job.py index 5a4812b0..f9bc7753 100644 --- a/tests/core/test_job.py +++ b/tests/core/test_job.py @@ -922,7 +922,7 @@ def add_schema_replace(a, b): def test_store_inputs(memory_jobstore): - from jobflow.core.job import OutputReference, store_inputs + from jobflow.core.job import Job, OutputReference, store_inputs test_job = store_inputs(1) test_job.run(memory_jobstore) @@ -935,6 +935,12 @@ def test_store_inputs(memory_jobstore): output = memory_jobstore.query_one({"uuid": test_job.uuid}, ["output"])["output"] assert OutputReference.from_dict(output) == ref + # test error msg for multiple stores + with pytest.raises( + ValueError, match="Cannot select True for multiple additional stores" + ): + _ = Job(function=sum, function_args=([1, 2],), store1=True, store2=True) + def test_pass_manager_config(): from jobflow import Flow, Job @@ -1269,9 +1275,9 @@ def test_job_magic_methods(): from jobflow import Job # prepare test jobs - job1 = Job(function=sum, function_args=(1, 2)) + job1 = Job(function=sum, function_args=([1, 2],)) job2 = Job(function=os.path.join, function_args=("folder", "filename.txt")) - job3 = Job(function=sum, function_args=(1, 2)) + job3 = Job(function=sum, function_args=([1, 2],)) # test __repr__ assert repr(job1) == f"Job(name='sum', uuid='{job1.uuid}')" @@ -1291,9 +1297,3 @@ def test_job_magic_methods(): # test __hash__ assert hash(job1) != hash(job2) != hash(job3) - - # Test __init__ with True for multiple additional stores - with pytest.raises( - ValueError, match="Cannot select True for multiple additional stores" - ): - _ = Job(function=sum, function_args=(1, 2), store1=True, store2=True) From 6dd27bd8467af8a77d8d936740f55e959180d8e2 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 10:46:21 -0700 Subject: [PATCH 23/27] add test for Flow.__setitem__ raising TypeError --- src/jobflow/core/flow.py | 8 ++++++-- tests/core/test_flow.py | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index e68c7307..afb3ec82 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -158,9 +158,13 @@ def __setitem__( self, idx: int | slice, value: Flow | Job | Sequence[Flow | Job] ) -> None: """Set the job(s) or subflow(s) at the given index/slice.""" - if not isinstance(value, (Flow, jobflow.Job, Sequence)): + if ( + not isinstance(value, (Flow, jobflow.Job, tuple, list)) + or isinstance(value, (tuple, list)) + and not all(isinstance(v, (Flow, jobflow.Job)) for v in value) + ): raise TypeError( - f"Flow can only contain Job or Flow objects, not {type(value)}" + f"Flow can only contain Job or Flow objects, not {type(value).__name__}" ) jobs = list(self.jobs) jobs[idx] = value # type: ignore[index, assignment] diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index 44387469..2c7c2681 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -931,6 +931,14 @@ def test_flow_magic_methods_edge_cases(): flow1[1:3] = (job4, job5) assert flow1[1:3] == (job4, job5) + for val in (None, 1.0, 1, "1", [1], (1,), {1: 1}): + type_name = type(val).__name__ + with pytest.raises( + TypeError, + match=f"Flow can only contain Job or Flow objects, not {type_name}", + ): + flow1[1:3] = val + # adding an empty flow still increases len by 1 assert len(flow1 + empty_flow) == len(flow1) + 1 From 1307d3040773fb8fdb14ed12c9e34426894a2d3c Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 11:05:09 -0700 Subject: [PATCH 24/27] add test Flow.__setitem__ with single item and __add__ with bad type --- src/jobflow/core/flow.py | 2 +- tests/core/test_flow.py | 7 ++++++- tests/core/test_job.py | 6 ++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index afb3ec82..2daf69bf 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -180,7 +180,7 @@ def __contains__(self, item: Flow | Job) -> bool: def __add__(self, other: Job | Flow | Sequence[Flow | Job]) -> Flow: """Add a job or subflow to the flow.""" - if not isinstance(other, (Flow, jobflow.Job, Sequence)): + if not isinstance(other, (Flow, jobflow.Job, tuple, list)): return NotImplemented new_flow = self.__deepcopy__() new_flow.add_jobs(other) diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index 2c7c2681..3289f0a7 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -928,9 +928,14 @@ def test_flow_magic_methods_edge_cases(): # test slicing with __getitem__ and __setitem__ assert flow1[1:3] == (job2, job3) - flow1[1:3] = (job4, job5) + flow1[1] = job4 # test single item + assert flow1[1] == job4 + flow1[1:3] = (job4, job5) # test multiple items with slicing assert flow1[1:3] == (job4, job5) + # test __add__ with bad type + assert flow1.__add__("string") == NotImplemented + for val in (None, 1.0, 1, "1", [1], (1,), {1: 1}): type_name = type(val).__name__ with pytest.raises( diff --git a/tests/core/test_job.py b/tests/core/test_job.py index f9bc7753..8999ccc9 100644 --- a/tests/core/test_job.py +++ b/tests/core/test_job.py @@ -1270,18 +1270,16 @@ def use_maker(maker): def test_job_magic_methods(): - import os - from jobflow import Job # prepare test jobs job1 = Job(function=sum, function_args=([1, 2],)) - job2 = Job(function=os.path.join, function_args=("folder", "filename.txt")) + job2 = Job(function=dict, function_args=((("a", 1), ("b", 2)),)) job3 = Job(function=sum, function_args=([1, 2],)) # test __repr__ assert repr(job1) == f"Job(name='sum', uuid='{job1.uuid}')" - assert repr(job2) == f"Job(name='join', uuid='{job2.uuid}')" + assert repr(job2) == f"Job(name='dict', uuid='{job2.uuid}')" assert repr(job3) == f"Job(name='sum', uuid='{job3.uuid}')" assert repr(job1) != repr(job3) From 962ad44429ea7f6111c0e03770d0de2876433346 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 11:12:25 -0700 Subject: [PATCH 25/27] delete Job.__getitem__ --- src/jobflow/core/job.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index f0d0d51b..cd9f19f6 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -406,22 +406,6 @@ def __hash__(self) -> int: """Get the hash of the job.""" return hash(self.uuid) - def __getitem__(self, item: str | int) -> OutputReference: - """ - Get an output reference from the job. - - Parameters - ---------- - item - The key of the output reference. - - Returns - ------- - OutputReference - An output reference. - """ - return self.output[item] - @property def input_references(self) -> tuple[jobflow.OutputReference, ...]: """ From 94fdb641829806a2f1b49f0d4d82f8a5e6330c1f Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 7 Aug 2023 11:12:50 -0700 Subject: [PATCH 26/27] test passing single job to @jobs setter --- tests/core/test_flow.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index 3289f0a7..bbba6611 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -686,6 +686,11 @@ def test_add_jobs(): with pytest.raises(ValueError): flow1.add_jobs(flow3) + # test passing single job to @jobs setter + flow1.jobs = add_job1 + assert len(flow1.jobs) == 1 + assert flow1.jobs[0] is add_job1 + def test_remove_jobs(): from jobflow.core.flow import Flow From 781d3b5f9affb2b5e360339a1cf4ebfa9835d815 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 10 Aug 2023 19:37:57 -0700 Subject: [PATCH 27/27] document difference between flow.iterflow() and flow.__iter__() --- docs/tutorials/8-fireworks.md | 10 ++++++++++ tests/core/test_flow.py | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/8-fireworks.md b/docs/tutorials/8-fireworks.md index 981f8c5d..f1ae2618 100644 --- a/docs/tutorials/8-fireworks.md +++ b/docs/tutorials/8-fireworks.md @@ -94,6 +94,16 @@ flow.update_config({"manager_config": {"_fworker": "fworker1"}}, name_filter="jo flow.update_config({"manager_config": {"_fworker": "fworker2"}}, name_filter="job2") ``` +NB: There are two ways to iterate over a `Flow`. The `iterflow` method iterates through a flow such that root nodes of the graph are always returned first. This has the benefit that the `job.output` references can always be resolved. +`Flow` also has an `__iter__` method, meaning you can write + +```py +for job_or_subflow in flow: + ... +``` + +to simply iterate through the `Flow.jobs` array. Note that `jobs` can also contain other flows. + ### Launching the Jobs As described above, convert the flow to a workflow via {obj}`flow_to_workflow` and add it to your launch pad. diff --git a/tests/core/test_flow.py b/tests/core/test_flow.py index bbba6611..2216e753 100644 --- a/tests/core/test_flow.py +++ b/tests/core/test_flow.py @@ -456,7 +456,11 @@ def test_dag_validation(): job2 = Job(add, function_args=(job1.output, 2)) job1.function_args = (job2.output, 2) flow = Flow(jobs=[job1, job2]) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Job connectivity contains cycles therefore job execution order " + "cannot be determined", + ): next(flow.iterflow())