From 72062813597cb2888b28567c71a11ed78f28f333 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Wed, 26 Oct 2022 14:46:00 +0200 Subject: [PATCH 01/19] Changes related to file deletion based on start and end dates Changes related to file deletion based on start and end dates --- client/src/lab_client/environment.py | 40 +++++++++++- client/src/lab_client/handler/file_handler.py | 17 +++++ client/tests/test_file_handler.py | 64 ++++++++++++++++++- contaxy | 2 +- 4 files changed, 120 insertions(+), 3 deletions(-) diff --git a/client/src/lab_client/environment.py b/client/src/lab_client/environment.py index ffb4bf55..8ec519b2 100644 --- a/client/src/lab_client/environment.py +++ b/client/src/lab_client/environment.py @@ -1,5 +1,6 @@ +from datetime import datetime import os -from typing import Optional, Literal +from typing import Optional, Literal, List from contaxy.clients import AuthClient, FileClient, ExtensionClient from contaxy.clients import DeploymentClient @@ -301,6 +302,43 @@ def get_file_metadata( """ return self.file_handler.get_file_metadata(project, key, version) + def delete_remote_file( + self, key: str, version: Optional[str] = None, keep_latest_version: bool = False + ) -> None: + """Deletes a file based on the specified 'key'. + + Args: + key (str): Key or url of the requested file. + version (Optional[str], optional): Version of the file to be deleted. Defaults to None. + keep_latest_version (bool, optional): _description_. Defaults to False. + """ + self.file_handler.delete_remote_file(key, version, keep_latest_version) + + def delete_remote_files( + self, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None + ) -> None: + """Deletes all files (models/datasets) of a project. + + Args: + date_from (Optional[datetime], optional): The start date (in UTC format) from which the file has to be deleted. If none, all files will be deleted. 
+ date_to (Optional[datetime], optional): The end date (in UTC format) until which the file has to be deleted. If none, all files will be deleted. + """ + self.file_handler.delete_remote_files(date_from, date_to) + + def list_remote_files( + self, data_type: Literal['model', 'dataset'] = None, prefix: str = None + ) -> List[File]: + """Lists all the remote files. + + Args: + data_type (Literal['model', 'dataset'], optional): The data type of the files to list. Defaults to None. + prefix (str, optional): The file name prefix. If 'None', all files would be listed. + + Returns: + List[File]: List of the remote files. + """ + return self.file_handler.list_remote_files(data_type, prefix) + @property def mlflow_handler(self) -> MLFlowHandler: if self._mlflow_handler is None: diff --git a/client/src/lab_client/handler/file_handler.py b/client/src/lab_client/handler/file_handler.py index 4b8048eb..d1af6398 100644 --- a/client/src/lab_client/handler/file_handler.py +++ b/client/src/lab_client/handler/file_handler.py @@ -12,6 +12,7 @@ from lab_client.utils import file_handler_utils, request_utils from zipfile import ZipFile import shutil +from datetime import datetime VALID_DATATYPES = ['dataset', 'model'] @@ -182,6 +183,21 @@ def delete_remote_file( keep_latest_version=keep_latest_version, ) + def delete_remote_files( + self, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None + ) -> None: + """ + Delete files of the current project from remote storage. + # Arguments + date_from: Optional start date, forwarded to the file client. + date_to: Optional end date, forwarded to the file client. + """ + self.file_client.delete_files( + project_id=self.env.project, + date_from=date_from, + date_to=date_to + ) + def resolve_path_from_key(self, key: str, version: Optional[str] = None) -> str: """ Return the local path for a given key. 
@@ -259,3 +275,4 @@ def get_file_metadata( file_key, version) return metadata_file + diff --git a/client/tests/test_file_handler.py b/client/tests/test_file_handler.py index bf1bd814..2d725068 100644 --- a/client/tests/test_file_handler.py +++ b/client/tests/test_file_handler.py @@ -5,7 +5,7 @@ from .conftest import test_settings import requests import pytest - +from datetime import datetime, timedelta, timezone @pytest.mark.integration class TestFile: @@ -184,3 +184,65 @@ def test_list_file_with_invalid_data_type(self) -> None: env.upload_file(tf.name, "dataset") env.file_handler.list_remote_files(data_type="datasets") + + @pytest.mark.xfail(raises=Exception) + def test_delete_single_file(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + tf = tempfile.NamedTemporaryFile() + with open(tf.name, 'w') as f: + f.write("content 1") + f.close() + key = env.upload_file(tf.name, "dataset") + + env.delete_remote_file(key) + # This should throw a 404 error as file will not be present. + env.get_file(key) + + def test_delete_files_within_time_window(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + # Clear/Delete all files initially. + env.delete_remote_files() + + # Test that files within the date window are deleted. 
+ tf = tempfile.NamedTemporaryFile(prefix='abcd_') + with open(tf.name, 'w') as f: + f.write("content 1") + f.close() + env.upload_file(tf.name, "dataset") + + date_from = datetime.now(timezone.utc) - timedelta(days=1) + date_to = datetime.now(timezone.utc) + timedelta(days=1) + + env.delete_remote_files(date_from, date_to) + + files = env.list_remote_files(data_type='dataset') + assert len(files) == 0 + + def test_delete_files_outside_time_window(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + # Clear/Delete all files initially. + env.delete_remote_files() + + # Test that files outside the date window are not deleted. + tf = tempfile.NamedTemporaryFile(prefix='qwertz_') + with open(tf.name, 'w') as f: + f.write("content 1") + f.close() + env.upload_file(tf.name, "dataset") + + date_from = datetime.now(timezone.utc) - timedelta(days=3) + date_to = datetime.now(timezone.utc) - timedelta(days=2) + + env.delete_remote_files(date_from, date_to) + + files = env.list_remote_files(data_type="dataset") + assert len(files) == 1 diff --git a/contaxy b/contaxy index 37c83e8c..8febfea6 160000 --- a/contaxy +++ b/contaxy @@ -1 +1 @@ -Subproject commit 37c83e8cf3df01637ab3be232612f507137cd2a4 +Subproject commit 8febfea6be8208546b8ff01f2c561308bc764864 From 0e0a6ce34c7b4a011a8aa05bbf8adf12d470cb19 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Mon, 28 Nov 2022 23:10:14 +0100 Subject: [PATCH 02/19] Client changes for time specific file and job deletions. Client changes for time specific file and job deletions. 
--- client/src/lab_client/handler/job_handler.py | 6 ++- client/tests/test_deployment_handler.py | 48 +++++++++++++++++++- client/tests/test_file_handler.py | 2 +- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/client/src/lab_client/handler/job_handler.py b/client/src/lab_client/handler/job_handler.py index 176bf00a..4373d9ab 100644 --- a/client/src/lab_client/handler/job_handler.py +++ b/client/src/lab_client/handler/job_handler.py @@ -205,11 +205,13 @@ def delete_job(self, job_id: str) -> None: job_id=job_id ) - def delete_jobs(self) -> None: + def delete_jobs(self, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None) -> None: """Deletes all jobs of a project. """ self.deployment_client.delete_jobs( - project_id=self.env.project + project_id=self.env.project, + date_from=date_from, + date_to=date_to ) def get_job_logs( diff --git a/client/tests/test_deployment_handler.py b/client/tests/test_deployment_handler.py index c51d87e1..cf85293c 100644 --- a/client/tests/test_deployment_handler.py +++ b/client/tests/test_deployment_handler.py @@ -3,7 +3,7 @@ from lab_client import Environment from .conftest import test_settings import pytest - +from datetime import datetime, timedelta, timezone @pytest.mark.integration class TestJob: @@ -51,6 +51,52 @@ def test_list_jobs_get_job_metadata(self) -> None: job_list = env.job_handler.list_jobs() assert len(job_list) == 0 + def test_delete_jobs_within_time_period(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + input = JobInput( + container_image='ubuntu:latest', + display_name='Job2', + command=['/bin/bash', '-c', '--'], + args=['sleep 5'] + ) + job_id = env.job_handler.deploy_job(input) + + status = env.job_handler.wait_for_job_completion(job_id) + assert status == True + + date_from = datetime.now(timezone.utc) + date_to = datetime.now(timezone.utc) + timedelta(days=1) + + 
env.job_handler.delete_jobs(date_from, date_to) + job_list = env.job_handler.list_jobs() + assert len(job_list) == 0 + + def test_delete_jobs_outside_time_period(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + input = JobInput( + container_image='ubuntu:latest', + display_name='Job2', + command=['/bin/bash', '-c', '--'], + args=['sleep 5'] + ) + job_id = env.job_handler.deploy_job(input) + + status = env.job_handler.wait_for_job_completion(job_id) + assert status == True + + date_from = datetime.now(timezone.utc) - timedelta(days=3) + date_to = datetime.now(timezone.utc) - timedelta(days=2) + + env.job_handler.delete_jobs(date_from, date_to) + job_list = env.job_handler.list_jobs() + assert len(job_list) == 1 + def test_job_logs(self) -> None: env = Environment(lab_endpoint=test_settings.LAB_BACKEND, lab_api_token=test_settings.LAB_TOKEN, diff --git a/client/tests/test_file_handler.py b/client/tests/test_file_handler.py index 2d725068..688c1d20 100644 --- a/client/tests/test_file_handler.py +++ b/client/tests/test_file_handler.py @@ -216,7 +216,7 @@ def test_delete_files_within_time_window(self) -> None: f.close() env.upload_file(tf.name, "dataset") - date_from = datetime.now(timezone.utc) - timedelta(days=1) + date_from = datetime.now(timezone.utc) date_to = datetime.now(timezone.utc) + timedelta(days=1) env.delete_remote_files(date_from, date_to) From 771e38d210474bb52c420f5a7d994b1156642ae2 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Fri, 2 Dec 2022 13:02:01 +0100 Subject: [PATCH 03/19] Update setup.cfg Updated setup.cfg to fix the flake8 error code --- client/setup.cfg | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/client/setup.cfg b/client/setup.cfg index 868f1fd5..eccfb544 100644 --- a/client/setup.cfg +++ b/client/setup.cfg @@ -7,11 +7,14 @@ universal = false [flake8] ignore = - E203, # space before : 
(needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From 40987fd2b7c84488a597ffe3fbd91159bececa5d Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Fri, 2 Dec 2022 13:13:35 +0100 Subject: [PATCH 04/19] Updated setup.cfg Updated setup.cfg in accordance with flake8 documentation --- client/setup.cfg | 2 +- components/template/backend/setup.cfg | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/client/setup.cfg b/client/setup.cfg index eccfb544..902ddfed 100644 --- a/client/setup.cfg +++ b/client/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -ignore = +extend-ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/template/backend/setup.cfg b/components/template/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/template/backend/setup.cfg +++ b/components/template/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From 
92cc71fb2c39f9c92850c8200f6176ea35c2becb Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Sat, 3 Dec 2022 15:10:24 +0100 Subject: [PATCH 05/19] Updated flake8 ignore codes in setup.cfg Updated flake8 ignore codes in setup.cfg --- components/lab-job-scheduler/backend/setup.cfg | 15 +++++++++------ components/lab-mlflow-manager/backend/setup.cfg | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/components/lab-job-scheduler/backend/setup.cfg b/components/lab-job-scheduler/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/lab-job-scheduler/backend/setup.cfg +++ b/components/lab-job-scheduler/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 diff --git a/components/lab-mlflow-manager/backend/setup.cfg b/components/lab-mlflow-manager/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/lab-mlflow-manager/backend/setup.cfg +++ b/components/lab-mlflow-manager/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def 
+ E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From b31b38cafc558122af73aae7ecf26dab55647564 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Mon, 5 Dec 2022 22:09:46 +0100 Subject: [PATCH 06/19] Update setup.cfg --- .../lab-workspace-manager/backend/setup.cfg | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/components/lab-workspace-manager/backend/setup.cfg b/components/lab-workspace-manager/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/lab-workspace-manager/backend/setup.cfg +++ b/components/lab-workspace-manager/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From cd094b5db43b2bddf05e0dde0a34e8e6b99e3cea Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Tue, 6 Dec 2022 11:04:43 +0100 Subject: [PATCH 07/19] Updated the ignore checks in flake8 config files Updated the ignore checks in flake8 config files --- client/setup.cfg | 2 +- components/lab-job-scheduler/backend/setup.cfg | 2 +- .../lab-job-scheduler/backend/src/lab_job_scheduler/schema.py | 4 ++-- components/lab-mlflow-manager/backend/setup.cfg | 2 +- components/lab-workspace-manager/backend/setup.cfg | 2 +- components/template/backend/setup.cfg | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/client/setup.cfg b/client/setup.cfg index 902ddfed..eccfb544 100644 --- a/client/setup.cfg 
+++ b/client/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/lab-job-scheduler/backend/setup.cfg b/components/lab-job-scheduler/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/lab-job-scheduler/backend/setup.cfg +++ b/components/lab-job-scheduler/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py index f19b04ad..c309cd75 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py @@ -26,12 +26,12 @@ class ScheduledJob(ScheduledJobInput): example="2022-09-29T11:39:52.441287", ) last_run: str = Field( - None, + ..., description="The last time the job was run in ISO format.", example="2022-09-29T11:39:52.441287", ) next_run: str = Field( - None, + ..., description="The next time the job will be run in ISO format.", example="2022-09-29T11:39:52.441287", ) diff --git a/components/lab-mlflow-manager/backend/setup.cfg b/components/lab-mlflow-manager/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/lab-mlflow-manager/backend/setup.cfg +++ b/components/lab-mlflow-manager/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/lab-workspace-manager/backend/setup.cfg b/components/lab-workspace-manager/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/lab-workspace-manager/backend/setup.cfg +++ 
b/components/lab-workspace-manager/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/template/backend/setup.cfg b/components/template/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/template/backend/setup.cfg +++ b/components/template/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long From 1c650156e415437cc6c19813cbef4846731d8e9a Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Tue, 6 Dec 2022 12:11:03 +0100 Subject: [PATCH 08/19] Updated the job scheduler arguments Updated the job scheduler arguments --- .../backend/src/lab_job_scheduler/app.py | 4 ++-- .../backend/src/lab_job_scheduler/schema.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py index 9459ee24..ca4aa4a0 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py @@ -3,7 +3,7 @@ import json import os import threading -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from contaxy.operations.components import ComponentOperations from contaxy.schema.exceptions import CREATE_RESOURCE_RESPONSES @@ -231,7 +231,7 @@ def get_all_scheduled_jobs_from_db( def get_job_from_job_input( - job_schedule: ScheduledJobInput, job_id: str = None + job_schedule: ScheduledJobInput, job_id: Optional[str] = None ) -> ScheduledJob: return ScheduledJob( diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py index 
c309cd75..6d7ba6ca 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py @@ -1,6 +1,6 @@ from contaxy.schema.deployment import JobInput from pydantic import BaseModel, Field - +from typing import Optional class ScheduledJobInput(BaseModel): cron_string: str = Field( @@ -25,13 +25,13 @@ class ScheduledJob(ScheduledJobInput): description="The time the job was created in ISO format.", example="2022-09-29T11:39:52.441287", ) - last_run: str = Field( - ..., + last_run: Optional[str] = Field( + None, description="The last time the job was run in ISO format.", example="2022-09-29T11:39:52.441287", ) - next_run: str = Field( - ..., + next_run: Optional[str] = Field( + None, description="The next time the job will be run in ISO format.", example="2022-09-29T11:39:52.441287", ) From 7048d88c6d0f0541b7eb09a9b1d696f567fa367c Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Wed, 26 Oct 2022 14:46:00 +0200 Subject: [PATCH 09/19] Changes related to file deletion based on start and end dates Changes related to file deletion based on start and end dates --- client/src/lab_client/environment.py | 40 +++++++++++- client/src/lab_client/handler/file_handler.py | 17 +++++ client/tests/test_file_handler.py | 64 ++++++++++++++++++- contaxy | 2 +- 4 files changed, 120 insertions(+), 3 deletions(-) diff --git a/client/src/lab_client/environment.py b/client/src/lab_client/environment.py index ffb4bf55..8ec519b2 100644 --- a/client/src/lab_client/environment.py +++ b/client/src/lab_client/environment.py @@ -1,5 +1,6 @@ +from datetime import datetime import os -from typing import Optional, Literal +from typing import Optional, Literal, List from contaxy.clients import AuthClient, FileClient, ExtensionClient from contaxy.clients import DeploymentClient @@ -301,6 +302,43 @@ def get_file_metadata( """ return self.file_handler.get_file_metadata(project, key, version) + def 
delete_remote_file( + self, key: str, version: Optional[str] = None, keep_latest_version: bool = False + ) -> None: + """Deletes a file based on the specified 'key'. + + Args: + key (str): Key or url of the requested file. + version (Optional[str], optional): Version of the file to be deleted. Defaults to None. + keep_latest_version (bool, optional): If `True` the latest file version will be kept. Defaults to False. + """ + self.file_handler.delete_remote_file(key, version, keep_latest_version) + + def delete_remote_files( + self, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None + ) -> None: + """Deletes all files (models/datasets) of a project. + + Args: + date_from (Optional[datetime], optional): The start date (in UTC format) from which the file has to be deleted. If none, all files will be deleted. + date_to (Optional[datetime], optional): The end date (in UTC format) until which the file has to be deleted. If none, all files will be deleted. + """ + self.file_handler.delete_remote_files(date_from, date_to) + + def list_remote_files( + self, data_type: Literal['model', 'dataset'] = None, prefix: str = None + ) -> List[File]: + """Lists all the remote files. + + Args: + data_type (Literal['model', 'dataset'], optional): The data type of the files to list. Defaults to None. + prefix (str, optional): The file name prefix. If 'None', all files would be listed. + + Returns: + List[File]: List of the remote files. 
+ """ + return self.file_handler.list_remote_files(data_type, prefix) + @property def mlflow_handler(self) -> MLFlowHandler: if self._mlflow_handler is None: diff --git a/client/src/lab_client/handler/file_handler.py b/client/src/lab_client/handler/file_handler.py index 4b8048eb..d1af6398 100644 --- a/client/src/lab_client/handler/file_handler.py +++ b/client/src/lab_client/handler/file_handler.py @@ -12,6 +12,7 @@ from lab_client.utils import file_handler_utils, request_utils from zipfile import ZipFile import shutil +from datetime import datetime VALID_DATATYPES = ['dataset', 'model'] @@ -182,6 +183,21 @@ def delete_remote_file( keep_latest_version=keep_latest_version, ) + def delete_remote_files( + self, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None + ) -> None: + """ + Delete files of the current project from remote storage. + # Arguments + date_from: Optional start date, forwarded to the file client. + date_to: Optional end date, forwarded to the file client. + """ + self.file_client.delete_files( + project_id=self.env.project, + date_from=date_from, + date_to=date_to + ) + def resolve_path_from_key(self, key: str, version: Optional[str] = None) -> str: """ Return the local path for a given key. 
@@ -259,3 +275,4 @@ def get_file_metadata( file_key, version) return metadata_file + diff --git a/client/tests/test_file_handler.py b/client/tests/test_file_handler.py index bf1bd814..2d725068 100644 --- a/client/tests/test_file_handler.py +++ b/client/tests/test_file_handler.py @@ -5,7 +5,7 @@ from .conftest import test_settings import requests import pytest - +from datetime import datetime, timedelta, timezone @pytest.mark.integration class TestFile: @@ -184,3 +184,65 @@ def test_list_file_with_invalid_data_type(self) -> None: env.upload_file(tf.name, "dataset") env.file_handler.list_remote_files(data_type="datasets") + + @pytest.mark.xfail(raises=Exception) + def test_delete_single_file(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + tf = tempfile.NamedTemporaryFile() + with open(tf.name, 'w') as f: + f.write("content 1") + f.close() + key = env.upload_file(tf.name, "dataset") + + env.delete_remote_file(key) + # This should throw a 404 error as file will not be present. + env.get_file(key) + + def test_delete_files_within_time_window(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + # Clear/Delete all files initially. + env.delete_remote_files() + + # Test that files within the date window are deleted. 
+ tf = tempfile.NamedTemporaryFile(prefix='abcd_') + with open(tf.name, 'w') as f: + f.write("content 1") + f.close() + env.upload_file(tf.name, "dataset") + + date_from = datetime.now(timezone.utc) - timedelta(days=1) + date_to = datetime.now(timezone.utc) + timedelta(days=1) + + env.delete_remote_files(date_from, date_to) + + files = env.list_remote_files(data_type='dataset') + assert len(files) == 0 + + def test_delete_files_outside_time_window(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + # Clear/Delete all files initially. + env.delete_remote_files() + + # Test that files outside the date window are not deleted. + tf = tempfile.NamedTemporaryFile(prefix='qwertz_') + with open(tf.name, 'w') as f: + f.write("content 1") + f.close() + env.upload_file(tf.name, "dataset") + + date_from = datetime.now(timezone.utc) - timedelta(days=3) + date_to = datetime.now(timezone.utc) - timedelta(days=2) + + env.delete_remote_files(date_from, date_to) + + files = env.list_remote_files(data_type="dataset") + assert len(files) == 1 diff --git a/contaxy b/contaxy index 37c83e8c..8febfea6 160000 --- a/contaxy +++ b/contaxy @@ -1 +1 @@ -Subproject commit 37c83e8cf3df01637ab3be232612f507137cd2a4 +Subproject commit 8febfea6be8208546b8ff01f2c561308bc764864 From ca914244a6580c8323de1d141d80b7765e40ae7d Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Mon, 28 Nov 2022 23:10:14 +0100 Subject: [PATCH 10/19] Client changes for time specific file and job deletions. Client changes for time specific file and job deletions. 
--- client/src/lab_client/handler/job_handler.py | 6 ++- client/tests/test_deployment_handler.py | 48 +++++++++++++++++++- client/tests/test_file_handler.py | 2 +- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/client/src/lab_client/handler/job_handler.py b/client/src/lab_client/handler/job_handler.py index 176bf00a..4373d9ab 100644 --- a/client/src/lab_client/handler/job_handler.py +++ b/client/src/lab_client/handler/job_handler.py @@ -205,11 +205,13 @@ def delete_job(self, job_id: str) -> None: job_id=job_id ) - def delete_jobs(self) -> None: + def delete_jobs(self, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None) -> None: """Deletes all jobs of a project. """ self.deployment_client.delete_jobs( - project_id=self.env.project + project_id=self.env.project, + date_from=date_from, + date_to=date_to ) def get_job_logs( diff --git a/client/tests/test_deployment_handler.py b/client/tests/test_deployment_handler.py index c51d87e1..cf85293c 100644 --- a/client/tests/test_deployment_handler.py +++ b/client/tests/test_deployment_handler.py @@ -3,7 +3,7 @@ from lab_client import Environment from .conftest import test_settings import pytest - +from datetime import datetime, timedelta, timezone @pytest.mark.integration class TestJob: @@ -51,6 +51,52 @@ def test_list_jobs_get_job_metadata(self) -> None: job_list = env.job_handler.list_jobs() assert len(job_list) == 0 + def test_delete_jobs_within_time_period(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + input = JobInput( + container_image='ubuntu:latest', + display_name='Job2', + command=['/bin/bash', '-c', '--'], + args=['sleep 5'] + ) + job_id = env.job_handler.deploy_job(input) + + status = env.job_handler.wait_for_job_completion(job_id) + assert status == True + + date_from = datetime.now(timezone.utc) + date_to = datetime.now(timezone.utc) + timedelta(days=1) + + 
env.job_handler.delete_jobs(date_from, date_to) + job_list = env.job_handler.list_jobs() + assert len(job_list) == 0 + + def test_delete_jobs_outside_time_period(self) -> None: + env = Environment(lab_endpoint=test_settings.LAB_BACKEND, + lab_api_token=test_settings.LAB_TOKEN, + project=test_settings.LAB_PROJECT) + + input = JobInput( + container_image='ubuntu:latest', + display_name='Job2', + command=['/bin/bash', '-c', '--'], + args=['sleep 5'] + ) + job_id = env.job_handler.deploy_job(input) + + status = env.job_handler.wait_for_job_completion(job_id) + assert status == True + + date_from = datetime.now(timezone.utc) - timedelta(days=3) + date_to = datetime.now(timezone.utc) - timedelta(days=2) + + env.job_handler.delete_jobs(date_from, date_to) + job_list = env.job_handler.list_jobs() + assert len(job_list) == 1 + def test_job_logs(self) -> None: env = Environment(lab_endpoint=test_settings.LAB_BACKEND, lab_api_token=test_settings.LAB_TOKEN, diff --git a/client/tests/test_file_handler.py b/client/tests/test_file_handler.py index 2d725068..688c1d20 100644 --- a/client/tests/test_file_handler.py +++ b/client/tests/test_file_handler.py @@ -216,7 +216,7 @@ def test_delete_files_within_time_window(self) -> None: f.close() env.upload_file(tf.name, "dataset") - date_from = datetime.now(timezone.utc) - timedelta(days=1) + date_from = datetime.now(timezone.utc) date_to = datetime.now(timezone.utc) + timedelta(days=1) env.delete_remote_files(date_from, date_to) From 9e1f8f7424557ab65bfd896d4212ef3c9e24a55d Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Fri, 2 Dec 2022 13:02:01 +0100 Subject: [PATCH 11/19] Update setup.cfg Updated setup.cfg to fix the flake8 error code --- client/setup.cfg | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/client/setup.cfg b/client/setup.cfg index 868f1fd5..eccfb544 100644 --- a/client/setup.cfg +++ b/client/setup.cfg @@ -7,11 +7,14 @@ universal = false [flake8] ignore = - E203, # space before : 
(needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From b757f3aa0b743759a78948502b739fd8fc31ae54 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Fri, 2 Dec 2022 13:13:35 +0100 Subject: [PATCH 12/19] Updated setup.cfg Updated setup.cfg in accordance with flake8 documentation --- client/setup.cfg | 2 +- components/template/backend/setup.cfg | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/client/setup.cfg b/client/setup.cfg index eccfb544..902ddfed 100644 --- a/client/setup.cfg +++ b/client/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -ignore = +extend-ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/template/backend/setup.cfg b/components/template/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/template/backend/setup.cfg +++ b/components/template/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From 
aaf4fe882c1ae76e4c89db56fe0d108f2e8eb8f1 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Sat, 3 Dec 2022 15:10:24 +0100 Subject: [PATCH 13/19] Updated flake8 ignore codes in setup.cfg Updated flake8 ignore codes in setup.cfg --- components/lab-job-scheduler/backend/setup.cfg | 15 +++++++++------ components/lab-mlflow-manager/backend/setup.cfg | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/components/lab-job-scheduler/backend/setup.cfg b/components/lab-job-scheduler/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/lab-job-scheduler/backend/setup.cfg +++ b/components/lab-job-scheduler/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 diff --git a/components/lab-mlflow-manager/backend/setup.cfg b/components/lab-mlflow-manager/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/lab-mlflow-manager/backend/setup.cfg +++ b/components/lab-mlflow-manager/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def 
+ E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From ad576bd89ac4e4bf68f794784e116cd9740c4ef5 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Mon, 5 Dec 2022 22:09:46 +0100 Subject: [PATCH 14/19] Update setup.cfg --- .../lab-workspace-manager/backend/setup.cfg | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/components/lab-workspace-manager/backend/setup.cfg b/components/lab-workspace-manager/backend/setup.cfg index 868f1fd5..902ddfed 100644 --- a/components/lab-workspace-manager/backend/setup.cfg +++ b/components/lab-workspace-manager/backend/setup.cfg @@ -6,12 +6,15 @@ license_files = LICENSE universal = false [flake8] -ignore = - E203, # space before : (needed for how black formats slicing) - # E266, # too many leading '#' for block comment - E501, # line too long - E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator +extend-ignore = + # space before : (needed for how black formats slicing) + E203, + # line too long + E501, + # do not assign a lambda expression, use a def + E731, + # line break before binary operator + W503, max-line-length = 88 # select = B,C,E,F,W,T4,B9 # max-complexity = 18 From dcc00265a015974a7e757b4e9aa7584690288e2a Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Tue, 6 Dec 2022 11:04:43 +0100 Subject: [PATCH 15/19] Updated the ignore checks in flake8 config files Updated the ignore checks in flake8 config files --- client/setup.cfg | 2 +- components/lab-job-scheduler/backend/setup.cfg | 2 +- .../lab-job-scheduler/backend/src/lab_job_scheduler/schema.py | 4 ++-- components/lab-mlflow-manager/backend/setup.cfg | 2 +- components/lab-workspace-manager/backend/setup.cfg | 2 +- components/template/backend/setup.cfg | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/client/setup.cfg b/client/setup.cfg index 902ddfed..eccfb544 100644 --- a/client/setup.cfg 
+++ b/client/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/lab-job-scheduler/backend/setup.cfg b/components/lab-job-scheduler/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/lab-job-scheduler/backend/setup.cfg +++ b/components/lab-job-scheduler/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py index f19b04ad..c309cd75 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py @@ -26,12 +26,12 @@ class ScheduledJob(ScheduledJobInput): example="2022-09-29T11:39:52.441287", ) last_run: str = Field( - None, + ..., description="The last time the job was run in ISO format.", example="2022-09-29T11:39:52.441287", ) next_run: str = Field( - None, + ..., description="The next time the job will be run in ISO format.", example="2022-09-29T11:39:52.441287", ) diff --git a/components/lab-mlflow-manager/backend/setup.cfg b/components/lab-mlflow-manager/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/lab-mlflow-manager/backend/setup.cfg +++ b/components/lab-mlflow-manager/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/lab-workspace-manager/backend/setup.cfg b/components/lab-workspace-manager/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/lab-workspace-manager/backend/setup.cfg +++ 
b/components/lab-workspace-manager/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long diff --git a/components/template/backend/setup.cfg b/components/template/backend/setup.cfg index 902ddfed..eccfb544 100644 --- a/components/template/backend/setup.cfg +++ b/components/template/backend/setup.cfg @@ -6,7 +6,7 @@ license_files = LICENSE universal = false [flake8] -extend-ignore = +ignore = # space before : (needed for how black formats slicing) E203, # line too long From 4f5de4c43fb98f12c523dd685f0310302c13e0e1 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Tue, 6 Dec 2022 12:11:03 +0100 Subject: [PATCH 16/19] Updated the job scheduler arguments Updated the job scheduler arguments --- .../backend/src/lab_job_scheduler/app.py | 4 ++-- .../backend/src/lab_job_scheduler/schema.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py index 9459ee24..ca4aa4a0 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/app.py @@ -3,7 +3,7 @@ import json import os import threading -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from contaxy.operations.components import ComponentOperations from contaxy.schema.exceptions import CREATE_RESOURCE_RESPONSES @@ -231,7 +231,7 @@ def get_all_scheduled_jobs_from_db( def get_job_from_job_input( - job_schedule: ScheduledJobInput, job_id: str = None + job_schedule: ScheduledJobInput, job_id: Optional[str] = None ) -> ScheduledJob: return ScheduledJob( diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py index 
c309cd75..6d7ba6ca 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py @@ -1,6 +1,6 @@ from contaxy.schema.deployment import JobInput from pydantic import BaseModel, Field - +from typing import Optional class ScheduledJobInput(BaseModel): cron_string: str = Field( @@ -25,13 +25,13 @@ class ScheduledJob(ScheduledJobInput): description="The time the job was created in ISO format.", example="2022-09-29T11:39:52.441287", ) - last_run: str = Field( - ..., + last_run: Optional[str] = Field( + None, description="The last time the job was run in ISO format.", example="2022-09-29T11:39:52.441287", ) - next_run: str = Field( - ..., + next_run: Optional[str] = Field( + None, description="The next time the job will be run in ISO format.", example="2022-09-29T11:39:52.441287", ) From e83f54ca7611b419549a09bbdf5d6cee467831f9 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Tue, 6 Dec 2022 17:10:20 +0100 Subject: [PATCH 17/19] Update schema.py --- .../lab-job-scheduler/backend/src/lab_job_scheduler/schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py index 6d7ba6ca..12f58ee6 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py @@ -2,6 +2,7 @@ from pydantic import BaseModel, Field from typing import Optional + class ScheduledJobInput(BaseModel): cron_string: str = Field( ..., From d6cd3579e0c5e774fb38e28043a448d3d42ed412 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Wed, 7 Dec 2022 14:46:04 +0100 Subject: [PATCH 18/19] Update schema.py --- .../lab-job-scheduler/backend/src/lab_job_scheduler/schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py index 12f58ee6..dc835781 100644 --- a/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py +++ b/components/lab-job-scheduler/backend/src/lab_job_scheduler/schema.py @@ -1,6 +1,7 @@ +from typing import Optional + from contaxy.schema.deployment import JobInput from pydantic import BaseModel, Field -from typing import Optional class ScheduledJobInput(BaseModel): From c67b175cef3456f1e608ce637d0c1371ee2df1c7 Mon Sep 17 00:00:00 2001 From: Naveen Vishnu Kinnal Date: Wed, 7 Dec 2022 15:09:13 +0100 Subject: [PATCH 19/19] Update package.json --- components/lab-job-scheduler/webapp/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/lab-job-scheduler/webapp/package.json b/components/lab-job-scheduler/webapp/package.json index a25412e4..277dc9a7 100644 --- a/components/lab-job-scheduler/webapp/package.json +++ b/components/lab-job-scheduler/webapp/package.json @@ -39,7 +39,7 @@ "stylelint-config-standard": "^20.0.0" }, "scripts": { - "start": "react-scripts start", + "start": "GENERATE_SOURCEMAP=false react-scripts start", "start-debug": "REACT_APP_CONTAXY_ENDPOINT=http://localhost:30010/api REACT_APP_EXTENSION_ENDPOINT=http://localhost:8080/ yarn start", "container": "run(){ docker build -t project-template-dev ../.github/actions/build-environment && echo 'Starting the container. That can take a moment...' && docker run -it --rm -p 3000:3000 -p 6006:6006 -v $(pwd):/workspace:delegated --entrypoint \"/bin/bash\" project-template-dev -c \"cd workspace/ && yarn $1\"; }; run", "build": "react-scripts build",