-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'experiments/clearml_241203' into 'master'
Add exp dir See merge request ai-lab-pmo/mltools/automl/LightAutoML!31
- Loading branch information
Showing
12 changed files
with
246 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,7 @@ stages: | |
- default | ||
- all_pythons | ||
- docs | ||
|
||
|
||
default: | ||
stage: default | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
"""Run tabular automl using ClearML logging.""" | ||
import argparse | ||
import os | ||
import clearml | ||
|
||
|
||
def _build_task_command(project, dataset, queue, cpu_limit, memory_limit):
    """Build the shell command that enqueues one ``run_tabular.py`` task via ``clearml-task``."""
    import shlex

    docker_args = f"--cpus={cpu_limit} --memory={memory_limit}g"
    dataset_arg = f"dataset={dataset}"
    # Every interpolated value is quoted so that names containing spaces or
    # shell metacharacters cannot break (or inject into) the command line.
    return (
        f"clearml-task --project {shlex.quote(str(project))} --name {shlex.quote(str(dataset))} "
        "--script scripts/experiments/run_tabular.py "
        f"--queue {shlex.quote(str(queue))} --docker for_clearml:latest "
        f"--docker_args {shlex.quote(docker_args)} --args {shlex.quote(dataset_arg)}"
    )


def main(
    dataset_name: str,
    queue: str,
    project: str,
    cpu_limit: int,
    memory_limit: int,
    dataset_project: str = None,
    dataset_partial_name: str = None,
    tags=None,
):
    """Enqueue one ``clearml-task`` run of ``run_tabular.py`` per selected dataset.

    When any of ``dataset_project``, ``dataset_partial_name`` or ``tags`` is given,
    the datasets are looked up with ``clearml.Dataset.list_datasets`` and one task is
    submitted per unique dataset name. Otherwise a single task is submitted for
    ``dataset_name``.

    Args:
        dataset_name: Dataset to run when no search filter is given.
        queue: ClearML queue passed to ``--queue``.
        project: ClearML project passed to ``--project``.
        cpu_limit: Value for the docker ``--cpus`` argument.
        memory_limit: Value (in ``g``) for the docker ``--memory`` argument.
        dataset_project: Optional project filter for the dataset search.
        dataset_partial_name: Optional partial-name filter for the dataset search.
        tags: Optional tag or list of tags for the dataset search.

    """
    if (dataset_project is not None) or (dataset_partial_name is not None) or (tags is not None):
        # Wrap a single tag into a list, but keep ``None`` as "no tag filter"
        # instead of turning it into ``[None]``.
        if tags is not None and not isinstance(tags, list):
            tags = [tags]

        found = clearml.Dataset.list_datasets(
            dataset_project=dataset_project,
            partial_name=dataset_partial_name,
            tags=tags,
            ids=None,
            only_completed=True,
            recursive_project_search=True,
            include_archived=False,
        )
        if found:
            print(found[0])
        # Unique names, sorted so the submission order is deterministic.
        dataset_list = sorted({x["name"] for x in found})

    else:
        # The lookup is kept so a missing dataset still fails before anything is
        # enqueued, but the *name* (not the Dataset object) goes into the command.
        clearml.Dataset.get(dataset_id=None, dataset_name=dataset_name)
        dataset_list = [dataset_name]

    print(f"Running {len(dataset_list)} datasets...")
    print(dataset_list)

    for dataset in dataset_list:
        os.system(_build_task_command(project, dataset, queue, cpu_limit, memory_limit))
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: declare the options from a table, parse them,
    # and hand the values over to ``main``.
    cli = argparse.ArgumentParser(description="")
    for flag, kind, text, fallback in (
        ("--dataset", str, "dataset name or id", "sampled_app_train"),
        ("--dataset_project", str, "dataset_project", None),
        ("--dataset_partial_name", str, "dataset_partial_name", None),
        ("--tags", str, "tags", None),
        ("--cpu_limit", int, "", 8),
        ("--memory_limit", int, "", 16),
        ("--queue", str, "", "cpu_queue"),
        ("--project", str, "", "junk"),
    ):
        cli.add_argument(flag, type=kind, help=text, default=fallback)
    options = cli.parse_args()

    main(
        dataset_name=options.dataset,
        queue=options.queue,
        project=options.project,
        cpu_limit=options.cpu_limit,
        memory_limit=options.memory_limit,
        dataset_project=options.dataset_project,
        dataset_partial_name=options.dataset_partial_name,
        tags=options.tags,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
"""Run tabular automl using ClearML logging.""" | ||
|
||
from utils import Timer | ||
from utils import install_lightautoml | ||
|
||
|
||
install_lightautoml() | ||
|
||
import argparse | ||
import os | ||
|
||
import clearml | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from sklearn.metrics import log_loss | ||
from sklearn.metrics import roc_auc_score | ||
|
||
from lightautoml.automl.presets.tabular_presets import TabularAutoML | ||
from lightautoml.tasks import Task | ||
|
||
|
||
def main(dataset_name: str, cpu_limit: int, memory_limit: int):
    """Train ``TabularAutoML`` on a ClearML dataset and report metrics to ClearML.

    The dataset's local copy must contain ``task_type.txt`` (task name on its first
    line), ``train.csv`` and ``test.csv``. The target is taken to be the last column
    of ``test.csv``.

    Args:
        dataset_name: Name of the ClearML dataset to fetch.
        cpu_limit: Forwarded to ``TabularAutoML(cpu_limit=...)``.
        memory_limit: Forwarded to ``TabularAutoML(memory_limit=...)``.

    Raises:
        ValueError: If the task type read from ``task_type.txt`` is not one of
            ``binary``, ``multiclass`` or ``reg``.

    """
    cml_task = clearml.Task.get_task(clearml.config.get_remote_task_id())
    logger = cml_task.get_logger()

    dataset = clearml.Dataset.get(dataset_id=None, dataset_name=dataset_name)
    dataset_local_path = dataset.get_local_copy()

    with open(os.path.join(dataset_local_path, "task_type.txt"), "r") as f:
        # ``readline`` keeps the trailing newline; strip it so the value matches
        # the task names compared below and passed to ``Task``.
        task_type = f.readline().strip()

    # Fail before the (potentially hours-long) training instead of hitting an
    # undefined metric afterwards.
    if task_type not in ("binary", "multiclass", "reg"):
        raise ValueError(f"Unsupported task type {task_type!r} for dataset {dataset_name!r}")

    train = pd.read_csv(os.path.join(dataset_local_path, "train.csv"))
    test = pd.read_csv(os.path.join(dataset_local_path, "test.csv"))

    task = Task(task_type)

    # =================================== automl config:
    automl = TabularAutoML(
        task=task,
        cpu_limit=cpu_limit,
        memory_limit=memory_limit,
        timeout=10 * 60 * 60,
        # general_params={
        #     "use_algos": [["mlp"]]
        # },  # ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint', 'fttransformer'] or custom torch model
        # nn_params={"n_epochs": 10, "bs": 512, "num_workers": 0, "path_to_save": None, "freeze_defaults": True},
        # nn_pipeline_params={"use_qnt": True, "use_te": False},
    )
    # ===================================

    cml_task.connect(automl)

    target_name = test.columns[-1]

    with Timer() as timer_training:
        oof_predictions = automl.fit_predict(train, roles={"target": target_name}, verbose=10)

    with Timer() as timer_predict:
        test_predictions = automl.predict(test)

    if task_type == "binary":
        metric_oof = roc_auc_score(train[target_name].values, oof_predictions.data[:, 0])
        metric_ho = roc_auc_score(test[target_name].values, test_predictions.data[:, 0])

    elif task_type == "multiclass":
        # Score only rows that have at least one non-NaN out-of-fold prediction.
        not_nan = np.any(~np.isnan(oof_predictions.data), axis=1)
        metric_oof = log_loss(train[target_name].values[not_nan], oof_predictions.data[not_nan, :])
        metric_ho = log_loss(test[target_name], test_predictions.data)

    else:  # "reg" -- guaranteed by the validation above
        metric_oof = task.metric_func(train[target_name].values, oof_predictions.data[:, 0])
        metric_ho = task.metric_func(test[target_name].values, test_predictions.data[:, 0])

    print(f"Score for out-of-fold predictions: {metric_oof}")
    print(f"Score for hold-out: {metric_ho}")
    print(f"Train duration: {timer_training.duration}")
    print(f"Predict duration: {timer_predict.duration}")

    logger.report_single_value("Metric OOF", metric_oof)
    logger.report_single_value("Metric HO", metric_ho)

    logger.report_single_value("Train duration", timer_training.duration)
    logger.report_single_value("Predict duration", timer_predict.duration)

    logger.flush()
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: parse the three options and run the experiment.
    cli = argparse.ArgumentParser(description="")
    for flag, kind, text, fallback in (
        ("--dataset", str, "dataset name or id", "sampled_app_train"),
        ("--cpu_limit", int, "", 8),
        ("--memory_limit", int, "", 16),
    ):
        cli.add_argument(flag, type=kind, help=text, default=fallback)
    options = cli.parse_args()

    main(
        dataset_name=options.dataset,
        cpu_limit=options.cpu_limit,
        memory_limit=options.memory_limit,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
"""Utils for running experiments.""" | ||
|
||
import os | ||
import time | ||
|
||
|
||
class Timer:
    """Context manager that measures elapsed time with a pluggable clock.

    Args:
        clock: Zero-argument callable returning the current time (``time.time`` by default).
        enabled: When false, a clock that always returns 0 is used, so every
            measurement is 0.

    """

    @staticmethod
    def _zero():
        """Clock used by disabled timers: always returns 0."""
        return 0

    def __init__(self, clock=time.time, enabled=True):
        self.start = 0
        self.stop = 0
        self._time = clock if enabled else Timer._zero
        self._tick = 0

    def __enter__(self):
        # Clear the stop mark left by a previous ``with`` block; otherwise a
        # reused timer would report the stale ``stop - start`` (a negative
        # value) and ``tick`` would return -1 while the timer is running again.
        self.stop = 0
        self.start = self._tick = self._time()
        return self

    def __exit__(self, *args):
        self.stop = self._tick = self._time()

    @property
    def tick(self):
        """Make one tick: time since the previous tick (or start); -1 once stopped."""
        if self.stop > 0:
            return -1
        now = self._time()
        tick = now - self._tick
        self._tick = now
        return tick

    @property
    def duration(self):
        """Get duration in seconds: ``stop - start`` once stopped, else time since start."""
        if self.stop > 0:
            return self.stop - self.start
        return self._time() - self.start
|
||
|
||
def install_lightautoml():
    """Install lightautoml using pip.

    Runs three shell commands in order: pin ``packaging``, run
    ``scripts/poetry_fix.py -f``, then ``pip install .`` using ``../../bin/pip``.
    A command that exits with a non-zero status is reported on stdout; the
    remaining commands still run and no exception is raised.
    """
    commands = (
        "pip install packaging==22.0",
        "python scripts/poetry_fix.py -f",
        "../../bin/pip install .",  # ./dist/*.whl
    )
    for command in commands:
        status = os.system(command)
        if status != 0:
            # Surface the failure instead of silently ignoring the exit status.
            print(f"WARNING: command exited with status {status}: {command}")
|
||
|
||
# .pip install --upgrade pip | ||
# poetry config virtualenvs.create false --local | ||
# poetry run python ./scripts/poetry_fix.py -c | ||
# ls -la | ||
# poetry run pip install pillow==9.2.0 | ||
# poetry install | ||
# poetry run pip freeze | ||
# poetry run python -c "import sys; print(sys.path)" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters