From 907b60662dc464268ec0840d2b75af7991da3ad6 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Mon, 15 Jul 2024 18:07:21 -0400 Subject: [PATCH] Module 3 --- module-3/classic-example/Makefile | 4 +-- .../classic-example/classic_example/cli.py | 6 ++-- .../classic-example/classic_example/data.py | 10 +++--- .../pipeline_api.py | 0 .../pipeline_open.py | 0 module-3/generative-example/Makefile | 17 ++-------- module-3/generative-example/README.md | 2 +- .../generative_example/config.py | 1 - .../generative_example/predictor.py | 2 +- module-3/generative-example/tests/conftest.py | 32 ------------------- module-3/generative-example/tests/test_llm.py | 21 ------------ 11 files changed, 14 insertions(+), 81 deletions(-) rename module-3/{generative-example/generative_example => generative-api}/pipeline_api.py (100%) rename module-3/{generative-example/generative_example => generative-api}/pipeline_open.py (100%) delete mode 100644 module-3/generative-example/tests/test_llm.py diff --git a/module-3/classic-example/Makefile b/module-3/classic-example/Makefile index 06e6a71..a0f9698 100644 --- a/module-3/classic-example/Makefile +++ b/module-3/classic-example/Makefile @@ -29,12 +29,12 @@ test_all: pytest --cov=classic_example tests/ train_example: - python classic_example/cli.py load-cola-data ./data + python classic_example/cli.py load-sst2-data ./data python classic_example/cli.py train ./conf/example.json python classic_example/cli.py upload-to-registry example_model /tmp/results train_fast_ci: - python classic_example/cli.py load-cola-data ./data + python classic_example/cli.py load-sst2-data ./data python classic_example/cli.py train ./conf/fast.json python classic_example/cli.py upload-to-registry fast-model /tmp/results \ No newline at end of file diff --git a/module-3/classic-example/classic_example/cli.py b/module-3/classic-example/classic_example/cli.py index 726424f..018a5df 100644 --- a/module-3/classic-example/classic_example/cli.py +++ b/module-3/classic-example/classic_example/cli.py @@ -1,14 +1,14 @@ import typer -from classic_example.data import load_cola_data, load_cola_data_file_input +from classic_example.data import load_sst2_data, load_sst2_data_file_input from classic_example.train import train from classic_example.utils import load_from_registry, upload_to_registry from classic_example.predictor import run_inference_on_dataframe app = typer.Typer() app.command()(train) -app.command()(load_cola_data) -app.command()(load_cola_data_file_input) +app.command()(load_sst2_data) +app.command()(load_sst2_data_file_input) app.command()(upload_to_registry) app.command()(load_from_registry) app.command()(run_inference_on_dataframe) diff --git a/module-3/classic-example/classic_example/data.py b/module-3/classic-example/classic_example/data.py index 0109860..f3f0d13 100644 --- a/module-3/classic-example/classic_example/data.py +++ b/module-3/classic-example/classic_example/data.py @@ -5,7 +5,7 @@ from sklearn.model_selection import train_test_split -def _get_cola_data(random_state: int = 42): +def _get_sst2_data(random_state: int = 42): dataset = load_dataset("glue", "sst2") df_all = ArrowReader.read_table( dataset.cache_files["train"][0]["filename"] @@ -19,24 +19,24 @@ def _get_cola_data(random_state: int = 42): return df_train, df_val, df_test -def load_cola_data(path_to_save: Path): +def load_sst2_data(path_to_save: Path): path_to_save.mkdir(parents=True, exist_ok=True) - df_train, df_val, df_test = _get_cola_data() + df_train, df_val, df_test = _get_sst2_data() df_train.to_csv(path_to_save / "train.csv", index=False) df_val.to_csv(path_to_save / "val.csv", index=False) df_test.to_csv(path_to_save / "test.csv", index=False) -def load_cola_data_file_input( +def load_sst2_data_file_input( path_to_train: Path, path_to_val: Path, path_to_test: Path ): path_to_train.parent.mkdir(parents=True, exist_ok=True) path_to_val.parent.mkdir(parents=True, exist_ok=True) path_to_test.parent.mkdir(parents=True, exist_ok=True) - df_train, df_val, df_test = _get_cola_data() + df_train, df_val, df_test = _get_sst2_data() df_train.to_csv(path_to_train, index=False) df_val.to_csv(path_to_val, index=False) diff --git a/module-3/generative-example/generative_example/pipeline_api.py b/module-3/generative-api/pipeline_api.py similarity index 100% rename from module-3/generative-example/generative_example/pipeline_api.py rename to module-3/generative-api/pipeline_api.py diff --git a/module-3/generative-example/generative_example/pipeline_open.py b/module-3/generative-api/pipeline_open.py similarity index 100% rename from module-3/generative-example/generative_example/pipeline_open.py rename to module-3/generative-api/pipeline_open.py diff --git a/module-3/generative-example/Makefile b/module-3/generative-example/Makefile index 00e8071..f8a409f 100644 --- a/module-3/generative-example/Makefile +++ b/module-3/generative-example/Makefile @@ -8,25 +8,12 @@ run_dev_gpu: build docker run --net=host --gpus all -it -v ${PWD}:/main generative-example:latest /bin/bash format: - ruff format nlp_sample/ tests/ + ruff format nlp_sample/ lint: - ruff check nlp_sample/ tests/ + ruff check nlp_sample/ -test: - pytest --disable-warnings ./tests/ -test_data: - pytest --disable-warnings -rc ./tests/test_data.py - -test_code: - pytest --disable-warnings -rc ./tests/test_code.py - -test_model: - pytest --disable-warnings -ss ./tests/test_model.py - -test_all: - pytest --cov=generative_example tests/ train_example: python generative_example/cli.py load-sql-data --subsample 0.1 ./data diff --git a/module-3/generative-example/README.md b/module-3/generative-example/README.md index 1ea6d74..ee11b6a 100644 --- a/module-3/generative-example/README.md +++ b/module-3/generative-example/README.md @@ -22,7 +22,7 @@ export WANDB_API_KEY=*********************** make test ``` -reference: https://madewithml.com/courses/mlops/testing/ +reference: https://docs.confident-ai.com/docs/confident-ai-github-actions ## Reports diff --git a/module-3/generative-example/generative_example/config.py b/module-3/generative-example/generative_example/config.py index 71ea69c..cd55271 100644 --- a/module-3/generative-example/generative_example/config.py +++ b/module-3/generative-example/generative_example/config.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Optional @dataclass diff --git a/module-3/generative-example/generative_example/predictor.py b/module-3/generative-example/generative_example/predictor.py index 0d898e8..89f9420 100644 --- a/module-3/generative-example/generative_example/predictor.py +++ b/module-3/generative-example/generative_example/predictor.py @@ -22,7 +22,7 @@ def __init__(self, model_load_path: str): model_load_path, low_cpu_mem_usage=True, return_dict=True, - torch_dtype=torch.bfloat16, #torch.float16, + torch_dtype=torch.bfloat16, trust_remote_code=True, device_map=device_map, ) diff --git a/module-3/generative-example/tests/conftest.py b/module-3/generative-example/tests/conftest.py index 4348b2a..e69de29 100644 --- a/module-3/generative-example/tests/conftest.py +++ b/module-3/generative-example/tests/conftest.py @@ -1,32 +0,0 @@ -from pathlib import Path -from typing import Tuple - -import great_expectations as ge -import pandas as pd -import pytest -from great_expectations.dataset.pandas_dataset import PandasDataset - -from nlp_sample.data import load_cola_data - - -@pytest.fixture(scope="session") -def data_path() -> Path: - _data_path = Path("/tmp/data") - _data_path.mkdir(exist_ok=True, parents=True) - - load_cola_data(path_to_save=_data_path) - - return _data_path - - -@pytest.fixture(scope="session") -def data(data_path: Path) -> Tuple[PandasDataset, PandasDataset]: - df_train = pd.read_csv(data_path / "train.csv") - df_val = pd.read_csv(data_path / "val.csv") - df_test = pd.read_csv(data_path / "test.csv") - - return ( - ge.dataset.PandasDataset(df_train), - ge.dataset.PandasDataset(df_val), - ge.dataset.PandasDataset(df_test), - ) diff --git a/module-3/generative-example/tests/test_llm.py b/module-3/generative-example/tests/test_llm.py deleted file mode 100644 index b6ccb63..0000000 --- a/module-3/generative-example/tests/test_llm.py +++ /dev/null @@ -1,21 +0,0 @@ -from typer.testing import CliRunner -from pathlib import Path -from nlp_sample.cli import app - -runner = CliRunner() - - -def test_app(): - result = runner.invoke(app, ["load-cola-data", "/tmp/data"]) - assert result.exit_code == 0, result.exception - assert Path("/tmp/data/train.csv").exists() - assert Path("/tmp/data/val.csv").exists() - assert Path("/tmp/data/test.csv").exists() - - result = runner.invoke(app, ["train", "tests/data/test_config.json"]) - assert result.exit_code == 0, result.exception - assert Path("/tmp/results").exists() - - result = runner.invoke(app, ["upload-to-registry", "cli-test", "/tmp/results"]) - assert result.exit_code == 0, result.exception -