Skip to content

Commit

Permalink
Module 3
Browse files Browse the repository at this point in the history
  • Loading branch information
truskovskiyk committed Jul 15, 2024
1 parent 4e3bace commit 907b606
Show file tree
Hide file tree
Showing 11 changed files with 14 additions and 81 deletions.
4 changes: 2 additions & 2 deletions module-3/classic-example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ test_all:
pytest --cov=classic_example tests/

train_example:
python classic_example/cli.py load-cola-data ./data
python classic_example/cli.py load-sst2-data ./data
python classic_example/cli.py train ./conf/example.json
python classic_example/cli.py upload-to-registry example_model /tmp/results

train_fast_ci:
python classic_example/cli.py load-cola-data ./data
python classic_example/cli.py load-sst2-data ./data
python classic_example/cli.py train ./conf/fast.json
python classic_example/cli.py upload-to-registry fast-model /tmp/results

6 changes: 3 additions & 3 deletions module-3/classic-example/classic_example/cli.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import typer

from classic_example.data import load_cola_data, load_cola_data_file_input
from classic_example.data import load_sst2_data, load_sst2_data_file_input
from classic_example.train import train
from classic_example.utils import load_from_registry, upload_to_registry
from classic_example.predictor import run_inference_on_dataframe

app = typer.Typer()
app.command()(train)
app.command()(load_cola_data)
app.command()(load_cola_data_file_input)
app.command()(load_sst2_data)
app.command()(load_sst2_data_file_input)
app.command()(upload_to_registry)
app.command()(load_from_registry)
app.command()(run_inference_on_dataframe)
Expand Down
10 changes: 5 additions & 5 deletions module-3/classic-example/classic_example/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.model_selection import train_test_split


def _get_cola_data(random_state: int = 42):
def _get_sst2_data(random_state: int = 42):
dataset = load_dataset("glue", "sst2")
df_all = ArrowReader.read_table(
dataset.cache_files["train"][0]["filename"]
Expand All @@ -19,24 +19,24 @@ def _get_cola_data(random_state: int = 42):
return df_train, df_val, df_test


def load_cola_data(path_to_save: Path):
def load_sst2_data(path_to_save: Path):
path_to_save.mkdir(parents=True, exist_ok=True)

df_train, df_val, df_test = _get_cola_data()
df_train, df_val, df_test = _get_sst2_data()

df_train.to_csv(path_to_save / "train.csv", index=False)
df_val.to_csv(path_to_save / "val.csv", index=False)
df_test.to_csv(path_to_save / "test.csv", index=False)


def load_cola_data_file_input(
def load_sst2_data_file_input(
path_to_train: Path, path_to_val: Path, path_to_test: Path
):
path_to_train.parent.mkdir(parents=True, exist_ok=True)
path_to_val.parent.mkdir(parents=True, exist_ok=True)
path_to_test.parent.mkdir(parents=True, exist_ok=True)

df_train, df_val, df_test = _get_cola_data()
df_train, df_val, df_test = _get_sst2_data()

df_train.to_csv(path_to_train, index=False)
df_val.to_csv(path_to_val, index=False)
Expand Down
File renamed without changes.
File renamed without changes.
17 changes: 2 additions & 15 deletions module-3/generative-example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,12 @@ run_dev_gpu: build
docker run --net=host --gpus all -it -v ${PWD}:/main generative-example:latest /bin/bash

format:
ruff format nlp_sample/ tests/
ruff format nlp_sample/

lint:
ruff check nlp_sample/ tests/
ruff check nlp_sample/

test:
pytest --disable-warnings ./tests/

test_data:
pytest --disable-warnings -rc ./tests/test_data.py

test_code:
pytest --disable-warnings -rc ./tests/test_code.py

test_model:
pytest --disable-warnings -ss ./tests/test_model.py

test_all:
pytest --cov=generative_example tests/

train_example:
python generative_example/cli.py load-sql-data --subsample 0.1 ./data
Expand Down
2 changes: 1 addition & 1 deletion module-3/generative-example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export WANDB_API_KEY=***********************
make test
```

reference: https://madewithml.com/courses/mlops/testing/
reference: https://docs.confident-ai.com/docs/confident-ai-github-actions

## Reports

Expand Down
1 change: 0 additions & 1 deletion module-3/generative-example/generative_example/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from dataclasses import dataclass
from typing import Optional


@dataclass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, model_load_path: str):
model_load_path,
low_cpu_mem_usage=True,
return_dict=True,
torch_dtype=torch.bfloat16, #torch.float16,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
device_map=device_map,
)
Expand Down
32 changes: 0 additions & 32 deletions module-3/generative-example/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,32 +0,0 @@
from pathlib import Path
from typing import Tuple

import great_expectations as ge
import pandas as pd
import pytest
from great_expectations.dataset.pandas_dataset import PandasDataset

from nlp_sample.data import load_cola_data


@pytest.fixture(scope="session")
def data_path() -> Path:
    """Prepare the on-disk data directory once per test session.

    Ensures ``/tmp/data`` exists (creating parents as needed), calls
    ``load_cola_data`` with that directory as ``path_to_save``, and
    returns the directory path.
    """
    target_dir = Path("/tmp/data")
    target_dir.mkdir(parents=True, exist_ok=True)

    # Populate the directory before any dependent fixture reads from it.
    load_cola_data(path_to_save=target_dir)

    return target_dir


@pytest.fixture(scope="session")
def data(data_path: Path) -> Tuple[PandasDataset, PandasDataset, PandasDataset]:
    """Load the train/val/test CSV splits as Great Expectations datasets.

    Args:
        data_path: Directory containing ``train.csv``, ``val.csv`` and
            ``test.csv``.

    Returns:
        A ``(train, val, test)`` triple of ``PandasDataset`` objects, one
        per CSV file. The annotation lists three elements to match the
        value actually returned.
    """
    df_train = pd.read_csv(data_path / "train.csv")
    df_val = pd.read_csv(data_path / "val.csv")
    df_test = pd.read_csv(data_path / "test.csv")

    return (
        ge.dataset.PandasDataset(df_train),
        ge.dataset.PandasDataset(df_val),
        ge.dataset.PandasDataset(df_test),
    )
21 changes: 0 additions & 21 deletions module-3/generative-example/tests/test_llm.py

This file was deleted.

0 comments on commit 907b606

Please sign in to comment.