-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Finish Adapter work, start test and editing experiments
- Loading branch information
Showing
11 changed files
with
231 additions
and
245 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,20 @@ | ||
import abc | ||
from typing import Dict | ||
from typing import Dict, List, Union | ||
|
||
from numpy._typing import NDArray | ||
|
||
from baal.active.dataset.base import Dataset | ||
|
||
|
||
class FrameworkAdapter(abc.ABC):
    """Uniform interface over a training framework for the experiment loop.

    Concrete adapters wrap a framework-specific trainer and expose the four
    operations below. Every method is abstract, so an adapter that forgets
    one of them fails at instantiation instead of midway through a run.
    """

    @abc.abstractmethod
    def reset_weights(self):
        """Restore the wrapped model to its initial state."""
        raise NotImplementedError

    @abc.abstractmethod
    def train(self, al_dataset: Dataset) -> Dict[str, float]:
        """Train on ``al_dataset`` and return the training metrics by name."""
        raise NotImplementedError

    @abc.abstractmethod
    def predict(self, dataset: Dataset, iterations: int) -> Union[NDArray, List[NDArray]]:
        """Return predictions on ``dataset`` using ``iterations`` iterations.

        The result is either a single array or a list of arrays.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def evaluate(self, dataset: Dataset, average_predictions: int) -> Dict[str, float]:
        """Evaluate on ``dataset`` and return the evaluation metrics by name.

        ``average_predictions`` is supplied by the experiment loop; how it is
        used is up to the concrete adapter.
        """
        raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,77 @@ | ||
import itertools | ||
from typing import Union, Optional | ||
from typing import Union, Optional, TYPE_CHECKING | ||
|
||
import pandas as pd | ||
import structlog | ||
from tqdm import tqdm | ||
from transformers import Trainer | ||
|
||
from baal import ModelWrapper, ActiveLearningDataset | ||
from baal.active.dataset.base import Dataset | ||
from baal.active.heuristics import AbstractHeuristic | ||
from baal.active.stopping_criteria import StoppingCriterion, LabellingBudgetStoppingCriterion | ||
from baal.experiments import FrameworkAdapter | ||
from baal.experiments.modelwrapper import ModelWrapperAdapter | ||
|
||
# Optional dependency guard: the transformers-backed trainer and adapter are
# imported only when `transformers` (and the Baal modules built on it) can be
# loaded. The imports must live in the `try` branch; placing them under
# `except ImportError` would run them exactly when they are known to fail.
try:
    import transformers  # noqa: F401  (imported only to probe availability)

    from baal.transformers_trainer_wrapper import BaalTransformersTrainer
    from baal.experiments.transformers import TransformersAdapter

    TRANSFORMERS_AVAILABLE = True
except ImportError:
    # Keep the names defined so later references never raise NameError;
    # callers must check TRANSFORMERS_AVAILABLE before using them.
    BaalTransformersTrainer = None  # type: ignore
    TransformersAdapter = None  # type: ignore

    TRANSFORMERS_AVAILABLE = False
|
||
log = structlog.get_logger(__name__) | ||
|
||
|
||
class ActiveLearningExperiment:
    """Drive an active-learning loop: train, evaluate, rank the pool, label.

    Args:
        trainer: A ``ModelWrapper`` or a ``BaalTransformersTrainer``; it is
            wrapped in the matching ``FrameworkAdapter``.
        al_dataset: Active-learning dataset that is trained on and labelled.
        eval_dataset: Dataset used to compute evaluation metrics each step.
        heuristic: Heuristic whose ``get_ranks`` orders the pool predictions.
        query_size: Number of top-ranked pool items labelled per step.
        iterations: Passed to the adapter as ``iterations`` for ``predict``
            and as ``average_predictions`` for ``evaluate``.
        criterion: Stopping criterion. Defaults to a labelling budget equal
            to the number of unlabelled items at construction time.

    Raises:
        ValueError: If ``trainer`` is not a supported trainer type.
    """

    def __init__(
        self,
        trainer: Union[ModelWrapper, "BaalTransformersTrainer"],
        al_dataset: ActiveLearningDataset,
        eval_dataset: Dataset,
        heuristic: AbstractHeuristic,
        query_size: int = 100,
        iterations: int = 20,
        criterion: Optional[StoppingCriterion] = None,
    ):
        self.al_dataset = al_dataset
        self.eval_dataset = eval_dataset
        self.heuristic = heuristic
        self.query_size = query_size
        self.iterations = iterations
        self.criterion = criterion or LabellingBudgetStoppingCriterion(
            al_dataset, labelling_budget=al_dataset.n_unlabelled
        )
        self.adapter = self._get_adapter(trainer)

    def start(self):
        """Run the loop until the stopping criterion fires.

        Returns:
            A list with one dict per step, merging that step's training and
            evaluation metrics (evaluation wins on key collisions).
        """
        records = []
        _start = len(self.al_dataset)
        # Unbounded counter: termination is decided by the criterion alone.
        for _ in tqdm(itertools.count(start=0)):
            self.adapter.reset_weights()
            train_metrics = self.adapter.train(self.al_dataset)
            eval_metrics = self.adapter.evaluate(
                self.eval_dataset, average_predictions=self.iterations
            )
            ranks, uncertainty = self.heuristic.get_ranks(
                self.adapter.predict(self.al_dataset.pool, iterations=self.iterations)
            )
            self.al_dataset.label(ranks[: self.query_size])
            records.append({**train_metrics, **eval_metrics})
            if self.criterion.should_stop(eval_metrics, uncertainty):
                log.info("Experiment complete", num_labelled=len(self.al_dataset) - _start)
                break
        # One row per step. Building the frame from the list of records avoids
        # the "all scalar values" ValueError that DataFrame.from_dict raises
        # for a single dict of scalar metrics.
        print(pd.DataFrame.from_records(records))
        return records

    def _get_adapter(
        self, trainer: Union[ModelWrapper, "BaalTransformersTrainer"]
    ) -> FrameworkAdapter:
        """Return the adapter matching ``trainer``'s type.

        Raises:
            ValueError: If ``trainer`` is neither supported trainer type.
        """
        if isinstance(trainer, ModelWrapper):
            return ModelWrapperAdapter(trainer)
        # The availability flag is checked first so the transformers class is
        # only touched when it was actually imported.
        elif TRANSFORMERS_AVAILABLE and isinstance(trainer, BaalTransformersTrainer):
            return TransformersAdapter(trainer)
        raise ValueError(
            f"{type(trainer)} is not a supported trainer."
            " Baal supports ModelWrapper and BaalTransformersTrainer"
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from copy import deepcopy | ||
from typing import Dict, cast, List, Union | ||
|
||
from numpy._typing import NDArray | ||
|
||
from baal.active.dataset.base import Dataset | ||
from baal.experiments import FrameworkAdapter | ||
from baal.transformers_trainer_wrapper import BaalTransformersTrainer | ||
|
||
|
||
class TransformersAdapter(FrameworkAdapter):
    """``FrameworkAdapter`` backed by a ``BaalTransformersTrainer``.

    Snapshots of the model, LR-scheduler and optimizer state are taken at
    construction so ``reset_weights`` can restore them before each round.

    Args:
        wrapper: The trainer to drive.
    """

    def __init__(self, wrapper: BaalTransformersTrainer):
        self.wrapper = wrapper
        self._init_weight = deepcopy(self.wrapper.model.state_dict())
        # NOTE(review): a Hugging Face Trainer leaves optimizer/lr_scheduler as
        # None until training unless the caller supplied them, so calling
        # .state_dict() unconditionally can fail here. Confirm the same holds
        # for BaalTransformersTrainer.
        self._init_scheduler = self._snapshot(self.wrapper.lr_scheduler)
        self._init_optimizer = self._snapshot(self.wrapper.optimizer)

    @staticmethod
    def _snapshot(component) -> Union[dict, None]:
        """Return a deep copy of ``component.state_dict()``, or None if unset."""
        if component is None:
            return None
        return deepcopy(component.state_dict())

    def reset_weights(self):
        """Restore model, scheduler and optimizer to their initial state."""
        self.wrapper.model.load_state_dict(self._init_weight)
        # When nothing existed at construction, drop whatever the last round
        # created; the trainer is presumed to rebuild them on the next train().
        if self._init_scheduler is None:
            self.wrapper.lr_scheduler = None
        else:
            self.wrapper.lr_scheduler.load_state_dict(self._init_scheduler)
        if self._init_optimizer is None:
            self.wrapper.optimizer = None
        else:
            self.wrapper.optimizer.load_state_dict(self._init_optimizer)

    def train(self, al_dataset: Dataset) -> Dict[str, float]:
        """Train on ``al_dataset`` and return the metrics of the train output."""
        # Point the trainer at the dataset it was asked to train on; without
        # this the argument was silently ignored.
        self.wrapper.train_dataset = al_dataset
        return self.wrapper.train().metrics

    def predict(self, dataset: Dataset, iterations: int) -> Union[NDArray, List[NDArray]]:
        """Return ``predict_on_dataset`` output for ``dataset``."""
        return self.wrapper.predict_on_dataset(dataset, iterations=iterations)

    def evaluate(self, dataset: Dataset, average_predictions: int) -> Dict[str, float]:
        """Return the trainer's evaluation metrics on ``dataset``.

        ``average_predictions`` is accepted for interface compatibility and is
        not forwarded to the trainer.
        """
        return cast(Dict[str, float], self.wrapper.evaluate(dataset))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.