Add utility to train multiple file variants at the same time
1 parent 59e8422 · commit 427e276
Showing 5 changed files with 231 additions and 6 deletions.
@@ -0,0 +1,68 @@
"""
Utils to evaluate many models of the same type at once
"""
import argparse
import os
import logging

from stanza.models.lemma_classifier.evaluate_models import main as evaluate_main


logger = logging.getLogger('stanza.lemmaclassifier')

def evaluate_n_models(path_to_models_dir, args):
    """Evaluate each saved model found in path_to_models_dir and average the metrics across models."""
    total_results = {
        "be": 0.0,
        "have": 0.0,
        "accuracy": 0.0,
        "weighted_f1": 0.0
    }
    paths = os.listdir(path_to_models_dir)
    num_models = len(paths)
    for model_path in paths:
        full_path = os.path.join(path_to_models_dir, model_path)
        args.save_name = full_path
        mcc_results, confusion, acc, weighted_f1 = evaluate_main(predefined_args=args)

        for lemma in mcc_results:
            lemma_f1 = mcc_results.get(lemma, None).get("f1") * 100
            total_results[lemma] += lemma_f1

        total_results["accuracy"] += acc
        total_results["weighted_f1"] += weighted_f1

    total_results["be"] /= num_models
    total_results["have"] /= num_models
    total_results["accuracy"] /= num_models
    total_results["weighted_f1"] /= num_models

    logger.info(f"Models in {path_to_models_dir} had average weighted f1 of {100 * total_results['weighted_f1']}.\nLemma 'be' had f1: {total_results['be']}\nLemma 'have' had f1: {total_results['have']}.\nAccuracy: {100 * total_results['accuracy']}.\n ({num_models} models evaluated).")
    return total_results


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--vocab_size", type=int, default=10000, help="Number of tokens in vocab")
    parser.add_argument("--embedding_dim", type=int, default=100, help="Number of dimensions in word embeddings (currently using GloVe)")
    parser.add_argument("--hidden_dim", type=int, default=256, help="Size of hidden layer")
    parser.add_argument('--wordvec_pretrain_file', type=str, default=None, help='Exact name of the pretrain file to read')
    parser.add_argument("--charlm", action='store_true', default=False, help="Whether or not to use the charlm embeddings")
    parser.add_argument('--charlm_shorthand', type=str, default=None, help="Shorthand for character-level language model training corpus.")
    parser.add_argument("--charlm_forward_file", type=str, default=os.path.join(os.path.dirname(__file__), "charlm_files", "1billion_forward.pt"), help="Path to forward charlm file")
    parser.add_argument("--charlm_backward_file", type=str, default=os.path.join(os.path.dirname(__file__), "charlm_files", "1billion_backwards.pt"), help="Path to backward charlm file")
    parser.add_argument("--save_name", type=str, default=os.path.join(os.path.dirname(__file__), "saved_models", "lemma_classifier_model.pt"), help="Path to model save file")
    parser.add_argument("--model_type", type=str, default="roberta", help="Which transformer to use ('bert' or 'roberta' or 'lstm')")
    parser.add_argument("--bert_model", type=str, default=None, help="Use a specific transformer instead of the default bert/roberta")
    parser.add_argument("--eval_file", type=str, help="Path to evaluation file")

    # Args specific to multi-model evaluation
    parser.add_argument("--base_path", type=str, default=None, help="Path to the directory of models to evaluate")

    args = parser.parse_args()
    evaluate_n_models(args.base_path, args)


if __name__ == "__main__":
    main()
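As a usage note, below is a minimal sketch of how evaluate_n_models might be driven from Python rather than the command line. The directory and file paths are placeholders, and it assumes evaluate_main needs only the arguments defined by the parser above:

import argparse

# Hypothetical paths; evaluate_n_models overwrites args.save_name with each
# model file it finds under base_path before calling evaluate_main on it.
args = argparse.Namespace(
    vocab_size=10000, embedding_dim=100, hidden_dim=256,
    wordvec_pretrain_file=None, charlm=False, charlm_shorthand=None,
    charlm_forward_file="charlm_files/1billion_forward.pt",
    charlm_backward_file="charlm_files/1billion_backwards.pt",
    save_name=None,                      # filled in per model
    model_type="lstm", bert_model=None,
    eval_file="data/processed_ud_en/combined_dev.txt",
    base_path="saved_models/hidden_dim_sweep",
)
results = evaluate_n_models(args.base_path, args)
print(results)   # averaged 'be'/'have' f1, accuracy, weighted f1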
@@ -0,0 +1,155 @@
"""
Utils for training and evaluating multiple models simultaneously
"""

import argparse
import os

from stanza.models.lemma_classifier.train_model import main as train_lstm_main
from stanza.models.lemma_classifier.transformer_baseline.baseline_trainer import main as train_tfmr_main
from stanza.models.lemma_classifier.constants import DEFAULT_BATCH_SIZE


change_params_map = {
    "lstm_layer": [16, 32, 64, 128, 256, 512],
    "upos_emb_dim": [5, 10, 20, 30],
    "training_size": [150, 300, 450, 600, 'full'],
}  # TODO: Add attention

def train_n_models(num_models: int, base_path: str, args):
    """Train num_models copies of the LSTM-based model for each value of the hyperparameter selected by args.change_param."""
    if args.change_param == "lstm_layer":
        for num_layers in change_params_map.get("lstm_layer", None):
            for i in range(num_models):
                new_save_name = os.path.join(base_path, f"{num_layers}_{i}.pt")
                args.save_name = new_save_name
                args.hidden_dim = num_layers
                train_lstm_main(predefined_args=args)

    if args.change_param == "upos_emb_dim":
        for upos_dim in change_params_map.get("upos_emb_dim", None):
            for i in range(num_models):
                new_save_name = os.path.join(base_path, f"dim_{upos_dim}_{i}.pt")
                args.save_name = new_save_name
                args.upos_emb_dim = upos_dim
                train_lstm_main(predefined_args=args)

    if args.change_param == "training_size":
        for size in change_params_map.get("training_size", None):
            for i in range(num_models):
                new_save_name = os.path.join(base_path, f"{size}_examples_{i}.pt")
                new_train_file = os.path.join(os.path.dirname(__file__), "data", "processed_ud_en", "combined_train.txt")
                args.save_name = new_save_name
                args.train_file = new_train_file
                train_lstm_main(predefined_args=args)

    if args.change_param == "base":
        for i in range(num_models):
            new_save_name = os.path.join(base_path, f"lstm_model_{i}.pt")
            args.save_name = new_save_name
            args.weighted_loss = False
            train_lstm_main(predefined_args=args)

            if not args.weighted_loss:
                args.weighted_loss = True
                new_save_name = os.path.join(base_path, f"lstm_model_wloss_{i}.pt")
                args.save_name = new_save_name
                train_lstm_main(predefined_args=args)

    if args.change_param == "base_charlm":
        for i in range(num_models):
            new_save_name = os.path.join(base_path, f"lstm_charlm_{i}.pt")
            args.save_name = new_save_name
            train_lstm_main(predefined_args=args)

    if args.change_param == "base_charlm_upos":
        for i in range(num_models):
            new_save_name = os.path.join(base_path, f"lstm_charlm_upos_{i}.pt")
            args.save_name = new_save_name
            train_lstm_main(predefined_args=args)

    if args.change_param == "base_upos":
        for i in range(num_models):
            new_save_name = os.path.join(base_path, f"lstm_upos_{i}.pt")
            args.save_name = new_save_name
            train_lstm_main(predefined_args=args)

    if args.change_param == "attn_model":
        for i in range(num_models):
            new_save_name = os.path.join(base_path, f"attn_model_{args.num_heads}_heads_{i}.pt")
            args.save_name = new_save_name
            train_lstm_main(predefined_args=args)

def train_n_tfmrs(num_models: int, base_path: str, args):
    """Train num_models transformer baselines, each with both cross-entropy and weighted BCE loss."""
    if args.multi_train_type == "tfmr":

        for i in range(num_models):

            if args.change_param == "bert":
                new_save_name = os.path.join(base_path, f"bert_{i}.pt")
                args.save_name = new_save_name
                args.loss_fn = "ce"
                train_tfmr_main(predefined_args=args)

                new_save_name = os.path.join(base_path, f"bert_wloss_{i}.pt")
                args.save_name = new_save_name
                args.loss_fn = "weighted_bce"
                train_tfmr_main(predefined_args=args)

            elif args.change_param == "roberta":
                new_save_name = os.path.join(base_path, f"roberta_{i}.pt")
                args.save_name = new_save_name
                args.loss_fn = "ce"
                train_tfmr_main(predefined_args=args)

                new_save_name = os.path.join(base_path, f"roberta_wloss_{i}.pt")
                args.save_name = new_save_name
                args.loss_fn = "weighted_bce"
                train_tfmr_main(predefined_args=args)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--hidden_dim", type=int, default=256, help="Size of hidden layer")
    parser.add_argument('--wordvec_pretrain_file', type=str, default=os.path.join(os.path.dirname(__file__), "pretrain", "glove.pt"), help='Exact name of the pretrain file to read')
    parser.add_argument("--charlm", action='store_true', dest='use_charlm', default=False, help="Whether or not to use the charlm embeddings")
    parser.add_argument('--charlm_shorthand', type=str, default=None, help="Shorthand for character-level language model training corpus.")
    parser.add_argument("--charlm_forward_file", type=str, default=os.path.join(os.path.dirname(__file__), "charlm_files", "1billion_forward.pt"), help="Path to forward charlm file")
    parser.add_argument("--charlm_backward_file", type=str, default=os.path.join(os.path.dirname(__file__), "charlm_files", "1billion_backwards.pt"), help="Path to backward charlm file")
    parser.add_argument("--upos_emb_dim", type=int, default=20, help="Dimension size for UPOS tag embeddings.")
    parser.add_argument("--use_attn", action='store_true', dest='attn', default=False, help='Whether to use multihead attention instead of LSTM.')
    parser.add_argument("--num_heads", type=int, default=0, help="Number of heads to use for multihead attention.")
    parser.add_argument("--save_name", type=str, default=os.path.join(os.path.dirname(__file__), "saved_models", "lemma_classifier_model_weighted_loss_charlm_new.pt"), help="Path to model save file")
    parser.add_argument("--lr", type=float, default=0.001, help="Learning rate")
    parser.add_argument("--num_epochs", type=float, default=10, help="Number of training epochs")
    parser.add_argument("--batch_size", type=int, default=DEFAULT_BATCH_SIZE, help="Number of examples to include in each batch")
    parser.add_argument("--train_file", type=str, default=os.path.join(os.path.dirname(__file__), "data", "processed_ud_en", "combined_train.txt"), help="Full path to training file")
    parser.add_argument("--weighted_loss", action='store_true', dest='weighted_loss', default=False, help="Whether to use weighted loss during training.")
    parser.add_argument("--eval_file", type=str, default=os.path.join(os.path.dirname(__file__), "data", "processed_ud_en", "combined_dev.txt"), help="Path to dev file used to evaluate model for saves")
    # Tfmr-specific args
    parser.add_argument("--model_type", type=str, default="roberta", help="Which transformer to use ('bert' or 'roberta')")
    parser.add_argument("--bert_model", type=str, default=None, help="Use a specific transformer instead of the default bert/roberta")
    parser.add_argument("--loss_fn", type=str, default="weighted_bce", help="Which loss function to train with (e.g. 'ce' or 'weighted_bce')")
    # Multi-model train args
    parser.add_argument("--multi_train_type", type=str, default="lstm", help="Whether to multi-train an LSTM ('lstm') or a transformer ('tfmr')")
    parser.add_argument("--multi_train_count", type=int, default=5, help="Number of each model to build")
    parser.add_argument("--base_path", type=str, default=None, help="Base directory in which to save the generated models")
    parser.add_argument("--change_param", type=str, default=None, help="Which hyperparameter to change when training")

    args = parser.parse_args()

    if args.multi_train_type == "lstm":
        train_n_models(num_models=args.multi_train_count,
                       base_path=args.base_path,
                       args=args)
    elif args.multi_train_type == "tfmr":
        train_n_tfmrs(num_models=args.multi_train_count,
                      base_path=args.base_path,
                      args=args)
    else:
        raise ValueError(f"Improper input {args.multi_train_type}")


if __name__ == "__main__":
    main()
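For reference, a minimal sketch of driving the multi-training utility programmatically, assuming train_lstm_main needs only the arguments defined by the parser above. The base_path is a placeholder directory, and change_param selects the lstm_layer sweep from change_params_map:

import argparse

# Hypothetical invocation: sweep the LSTM hidden sizes in
# change_params_map["lstm_layer"], training 3 copies per setting.
args = argparse.Namespace(
    hidden_dim=256,
    wordvec_pretrain_file="pretrain/glove.pt",
    use_charlm=False, charlm_shorthand=None,
    charlm_forward_file="charlm_files/1billion_forward.pt",
    charlm_backward_file="charlm_files/1billion_backwards.pt",
    upos_emb_dim=20, attn=False, num_heads=0,
    save_name=None,                      # set per model by train_n_models
    lr=0.001, num_epochs=10,
    batch_size=DEFAULT_BATCH_SIZE,       # imported at the top of this file
    train_file="data/processed_ud_en/combined_train.txt",
    weighted_loss=False,
    eval_file="data/processed_ud_en/combined_dev.txt",
    model_type="roberta", bert_model=None, loss_fn="weighted_bce",
    multi_train_type="lstm", multi_train_count=3,
    base_path="saved_models/lstm_layer_sweep",
    change_param="lstm_layer",
)
train_n_models(num_models=args.multi_train_count, base_path=args.base_path, args=args)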