From 010e0460b22ddd7f74e31163f69ab3da2e9741ba Mon Sep 17 00:00:00 2001 From: Travis McGuire <37486396+travismcguire@users.noreply.github.com> Date: Wed, 25 Mar 2020 13:40:03 -0700 Subject: [PATCH] Updated/added model cards (#3435) --- .../twmkn9/albert-base-v2-squad2/README.md | 36 ++++++++------- .../twmkn9/bert-base-uncased-squad2/README.md | 36 ++++++++------- .../distilbert-base-uncased-squad2/README.md | 45 +++++++++++++++++++ .../distilroberta-base-squad2/README.md | 44 ++++++++++++++++++ 4 files changed, 127 insertions(+), 34 deletions(-) create mode 100644 model_cards/twmkn9/distilbert-base-uncased-squad2/README.md create mode 100644 model_cards/twmkn9/distilroberta-base-squad2/README.md diff --git a/model_cards/twmkn9/albert-base-v2-squad2/README.md b/model_cards/twmkn9/albert-base-v2-squad2/README.md index a0a037715f04..5b615c470494 100644 --- a/model_cards/twmkn9/albert-base-v2-squad2/README.md +++ b/model_cards/twmkn9/albert-base-v2-squad2/README.md @@ -1,22 +1,24 @@ -This model is ALBERT base v2 trained on SQuAD v2 as: +This model is [ALBERT base v2](https://huggingface.co/albert-base-v2) trained on SQuAD v2 as: ``` -python run_squad.py ---model_type albert ---model_name_or_path albert-base-v2 ---do_train ---do_eval ---overwrite_cache ---do_lower_case ---version_2_with_negative ---train_file $SQUAD_DIR/train-v2.0.json ---predict_file $SQUAD_DIR/dev-v2.0.json ---per_gpu_train_batch_size 8 ---num_train_epochs 3 ---learning_rate 3e-5 ---max_seq_length 384 ---doc_stride 128 ---output_dir ./tmp/albert_base_fine/ +export SQUAD_DIR=../../squad2 +python3 run_squad.py + --model_type albert + --model_name_or_path albert-base-v2 + --do_train + --do_eval + --overwrite_cache + --do_lower_case + --version_2_with_negative + --save_steps 100000 + --train_file $SQUAD_DIR/train-v2.0.json + --predict_file $SQUAD_DIR/dev-v2.0.json + --per_gpu_train_batch_size 8 + --num_train_epochs 3 + --learning_rate 3e-5 + --max_seq_length 384 + --doc_stride 128 + --output_dir 
./tmp/albert_fine/ ``` Performance on a dev subset is close to the original paper: diff --git a/model_cards/twmkn9/bert-base-uncased-squad2/README.md b/model_cards/twmkn9/bert-base-uncased-squad2/README.md index 00cafde157bb..20bdf07512e0 100644 --- a/model_cards/twmkn9/bert-base-uncased-squad2/README.md +++ b/model_cards/twmkn9/bert-base-uncased-squad2/README.md @@ -1,22 +1,24 @@ -This model is BERT base uncased trained on SQuAD v2 as: +This model is [BERT base uncased](https://huggingface.co/bert-base-uncased) trained on SQuAD v2 as: ``` -python run_squad.py ---model_type bert ---model_name_or_path bert-base-uncased ---do_train ---do_eval ---overwrite_cache ---do_lower_case ---version_2_with_negative ---train_file $SQUAD_DIR/train-v2.0.json ---predict_file $SQUAD_DIR/dev-v2.0.json ---per_gpu_train_batch_size 8 ---num_train_epochs 3 ---learning_rate 3e-5 ---max_seq_length 384 ---doc_stride 128 ---output_dir ./tmp/bert_base_fine/ +export SQUAD_DIR=../../squad2 +python3 run_squad.py + --model_type bert + --model_name_or_path bert-base-uncased + --do_train + --do_eval + --overwrite_cache + --do_lower_case + --version_2_with_negative + --save_steps 100000 + --train_file $SQUAD_DIR/train-v2.0.json + --predict_file $SQUAD_DIR/dev-v2.0.json + --per_gpu_train_batch_size 8 + --num_train_epochs 3 + --learning_rate 3e-5 + --max_seq_length 384 + --doc_stride 128 + --output_dir ./tmp/bert_fine_tuned/ ``` Performance on a dev subset is close to the original paper: diff --git a/model_cards/twmkn9/distilbert-base-uncased-squad2/README.md b/model_cards/twmkn9/distilbert-base-uncased-squad2/README.md new file mode 100644 index 000000000000..cb8542fb51d0 --- /dev/null +++ b/model_cards/twmkn9/distilbert-base-uncased-squad2/README.md @@ -0,0 +1,45 @@ +This model is [Distilbert base uncased](https://huggingface.co/distilbert-base-uncased) trained on SQuAD v2 as: + +``` +export SQUAD_DIR=../../squad2 +python3 run_squad.py + --model_type distilbert + --model_name_or_path 
distilbert-base-uncased + --do_train + --do_eval + --overwrite_cache + --do_lower_case + --version_2_with_negative + --save_steps 100000 + --train_file $SQUAD_DIR/train-v2.0.json + --predict_file $SQUAD_DIR/dev-v2.0.json + --per_gpu_train_batch_size 8 + --num_train_epochs 3 + --learning_rate 3e-5 + --max_seq_length 384 + --doc_stride 128 + --output_dir ./tmp/distilbert_fine_tuned/ +``` + +Performance on a dev subset is close to the original paper: + +``` +Results: +{ + 'exact': 64.88976637051661, + 'f1': 68.1776176526635, + 'total': 6078, + 'HasAns_exact': 69.7594501718213, + 'HasAns_f1': 76.62665295288285, + 'HasAns_total': 2910, + 'NoAns_exact': 60.416666666666664, + 'NoAns_f1': 60.416666666666664, + 'NoAns_total': 3168, + 'best_exact': 64.88976637051661, + 'best_exact_thresh': 0.0, + 'best_f1': 68.17761765266337, + 'best_f1_thresh': 0.0 +} +``` + +We are hopeful this might save you time, energy, and compute. Cheers! \ No newline at end of file diff --git a/model_cards/twmkn9/distilroberta-base-squad2/README.md b/model_cards/twmkn9/distilroberta-base-squad2/README.md new file mode 100644 index 000000000000..c6b6569f6585 --- /dev/null +++ b/model_cards/twmkn9/distilroberta-base-squad2/README.md @@ -0,0 +1,44 @@ +This model is [Distilroberta base](https://huggingface.co/distilroberta-base) trained on SQuAD v2 as: + +``` +export SQUAD_DIR=../../squad2 +python3 run_squad.py + --model_type roberta + --model_name_or_path distilroberta-base + --do_train + --do_eval + --overwrite_cache + --do_lower_case + --version_2_with_negative + --save_steps 100000 + --train_file $SQUAD_DIR/train-v2.0.json + --predict_file $SQUAD_DIR/dev-v2.0.json + --per_gpu_train_batch_size 8 + --num_train_epochs 3 + --learning_rate 3e-5 + --max_seq_length 384 + --doc_stride 128 + --output_dir ./tmp/distilroberta_fine_tuned/ +``` + +Performance on a dev subset is close to the original paper: + +``` +Results: +{ + 'exact': 70.9279368213228, + 'f1': 74.60439802429168, + 'total': 6078, + 
'HasAns_exact': 67.62886597938144, + 'HasAns_f1': 75.30774267754136, + 'HasAns_total': 2910, + 'NoAns_exact': 73.95833333333333, + 'NoAns_f1': 73.95833333333333, 'NoAns_total': 3168, + 'best_exact': 70.94438960184272, + 'best_exact_thresh': 0.0, + 'best_f1': 74.62085080481161, + 'best_f1_thresh': 0.0 +} +``` + +We are hopeful this might save you time, energy, and compute. Cheers! \ No newline at end of file