Skip to content

Commit

Permalink
Proper model downloading to fix flaky nlu featurizer tests (#12557)
Browse files Browse the repository at this point in the history
* Setting transformers to offline mode to prevent downloading during tests

* improved model downloading before tests
  • Loading branch information
twerkmeister authored Jun 28, 2023
1 parent 0b679ce commit 6e10e42
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 62 deletions.
12 changes: 4 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,8 @@ endif
rm data/MITIE*.bz2

# Pre-fetch the Hugging Face models used by the tests: every line of
# data/test/hf_transformers_models.txt is passed as a model name to
# scripts/download_transformer_model.py, and rasa/LaBSE is downloaded in
# addition only when CI is empty/unset.
# NOTE(review): this is a diff rendering without +/- markers; the
# HOME_DIR/CACHE_DIR/wget lines appear to be the removed implementation and
# the two `poetry run` lines its replacement - confirm against the real Makefile.
prepare-transformers:
if [ $(OS) = "Windows_NT" ]; then HOME_DIR="$(HOMEDRIVE)$(HOMEPATH)"; else HOME_DIR=$(HOME); fi;\
CACHE_DIR=$$HOME_DIR/.cache/torch/transformers;\
mkdir -p "$$CACHE_DIR";\
i=0;\
while read -r URL; do read -r CACHE_FILE; if { [ $(CI) ] && [ $$i -gt 4 ]; } || ! [ $(CI) ]; then wget -nv $$URL -O $$CACHE_DIR/$$CACHE_FILE; fi; i=$$((i + 1)); done < "data/test/hf_transformers_models.txt"

while read -r MODEL; do poetry run python scripts/download_transformer_model.py $$MODEL ; done < data/test/hf_transformers_models.txt
if ! [ $(CI) ]; then poetry run python scripts/download_transformer_model.py rasa/LaBSE; fi
prepare-tests-macos:
brew install wget graphviz || true

Expand Down Expand Up @@ -178,7 +174,7 @@ test-nlu-predictors: prepare-spacy prepare-mitie test-marker

test-full-model-training: PYTEST_MARKER=category_full_model_training and (not flaky)
test-full-model-training: DD_ARGS := $(or $(DD_ARGS),)
test-full-model-training: prepare-spacy prepare-mitie test-marker
test-full-model-training: prepare-spacy prepare-mitie prepare-transformers test-marker

test-other-unit-tests: PYTEST_MARKER=category_other_unit_tests and (not flaky)
test-other-unit-tests: DD_ARGS := $(or $(DD_ARGS),)
Expand All @@ -198,7 +194,7 @@ test-gh-actions:
test-marker: clean
# OMP_NUM_THREADS can improve overall performance using one thread by process (on tensorflow), avoiding overload
# TF_CPP_MIN_LOG_LEVEL=2 sets C code log level for tensorflow to error suppressing lower log events
# TRANSFORMERS_OFFLINE=1 sets transformers to offline mode to prevent downloading during tests
OMP_NUM_THREADS=1 TF_CPP_MIN_LOG_LEVEL=2 poetry run pytest tests -n $(JOBS) --dist loadscope -m "$(PYTEST_MARKER)" --cov rasa --ignore $(INTEGRATION_TEST_FOLDER) $(DD_ARGS)
TRANSFORMERS_OFFLINE=1 OMP_NUM_THREADS=1 TF_CPP_MIN_LOG_LEVEL=2 poetry run pytest tests -n $(JOBS) --dist loadscope -m "$(PYTEST_MARKER)" --cov rasa --ignore $(INTEGRATION_TEST_FOLDER) $(DD_ARGS)

generate-pending-changelog:
poetry run python -c "from scripts import release; release.generate_changelog('major.minor.patch')"
Expand Down
63 changes: 9 additions & 54 deletions data/test/hf_transformers_models.txt
Original file line number Diff line number Diff line change
@@ -1,54 +1,9 @@
https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/vocab.txt
21aa38329c730774d9f45df9ec5443a9bd4abd2191e1d510c27647c151c5437f.f2539f82b1008971c6ea6574f078d95c6eead57223fc74fdc420013fa9de391a
https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/special_tokens_map.json
99497d78492c90ab7d824d695b9a8d043369fbc2bf1112dcc7cdef9a6c4fa691.275045728fbf41c11d3dae08b8742c054377e18d92cc7b72b6351152a99b64e4
https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/tokenizer_config.json
527f618330e845c9d31826e7d9ce983aa816fafcf4f29f8c52f8ae1fdd097219.1c61d5d3dc67d88e0c74c64cda9b17bc30bdbd1c373cceeb740b9953729709aa
https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/config.json
90984a8da5021905af8679644b61bc5428ef16e9a307469152c163ec873db240.f1ba7080a92fc164a144311742f36dfb6a724bc9da532264b30d87040e15cc9d
https://cdn.huggingface.co/rasa/LaBSE/tf_model.h5
fd2ff7409cd4abbce31d54b8acebc305939787751dd697b6f38a3bf1f197a614.2589e15ea34b96d9bdcc478748ae77b629487da363566089fe6a8cdb1e6ea284.h5
https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt
8a0c070123c1f794c42a29c6904beb7c1b8715741e235bee04aca2c7636fc83f.9b42061518a39ca00b8b52059fd2bede8daa613f8a8671500e518a8c29de8c00
https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json
8a3b1cfe5da58286e12a0f5d7d182b8d6eca88c08e26c332ee3817548cf7e60a.f12a4f986e43d8b328f5b067a641064d67b91597567a06c7b122d1ca7dfd9741
https://cdn.huggingface.co/bert-base-chinese-tf_model.h5
86a460b592673bcac3fe5d858ecf519e4890b4f6eddd1a46a077bd672dee6fe5.e6b974f59b54219496a89fd32be7afb020374df0976a796e5ccd3a1733d31537.h5
https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json
4ab93d0cd78ae80e746c27c9cd34e90b470abdabe0590c9ec742df61625ba310.b9628f6fe5519626534b82ce7ec72b22ce0ae79550325f45c604a25c0ad87fd6
https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-merges.txt
0f8de0dbd6a2bb6bde7d758f4c120dd6dd20b46f2bf0a47bc899c89f46532fde.20808570f9a3169212a577f819c845330da870aeb14c40f7319819fce10c3b76
https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-config.json
a27bb7c70e9002d7558d2682d5a95f3c0a8b31034616309459e0b51ef07ade09.bd0797be126548711309ad2174d2afb16e3c37e891707667603d85e35a4ad001
https://cdn.huggingface.co/openai-gpt-tf_model.h5
642cba239b8eca22b702e71e92d507b8af47ddd2df74dc7751e2a4f65d8d434c.f26918df904593cca2dbd78a3bb760376f4f8ce1ce8d3b13bb6ab28228ee65fc.h5
https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json
f2808208f9bec2320371a9f5f891c184ae0b674ef866b79c58177067d15732dd.1512018be4ba4e8726e41b9145129dc30651ea4fec86aa61f4b9f40bf94eac71
https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt
d629f792e430b3c76a1291bb2766b0a047e36fae0588f9dbc1ae51decdff691b.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda
https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json
4be02c5697d91738003fb1685c9872f284166aa32e061576bbe6aaeb95649fcf.db13c9bc9c7bdd738ec89e069621d88e05dc670366092d809a9cbcac6798e24e
https://cdn.huggingface.co/gpt2-tf_model.h5
132dec44f9ced4b20f1b1c88a426b1d3dab5ba9e5f24a82541833dae44d5b8db.afd2261c07481427cd087f622388c2c086be9c62875f5945922c7adb2239b63a.h5
https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model
dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8
https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json
c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.69e5e35e0b798cab5e473f253752f8bf4d280ee37682281a23eed80f6e2d09c6
https://cdn.huggingface.co/xlnet-base-cased-tf_model.h5
44ada4a49497a676c2d1fa2dbb7059df50f9cedb14f332862d2fca2c35d04a7d.42bc04b3944abffc38e9b60aadffb89f81aafa9c86473d157ed4b28953471ceb.h5
https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt
26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json
4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json
a41e817d5c0743e29e86ff85edc8c257e61bc8d88e4271bb1b243b6e7614c633.8949e27aafafa845a18d98a0e3a88bc2d248bbc32a1b75947366664658f23b1c
https://cdn.huggingface.co/distilbert-base-uncased-tf_model.h5
cce28882467f298a29fc905b9dd1683695d96198a83432fe707089dccd71c019.e02bd57e9d8507853eccc7c04ac2e938a6cdaff4b9bf941c10e781b61ddb9bbd.h5
https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json
d0c5776499adc1ded22493fae699da0971c1ee4c2587111707a4d177d20257a2.ef00af9e673c7160b4d41cfda1f48c5f4cba57d5142754525572a846a1ab1b9b
https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt
b35e7cd126cd4229a746b5d5c29a749e8e84438b14bcdb575950584fe33207e8.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda
https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json
e1a2a406b5a05063c31f4dfdee7608986ba7c6393f7f79db5e69dcd197208534.117c81977c5979de8c088352e74ec6e70f5c66096c28b61d3c50101609b39690
https://cdn.huggingface.co/roberta-base-tf_model.h5
2e18f106492efe1a8e6766e4d4e0bf4a82cee267c0cc52af431cf97005d4e3db.34733ed140f011f207fb07b32b443050356e99a9638db284a22d77bd3d5f54b3.h5
bert-base-chinese
openai-gpt
gpt2
xlnet-base-cased
bert-base-uncased
roberta-base
distilbert-base-uncased
camembert-base
sentence-transformers/all-MiniLM-L6-v2
7 changes: 7 additions & 0 deletions scripts/download_transformer_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Download the files of one Hugging Face Hub repository via snapshot_download.

Usage:
    python scripts/download_transformer_model.py <repo_id>

Only files matching ``*.txt``, ``*.json``, ``*.h5`` and ``*.model`` are
requested.
"""
from huggingface_hub import snapshot_download
import sys

# Fail with a readable usage message (non-zero exit status) instead of an
# IndexError traceback when no repository id is given, e.g. when the caller
# loops over a model list that contains a blank line.
if len(sys.argv) < 2:
    sys.exit(f"Usage: {sys.argv[0]} <repo_id>")

repo_id = sys.argv[1]

print(f"Downloading model files for {repo_id}...")
# Restrict the snapshot to the listed file patterns rather than fetching
# every file in the repository.
snapshot_download(
    repo_id=repo_id, allow_patterns=["*.txt", "*.json", "*.h5", "*.model"]
)

0 comments on commit 6e10e42

Please sign in to comment.