Skip to content

Commit

Permalink
unpin transformers version (#9606)
Browse files Browse the repository at this point in the history
* unpin transformers

Signed-off-by: dimapihtar <dpihtar@gmail.com>

* guard deprecated imports

Signed-off-by: dimapihtar <dpihtar@gmail.com>

* Apply isort and black reformatting

Signed-off-by: dimapihtar <dimapihtar@users.noreply.github.com>

* fix import guards

Signed-off-by: dimapihtar <dpihtar@gmail.com>

* fix import guards

Signed-off-by: dimapihtar <dpihtar@gmail.com>

* Apply isort and black reformatting

Signed-off-by: dimapihtar <dimapihtar@users.noreply.github.com>

* try fixing

Signed-off-by: Chen Cui <chcui@nvidia.com>

* disable HF tests

Signed-off-by: Dmytro Pykhtar <dpykhtar@login-eos01.eos.clusters.nvidia.com>

* try fixing

Signed-off-by: Chen Cui <chcui@nvidia.com>

* hard code model lists

Signed-off-by: Chen Cui <chcui@nvidia.com>

* Apply isort and black reformatting

Signed-off-by: cuichenx <cuichenx@users.noreply.github.com>

* hard code model lists

Signed-off-by: Chen Cui <chcui@nvidia.com>

---------

Signed-off-by: dimapihtar <dpihtar@gmail.com>
Signed-off-by: dimapihtar <dimapihtar@users.noreply.github.com>
Signed-off-by: Chen Cui <chcui@nvidia.com>
Signed-off-by: Dmytro Pykhtar <dpykhtar@login-eos01.eos.clusters.nvidia.com>
Signed-off-by: cuichenx <cuichenx@users.noreply.github.com>
Co-authored-by: dimapihtar <dimapihtar@users.noreply.github.com>
Co-authored-by: Chen Cui <chcui@nvidia.com>
Co-authored-by: Dmytro Pykhtar <dpykhtar@login-eos01.eos.clusters.nvidia.com>
Co-authored-by: cuichenx <cuichenx@users.noreply.github.com>
  • Loading branch information
5 people authored and maanug-nv committed Jul 14, 2024
1 parent 6478258 commit cc7a08b
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
from typing import List, Optional

from transformers import (
ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
BERT_PRETRAINED_MODEL_ARCHIVE_LIST,
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
GPT2_PRETRAINED_MODEL_ARCHIVE_LIST,
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
AlbertConfig,
AutoModel,
BertConfig,
Expand All @@ -41,6 +35,74 @@

__all__ = ["get_huggingface_lm_model", "get_huggingface_pretrained_lm_models_list", "VOCAB_FILE_NAME"]

# The *_PRETRAINED_MODEL_ARCHIVE_LIST constants were removed from recent
# transformers releases, so we re-declare them here by hand. Values mirror:
# https://github.com/huggingface/transformers/blob/v4.40-release/src/transformers/models/deprecated/_archive_maps.py

# ALBERT: every size in both v1 and v2 (v1 sizes first, then v2, matching upstream order).
ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    f"albert/albert-{size}-{version}"
    for version in ("v1", "v2")
    for size in ("base", "large", "xlarge", "xxlarge")
]

# BERT: the official google-bert checkpoints, followed by well-known community ones.
_GOOGLE_BERT_CHECKPOINTS = (
    "bert-base-uncased",
    "bert-large-uncased",
    "bert-base-cased",
    "bert-large-cased",
    "bert-base-multilingual-uncased",
    "bert-base-multilingual-cased",
    "bert-base-chinese",
    "bert-base-german-cased",
    "bert-large-uncased-whole-word-masking",
    "bert-large-cased-whole-word-masking",
    "bert-large-uncased-whole-word-masking-finetuned-squad",
    "bert-large-cased-whole-word-masking-finetuned-squad",
    "bert-base-cased-finetuned-mrpc",
    "bert-base-german-dbmdz-cased",
    "bert-base-german-dbmdz-uncased",
)
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    f"google-bert/{checkpoint}" for checkpoint in _GOOGLE_BERT_CHECKPOINTS
] + [
    "cl-tohoku/bert-base-japanese",
    "cl-tohoku/bert-base-japanese-whole-word-masking",
    "cl-tohoku/bert-base-japanese-char",
    "cl-tohoku/bert-base-japanese-char-whole-word-masking",
    "TurkuNLP/bert-base-finnish-cased-v1",
    "TurkuNLP/bert-base-finnish-uncased-v1",
    "wietsedv/bert-base-dutch-cased",
]

# CamemBERT: the base model plus the two Musixmatch "umberto" variants.
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["almanach/camembert-base"] + [
    f"Musixmatch/umberto-{variant}"
    for variant in ("commoncrawl-cased-v1", "wikipedia-uncased-v1")
]

# DistilBERT: all checkpoints share the "distilbert-base-" prefix (no org namespace upstream).
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    f"distilbert-base-{suffix}"
    for suffix in (
        "uncased",
        "uncased-distilled-squad",
        "cased",
        "cased-distilled-squad",
        "german-cased",
        "multilingual-cased",
        "uncased-finetuned-sst-2-english",
    )
]

# GPT-2: the four official sizes plus the distilled variant.
GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
    f"openai-community/gpt2{size}" for size in ("", "-medium", "-large", "-xl")
] + ["distilbert/distilgpt2"]

# RoBERTa: FacebookAI checkpoints, the distilled base, and the two OpenAI-detector fine-tunes.
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = (
    [f"FacebookAI/roberta-{variant}" for variant in ("base", "large", "large-mnli")]
    + ["distilbert/distilroberta-base"]
    + [
        f"openai-community/roberta-{size}-openai-detector"
        for size in ("base", "large")
    ]
)


HUGGINGFACE_MODELS = {
"BertModel": {
Expand Down Expand Up @@ -94,7 +156,9 @@


def get_huggingface_lm_model(
pretrained_model_name: str, config_dict: Optional[dict] = None, config_file: Optional[str] = None,
pretrained_model_name: str,
config_dict: Optional[dict] = None,
config_file: Optional[str] = None,
):
"""
Returns lm model instantiated with Huggingface
Expand Down Expand Up @@ -135,7 +199,9 @@ def get_huggingface_lm_model(
raise ValueError(f"Use HuggingFace API directly in NeMo for {pretrained_model_name}")


def get_huggingface_pretrained_lm_models_list(include_external: bool = False,) -> List[str]:
def get_huggingface_pretrained_lm_models_list(
include_external: bool = False,
) -> List[str]:
"""
Returns the list of pretrained HuggingFace language models
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements_lightning.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ hydra-core>1.3,<=1.3.2
omegaconf<=2.3
pytorch-lightning>2.2.1
torchmetrics>=0.11.0
transformers>=4.36.0,<=4.40.2
transformers
wandb
webdataset>=0.2.86

0 comments on commit cc7a08b

Please sign in to comment.