Skip to content

Commit

Permalink
remove deprecated add_special_tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Nov 1, 2024
1 parent c20fe9a commit 0d7151a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 27 deletions.
1 change: 0 additions & 1 deletion delft/applications/grobidTagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import argparse
import json
import time
from typing import Dict

from sklearn.model_selection import train_test_split

Expand Down
41 changes: 17 additions & 24 deletions delft/utilities/Transformer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import Union, Iterable
from typing import Union

from transformers import AutoTokenizer, TFAutoModel, AutoConfig, BertTokenizer, TFBertModel

Expand Down Expand Up @@ -60,7 +60,6 @@ def __init__(self, name: str, resource_registry: dict = None, delft_local_path:
# will be None if the key does not exist
self.auth_token = os.getenv('HF_ACCESS_TOKEN')


def configure_from_registry(self, resource_registry) -> None:
"""
Fetch transformer information from the registry and infer the loading method:
Expand Down Expand Up @@ -103,14 +102,13 @@ def configure_from_registry(self, resource_registry) -> None:
print("Missing vocab-file or not a file.")
else:
self.loading_method = LOADING_METHOD_HUGGINGFACE_NAME
#print("No configuration for", self.name, "Loading from Hugging face.")
                # print("No configuration for", self.name, "Loading from Hugging face.")
else:
self.loading_method = LOADING_METHOD_HUGGINGFACE_NAME
#print("No configuration for", self.name, "Loading from Hugging face.")
# print("No configuration for", self.name, "Loading from Hugging face.")

def init_preprocessor(self, max_sequence_length: int,
add_special_tokens: bool = True,
add_prefix_space: bool = True):
add_prefix_space: bool = True):
"""
Load the tokenizer according to the provided information, in case of missing configuration,
it will try to use huggingface as fallback solution.
Expand All @@ -126,35 +124,30 @@ def init_preprocessor(self, max_sequence_length: int,
do_lower_case = False

if do_lower_case is not None:
if self.auth_token != None:
if self.auth_token is not None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case,
use_auth_token=self.auth_token)
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case)
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case)
else:
if self.auth_token != None:
if self.auth_token is not None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
use_auth_token=self.auth_token)
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)

elif self.loading_method == LOADING_METHOD_LOCAL_MODEL_DIR:
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir_path,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)
elif self.loading_method == LOADING_METHOD_PLAIN_MODEL:
Expand Down Expand Up @@ -210,7 +203,7 @@ def instantiate_layer(self, load_pretrained_weights=True) -> Union[object, TFAut
else:
config_path = os.path.join(".", self.local_dir_path, TRANSFORMER_CONFIG_FILE_NAME)
self.transformer_config = AutoConfig.from_pretrained(config_path)
#self.transformer_config = AutoConfig.from_pretrained(self.local_dir_path)
# self.transformer_config = AutoConfig.from_pretrained(self.local_dir_path)
return TFAutoModel.from_config(self.transformer_config)

elif self.loading_method == LOADING_METHOD_PLAIN_MODEL:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
numpy==1.22.3
regex==2021.11.10
scikit-learn==1.0.1
tqdm==4.62.3
tqdm==4.66.3
tensorflow==2.9.3
h5py==3.6.0
unidecode==1.3.2
Expand All @@ -10,7 +10,7 @@ lmdb==1.2.1
truecase
requests>=2.20
pandas==1.3.5
transformers==4.33.2
transformers==4.46.1
torch==1.10.1
pytest
tensorflow-addons==0.19.0
Expand Down

0 comments on commit 0d7151a

Please sign in to comment.