Use closed-formula to round by multiple (NVIDIA#9307)
* Use closed-formula to round by multiple

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* Apply isort and black reformatting

Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>

---------

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>
Co-authored-by: akoumpa <akoumpa@users.noreply.github.com>
Co-authored-by: Pablo Garay <palenq@gmail.com>
3 people authored Jun 27, 2024
1 parent 6389c89 commit 265e680
Showing 3 changed files with 18 additions and 10 deletions.
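The substantive change in all three files below is the same: the incremental while-loop that pads a vocabulary size up to the next multiple is replaced with a closed-form ceiling expression. A minimal standalone sketch (not part of the commit; the helper names and the sample sizes are illustrative only) showing that the two forms agree:

# Standalone sketch (not from this commit): compares the closed formula
# introduced in the diff below against the while-loop it replaces.

def round_up_loop(value: int, multiple: int) -> int:
    # Old approach: bump one at a time until divisible by `multiple`.
    while (value % multiple) != 0:
        value += 1
    return value

def round_up_closed(value: int, multiple: int) -> int:
    # New approach: ceiling division, then scale back up.
    return ((value + multiple - 1) // multiple) * multiple

if __name__ == "__main__":
    # e.g. a 30522-token vocab padded to a multiple of 128 * 2 (tensor parallel size 2) -> 30720
    for vocab, mult in [(30522, 256), (50257, 128), (8, 8)]:
        assert round_up_loop(vocab, mult) == round_up_closed(vocab, mult)
        print(vocab, mult, round_up_closed(vocab, mult))

The closed form does in one integer operation what the loop did in up to `multiple - 1` iterations, with identical results for positive inputs.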
@@ -298,7 +298,7 @@ def encode(self, x):
 
 
 class BERTTokenizer(AbstractEncoder):
-    """ Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)"""
+    """Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)"""
 
     def __init__(self, device="cuda", vq_interface=True, max_length=77):
         super().__init__()
@@ -530,7 +530,10 @@ def __init__(
         print(f"Downloading clip with", arch, version, cache_dir)
         self.device = device
         model, _, _ = open_clip.create_model_and_transforms(
-            arch, device=torch.device("cpu"), pretrained=version, cache_dir=cache_dir,
+            arch,
+            device=torch.device("cpu"),
+            pretrained=version,
+            cache_dir=cache_dir,
         )
         del model.visual
         self.model = model
@@ -669,7 +672,11 @@ def build_tokenizer(self, cfg):
             legacy=legacy,
         )
 
-        _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,)
+        _, self.text_transform = get_preprocess_fns(
+            cfg,
+            self.tokenizer,
+            is_train=False,
+        )
         self.max_length = cfg.text.get("max_position_embeddings")
 
     def load_model(self, cfg, state_dict):
@@ -699,8 +706,7 @@ def load_model(self, cfg, state_dict):
     def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size):
         after = orig_vocab_size
         multiple = make_vocab_size_divisible_by * tensor_model_parallel_size
-        while (after % multiple) != 0:
-            after += 1
+        after = ((after + multiple - 1) // multiple) * multiple
         return after
 
     def forward(self, text):
@@ -765,7 +771,11 @@ def __init__(
         super().__init__()
         assert layer in self.LAYERS
         self.projection_dim = 1280
-        model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device("cpu"), pretrained=version,)
+        model, _, _ = open_clip.create_model_and_transforms(
+            arch,
+            device=torch.device("cpu"),
+            pretrained=version,
+        )
         del model.visual
         self.model = model

@@ -581,8 +581,7 @@ def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by
 
         after = orig_vocab_size
         multiple = make_vocab_size_divisible_by * tensor_model_parallel_size
-        while (after % multiple) != 0:
-            after += 1
+        after = ((after + multiple - 1) // multiple) * multiple
         logging.info(
             f'Padded vocab_size: {after}, original vocab_size: {orig_vocab_size}, dummy tokens: {after - orig_vocab_size}.'
         )
3 changes: 1 addition & 2 deletions nemo/lightning/base.py
@@ -26,8 +26,7 @@ def get_vocab_size(
 
     after = vocab_size
     multiple = make_vocab_size_divisible_by * config.tensor_model_parallel_size
-    while (after % multiple) != 0:
-        after += 1
+    after = ((after + multiple - 1) // multiple) * multiple
    logging.info(
        f"Padded vocab_size: {after}, original vocab_size: {vocab_size}, dummy tokens:" f" {after - vocab_size}."
    )