Skip to content

Commit

Permalink
Use PackedSeqParams in accordance with changes in Megatron-LM (NVIDIA#8205)
Browse files Browse the repository at this point in the history

* Use PackedSeqParams in accordance with changes in Megatron-LM

Signed-off-by: Chen Cui <chcui@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add import guard for packed sequence

Signed-off-by: Chen Cui <chcui@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Chen Cui <chcui@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
cuichenx and pre-commit-ci[bot] authored Jan 23, 2024
1 parent 0f239ca commit a39f526
Showing 1 changed file with 20 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
from contextlib import nullcontext
from dataclasses import fields
from functools import partial
from importlib.metadata import version
from typing import Any, Dict, Iterator, List, Optional, Union

import torch
from omegaconf import OmegaConf
from omegaconf.dictconfig import DictConfig
from pkg_resources import packaging
from pytorch_lightning.accelerators import CPUAccelerator
from pytorch_lightning.trainer.trainer import Trainer

Expand Down Expand Up @@ -925,12 +927,24 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_
cu_seqlens = cu_seqlens[: cu_seqlens_argmin.item()]
else:
cu_seqlens = cu_seqlens[: torch.argmin(cu_seqlens)]
forward_args['cu_seqlens_q'] = cu_seqlens
forward_args['cu_seqlens_kv'] = cu_seqlens
if max_seqlen is not None:
forward_args['max_seqlen_q'] = max_seqlen
forward_args['max_seqlen_kv'] = max_seqlen
forward_args['qkv_format'] = 'thd'

try:
from megatron.core.packed_seq_params import PackedSeqParams
except (ImportError, ModuleNotFoundError) as e:
mcore_version = packaging.version.Version(version('megatron-core'))
logging.error(
f"megatron-core v{mcore_version} does not support training with packed sequence. "
"Please use megatron-core >= 0.5.0, or set model.data.train_ds.packed_sequence=False"
)
raise e

forward_args['packed_seq_params'] = PackedSeqParams(
cu_seqlens_q=cu_seqlens,
cu_seqlens_kv=cu_seqlens,
max_seqlen_q=max_seqlen,
max_seqlen_kv=max_seqlen,
qkv_format='thd',
)

output_tensor = model(**forward_args)

Expand Down

0 comments on commit a39f526

Please sign in to comment.