Commit

Merge branch 'main' into parallel_prompt_tuning
vadam5 committed Feb 16, 2022
2 parents 3cfd023 + b5012d0 commit a31a264
Showing 16 changed files with 121 additions and 78 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
@@ -81,9 +81,9 @@ RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]"
python -c "import nemo.collections.tts as nemo_tts" && \
python -c "import nemo_text_processing.text_normalization as text_normalization"

# TODO: Try to remove once 21.07 container is the base container
# TODO: Update to newer numba 0.56.0RC1 for 22.02 container
# install pinned numba version
RUN conda install -c conda-forge numba=0.54.1
# RUN conda install -c conda-forge numba==0.54.1

# copy scripts/examples/tests into container for end user
WORKDIR /workspace/nemo
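
The hunk above adjusts the pinned-numba section: the TODO now points at numba 0.56.0RC1 for the 22.02 container, and one of the two conda install lines is commented out. A quick, hypothetical sanity check in the spirit of the python -c "import ..." probes earlier in the Dockerfile (the printed version is an assumption, not something the commit asserts):

    import numba
    print(numba.__version__)  # e.g. 0.54.1 under the old pin; 0.56.x once the 22.02 TODO lands
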
12 changes: 7 additions & 5 deletions Jenkinsfile
@@ -718,7 +718,7 @@ pipeline {
parallel {
stage('SGD-GEN') {
steps {
sh 'cd examples/nlp/dialogue_state_tracking_generative && \
sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue_state_tracking_generative && \
python sgd_gen.py \
model.dataset.data_dir=/home/TestData/nlp/sgd_small \
model.language_model.lm_checkpoint=/home/TestData/nlp/gpt2/pytorch_model.bin\
@@ -734,16 +734,18 @@
trainer.val_check_interval=0.0 \
trainer.gpus=[0] \
model.dataset.use_cache=false \
model.tokenizer.special_tokens={pad_token:"endoftext"}\
model.language_model.pretrained_model_name=gpt2 \
model.tokenizer.special_tokens={pad_token:"endoftext"} \
model.tokenizer.tokenizer_name=gpt2 \
model.tokenizer.vocab_file=/home/TestData/nlp/gpt2/vocab.json\
model.language_model.pretrained_model_name=/home/TestData/nlp/gpt2 \
trainer.accelerator=ddp \
exp_manager=null && \
rm -rf sgd_gen_outputs'
}
}
stage('SGD-GEN Backward compatible with SGDQA') {
steps {
sh 'cd examples/nlp/dialogue_state_tracking_generative && \
sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue_state_tracking_generative && \
python sgd_gen.py \
model.dataset.data_dir=/home/TestData/nlp/sgd_small \
model.dataset.dialogues_example_dir=sgd_gen_bert_outputs \
@@ -760,7 +762,7 @@
model.language_model.pretrained_model_name=bert-base-cased \
trainer.accelerator=ddp \
exp_manager=null && \
rm -rf sgd_gen_bert_outputs'
rm -rf sgd_gen_bert_outputs && TRANSFORMERS_OFFLINE=1'
}
}
}
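
Both SGD-GEN stages now run with TRANSFORMERS_OFFLINE=0, so the Hugging Face loaders invoked by sgd_gen.py are not forced into offline mode, and the GPT-2 stage switches to a tokenizer_name/vocab_file pair plus a local pretrained_model_name under /home/TestData/nlp/gpt2. A rough sketch of what that combination means on the CI box, assuming the directory holds the usual GPT-2 assets (vocab.json, merges, config, pytorch_model.bin); the exact calls NeMo makes internally are not shown in this diff:

    import os
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    os.environ["TRANSFORMERS_OFFLINE"] = "0"  # permit, but do not require, Hugging Face Hub access
    tokenizer = GPT2Tokenizer.from_pretrained("/home/TestData/nlp/gpt2")  # or simply "gpt2" when online
    model = GPT2LMHeadModel.from_pretrained("/home/TestData/nlp/gpt2")
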
examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml
@@ -29,10 +29,12 @@ trainer:
resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.
num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it
checkpoint_callback: False # Provided by exp_manager
logger: False # Provided by exp_manager

model:
tensor_model_parallel_size: 1
nemo_path: null # filename to save the model and associated artifacts to .nemo file
library: huggingface # huggingface or megatron
tokenizer:
tokenizer_name: ${model.language_model.pretrained_model_name} # or sentencepiece
vocab_file: null # path to vocab file
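
The config hunk above (presumably conf/dialogue_config.yaml, given the @hydra_runner(config_path="conf", config_name="dialogue_config") decorator later in this diff) adds two model keys: tensor_model_parallel_size and library. A minimal, illustrative sketch of how a training script might branch on them; the key names mirror the YAML, everything else is assumed:

    from omegaconf import OmegaConf

    cfg = OmegaConf.create({"model": {"tensor_model_parallel_size": 1, "library": "huggingface"}})
    if cfg.model.library == "megatron":
        pass  # load a Megatron-LM checkpoint, possibly sharded across tensor-parallel ranks
    elif cfg.model.library == "huggingface":
        pass  # load a Hugging Face language model
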
15 changes: 13 additions & 2 deletions examples/nlp/dialogue_state_tracking_generative/sgd_gen.py
@@ -106,18 +106,29 @@

from nemo.collections.nlp.models.dialogue_state_tracking_generative.dialogue_gpt_model import DialogueGPTModel
from nemo.collections.nlp.models.dialogue_state_tracking_sgdqa.sgdqa_model import SGDQAModel
from nemo.collections.nlp.modules.common.megatron.megatron_utils import compute_model_parallel_rank
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPPlugin
from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.app_state import AppState
from nemo.utils.exp_manager import exp_manager


@hydra_runner(config_path="conf", config_name="dialogue_config")
def main(cfg: DictConfig) -> None:
pl.seed_everything(42)
logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
trainer = pl.Trainer(**cfg.trainer)

plugin = NLPDDPPlugin()
trainer = pl.Trainer(**cfg.trainer, plugins=plugin)

exp_manager(trainer, cfg.get("exp_manager", None))

app_state = AppState()
if cfg.model.tensor_model_parallel_size > 1:
app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

if 'bert' in cfg.model.language_model.pretrained_model_name:
model_class = SGDQAModel
elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower():
@@ -155,7 +166,7 @@ def main(cfg: DictConfig) -> None:

if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None:
gpu = 1 if cfg.trainer.gpus != 0 else 0
trainer = pl.Trainer(gpus=gpu)
trainer = pl.Trainer(gpus=gpu, plugins=plugin, precision=16)
model.setup_multiple_test_data(test_data_config=cfg.model.test_ds)
if model.prepare_test(trainer):
trainer.test(model)
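
main() now constructs the trainer with an NLPDDPPlugin and, when tensor_model_parallel_size > 1, records the model-parallel size and rank on AppState via compute_model_parallel_rank; the later test-only trainer reuses the same plugin and runs at 16-bit precision. As a back-of-the-envelope illustration of the rank bookkeeping (the modulo rule below is an assumption about how consecutive local ranks are grouped, not a quote of NeMo's implementation):

    def model_parallel_rank(local_rank: int, model_parallel_size: int) -> int:
        # assumed grouping: consecutive GPUs on a node form one tensor-parallel group
        return local_rank % model_parallel_size

    # e.g. with tensor_model_parallel_size=2 on a 4-GPU node,
    # local ranks 0, 1, 2, 3 map to model-parallel ranks 0, 1, 0, 1
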
7 changes: 4 additions & 3 deletions nemo/collections/asr/parts/preprocessing/features.py
@@ -169,8 +169,8 @@ def inverse(self, magnitude, phase):

if self.window is not None:
window_sum = librosa.filters.window_sumsquare(
self.window,
magnitude.size(-1),
window=self.window,
n_frames=magnitude.size(-1),
hop_length=self.hop_length,
win_length=self.win_length,
n_fft=self.filter_length,
@@ -302,7 +302,8 @@ def __init__(
highfreq = highfreq or sample_rate / 2

filterbanks = torch.tensor(
librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq), dtype=torch.float
librosa.filters.mel(sr=sample_rate, n_fft=self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq),
dtype=torch.float,
).unsqueeze(0)
self.register_buffer("fb", filterbanks)

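Both librosa calls above (window_sumsquare and filters.mel) move from positional to keyword arguments; librosa 0.9 made most of these parameters keyword-only, so the old positional form raises a TypeError. A small standalone sketch with made-up parameter values:

    import librosa

    # one row per mel filter, n_fft // 2 + 1 frequency bins per row
    mel_fb = librosa.filters.mel(sr=16000, n_fft=512, n_mels=80, fmin=0.0, fmax=8000.0)
    print(mel_fb.shape)  # (80, 257)
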
4 changes: 3 additions & 1 deletion nemo/collections/asr/parts/preprocessing/perturb.py
@@ -162,7 +162,9 @@ def perturb(self, data):
return

new_sr = int(self._sr * speed_rate)
data._samples = librosa.core.resample(data._samples, self._sr, new_sr, res_type=self._res_type)
data._samples = librosa.core.resample(
data._samples, orig_sr=self._sr, target_sr=new_sr, res_type=self._res_type
)


class TimeStretchPerturbation(Perturbation):
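
The speed-perturbation resample call follows the same pattern, passing the sampling rates as orig_sr/target_sr keywords. A hypothetical call with values in the spirit of the surrounding code:

    import numpy as np
    import librosa

    samples = np.zeros(16000, dtype=np.float32)  # 1 s of audio at 16 kHz
    slowed = librosa.core.resample(samples, orig_sr=16000, target_sr=int(16000 * 0.9))
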
4 changes: 2 additions & 2 deletions nemo/collections/asr/parts/preprocessing/segment.py
@@ -72,10 +72,10 @@ def __init__(self, samples, sample_rate, target_sr=None, trim=False, trim_db=60,
"""
samples = self._convert_samples_to_float32(samples)
if target_sr is not None and target_sr != sample_rate:
samples = librosa.core.resample(samples, sample_rate, target_sr)
samples = librosa.core.resample(samples, orig_sr=sample_rate, target_sr=target_sr)
sample_rate = target_sr
if trim:
samples, _ = librosa.effects.trim(samples, trim_db)
samples, _ = librosa.effects.trim(samples, top_db=trim_db)
self._samples = samples
self._sample_rate = sample_rate
if self._samples.ndim >= 2:
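
Likewise, librosa.effects.trim now receives the silence threshold as the top_db keyword rather than positionally. A brief illustrative call (values assumed):

    import numpy as np
    import librosa

    audio = np.concatenate([np.zeros(8000), 0.1 * np.random.randn(16000), np.zeros(8000)]).astype(np.float32)
    trimmed, index = librosa.effects.trim(audio, top_db=60)  # index holds the [start, end] samples kept
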
20 changes: 8 additions & 12 deletions nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py
@@ -281,11 +281,11 @@ def _greedy_decode(
# out_len: [seq_len]

# Initialize blank state and empty label set in Hypothesis
hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[])
hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[], last_token=None)

if partial_hypotheses is not None:
if len(partial_hypotheses.y_sequence) > 0:
hypothesis.y_sequence.append(partial_hypotheses.y_sequence[-1].cpu().numpy())
hypothesis.last_token = partial_hypotheses.last_token
if partial_hypotheses.dec_state is not None:
hypothesis.dec_state = self.decoder.batch_concat_states([partial_hypotheses.dec_state])
hypothesis.dec_state = _states_to_device(hypothesis.dec_state, x.device)

@@ -308,11 +308,10 @@ def _greedy_decode(
while not_blank and (self.max_symbols is None or symbols_added < self.max_symbols):
# In the first timestep, we initialize the network with RNNT Blank
# In later timesteps, we provide previous predicted label as input.
last_label = (
self._SOS
if (hypothesis.y_sequence == [] and hypothesis.dec_state is None)
else hypothesis.y_sequence[-1]
)
if hypothesis.last_token is None and hypothesis.dec_state is None:
last_label = self._SOS
else:
last_label = label_collate([[hypothesis.last_token]])

# Perform prediction network and joint network steps.
g, hidden_prime = self._pred_step(last_label, hypothesis.dec_state)
@@ -347,6 +346,7 @@
hypothesis.score += float(v)
hypothesis.timestep.append(time_idx)
hypothesis.dec_state = hidden_prime
hypothesis.last_token = k

# Increment token counter.
symbols_added += 1
@@ -359,10 +359,6 @@
# Unpack the hidden states
hypothesis.dec_state = self.decoder.batch_select_state(hypothesis.dec_state, 0)

# Remove the original input label if partial hypothesis was provided
if partial_hypotheses is not None:
hypothesis.y_sequence = hypothesis.y_sequence[1:]

return hypothesis


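The greedy RNNT decoder now carries the last emitted token on the hypothesis itself (last_token) instead of peeking at y_sequence[-1], so resuming from a partial hypothesis no longer requires seeding y_sequence and stripping that seed at the end. The sketch below is illustrative only, with a simplified dataclass standing in for rnnt_utils.Hypothesis:

    from dataclasses import dataclass, field
    from typing import Any, List, Optional

    @dataclass
    class Hypothesis:  # simplified stand-in for rnnt_utils.Hypothesis
        score: float = 0.0
        y_sequence: List[int] = field(default_factory=list)
        dec_state: Optional[Any] = None
        timestep: List[int] = field(default_factory=list)
        last_token: Optional[int] = None

    def next_decoder_input(hyp: Hypothesis, sos_id: int) -> int:
        # first step: feed the RNNT blank/SOS token; afterwards, feed the last emitted token
        if hyp.last_token is None and hyp.dec_state is None:
            return sos_id
        return hyp.last_token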