From dfdb4fb4f0a19ecfa77cb958897c972d39b4977c Mon Sep 17 00:00:00 2001 From: Tugrul Konuk Date: Mon, 22 Jul 2024 11:16:10 -0500 Subject: [PATCH] Adds Tiktoken tokenizer for Nemotron-Mistral 12B (#9797) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adding context- & expert-parallism to MegatronStrategy (#9525) Signed-off-by: Tugrul Konuk * Add CICD test for Stable Diffusion (#9464) * Add CICD test for Stable Diffusion Signed-off-by: Michal Futrega * Update cicd-main.yml Signed-off-by: Michal Futrega * Use single gpu runner Signed-off-by: Michal Futrega --------- Signed-off-by: Michal Futrega Signed-off-by: Tugrul Konuk * Akoumparouli/nemo ux mixtral (#9446) * use default collate if dataset does not have one Signed-off-by: Alexandros Koumparoulis * mixtral config Signed-off-by: Alexandros Koumparoulis * add convert_state Signed-off-by: Alexandros Koumparoulis * fix StateDictTransform for 2D layers, e.g. MoE Signed-off-by: Alexandros Koumparoulis * pass num_moe_experts to specs Signed-off-by: Alexandros Koumparoulis * udpate MixtralModel Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa Signed-off-by: Alexandros Koumparoulis * mini docstring Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa Signed-off-by: Tugrul Konuk * update mcoreddp call (#9345) * update mcoreddp call Signed-off-by: Alexandros Koumparoulis * update mcore commits Signed-off-by: Alexandros Koumparoulis --------- Signed-off-by: Alexandros Koumparoulis Co-authored-by: Pablo Garay Signed-off-by: Tugrul Konuk * [NeMo-UX] Llama and Gemma (#9528) * add llama Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * add llama Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * add llama3 Signed-off-by: Chen 
Cui * Apply isort and black reformatting Signed-off-by: cuichenx * fix typo Signed-off-by: Chen Cui * enable importers with multiple models Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * add gemma Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * checks Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: cuichenx Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * [NeMo-UX] minor logging bug fixes (#9529) * minor exp_manager bug fixes * remove print statement * fix docstring * fix AppState defaults --------- Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * mcore distOpt restore fix (#9421) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * Custom Tiktoken tokenizer. Signed-off-by: Tugrul Konuk * Fixed the tokenizer decoding on special tokens. Signed-off-by: Tugrul Konuk * Apply isort and black reformatting Signed-off-by: ertkonuk Signed-off-by: Tugrul Konuk * Added token_to_id() method. Signed-off-by: Tugrul Konuk * Update neva conversion script from and to HF (#9296) * Update NeMo script Signed-off-by: yaoyu-33 * Fix example scripts Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 * Update convert_llava_nemo_to_hf.py Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> * address comments Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 --------- Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: yaoyu-33 Signed-off-by: Tugrul Konuk * vLLM Export Support (#9381) * Export implementation for vLLM 0.4.3. Supports LLAMA2, Mistral, Mixtral (unverified), Gemma and StarCoder2 models. 
The nemo.export.tensorrt_llm alias was removed to avoid initializing TRT-LLM when importing anything from nemo.export. Signed-off-by: Alexey Panteleev * Fixed some CodeQL warnings. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Removed empty files. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Updated the integration for vLLM 0.5.0. Signed-off-by: Alexey Panteleev * Updated the vLLM deployment interface to use max_output_len instead of max_output_token. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Moved the Exporter class to nemo/export and renamed its file to vllm_exporter.py, to be more similar to TRT-LLM. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Implemented vLLM support in the export tests, added functional testing, implemented forward evaluation on vLLM without Triton. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Moved the vLLM deployment functionality to the common deploy_triton.py script. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Fixed the CodeQL discovered issues. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Fixed one more return of a wrong dimensionality... Signed-off-by: Alexey Panteleev * More wrong dimensionality returns. Signed-off-by: Alexey Panteleev --------- Signed-off-by: Alexey Panteleev Signed-off-by: apanteleev Co-authored-by: apanteleev Co-authored-by: Onur Yilmaz <35306097+oyilmaz-nvidia@users.noreply.github.com> Signed-off-by: Tugrul Konuk * PL: Delete precision if using plugin. 
TODO switch to MegatronTrainerBuilder (#9535) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * Add page context fmha (#9526) Signed-off-by: Tugrul Konuk * extend get_gpt_layer_modelopt_spec to support MoE (#9532) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * fix mock data generation for legacy dataset (#9530) Signed-off-by: dimapihtar Signed-off-by: Tugrul Konuk * [Nemo-UX] IO fixes (#9512) * Improve IOMixin.io_transform_args to handle dataclasses better * Dump task json + img inside NeMoLogger * Adding store_io to train task * Update opt.connect to also propagate to __io__ * Rename opt to optim for consistency * Moving to using safe serialization using fiddle, only use cloudpickle when needed * Apply isort and black reformatting Signed-off-by: marcromeyn * Using Config from fiddle instead of sdk for now * Apply isort and black reformatting Signed-off-by: marcromeyn * Move enable_nemo_ckpt_io from MegatronStrategy to ModelCheckpoint * Apply isort and black reformatting Signed-off-by: marcromeyn * Move nemo-ckpt to _get_finalize_save_checkpoint_callback * Apply isort and black reformatting Signed-off-by: marcromeyn * Update TrainerContext & io.load_ckpt * Use renamed TrainerContext inside ModelCheckpoint * Remove double io saving * Rename lightning.pytorch.opt -> optim * Apply isort and black reformatting Signed-off-by: marcromeyn * Remove store_io from train-task * Adding fiddle-extension for torch * Apply isort and black reformatting Signed-off-by: marcromeyn * Move fdl_torch import * Apply isort and black reformatting Signed-off-by: marcromeyn * Adding dtype to serialization * Some fixes * Apply isort and black reformatting Signed-off-by: marcromeyn * Make TransformerConfig inherit from IOMixin to fix serialization error * Make TransformerConfig inherit from IOMixin to fix serialization error * Apply isort and black reformatting Signed-off-by: marcromeyn * Add support for BuiltinFunctionType * Apply isort and black 
reformatting Signed-off-by: marcromeyn * Add missing import * Apply isort and black reformatting Signed-off-by: marcromeyn * Fix dataclass fields --------- Signed-off-by: marcromeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * Test C++ runtime on demand in nemo_export.py to avoid possible OOMs (#9544) * Add test_cpp_runtime flag Signed-off-by: Jan Lasek * Apply isort and black reformatting Signed-off-by: janekl --------- Signed-off-by: Jan Lasek Signed-off-by: janekl Co-authored-by: janekl Signed-off-by: Tugrul Konuk * Fix lhotse tests for v1.24.2 (#9546) * Fix lhotse tests for v1.24.0 Signed-off-by: Piotr Żelasko * Fix RIR test Signed-off-by: Piotr Żelasko --------- Signed-off-by: Piotr Żelasko Signed-off-by: Tugrul Konuk * gpu_unitTests_notOptional (#9551) Signed-off-by: Tugrul Konuk * add reset learning rate functionality (#9372) * add reset_lr functionality Signed-off-by: dimapihtar * fix reset_lr logic Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * move reset_lr from optim section Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * add reset_lr value to config Signed-off-by: dimapihtar * set reset_lr False by default Signed-off-by: dimapihtar * remove extra line Signed-off-by: dimapihtar * add reset_lr test Signed-off-by: dimapihtar * add reset_lr test Signed-off-by: dimapihtar * remove extra quote Signed-off-by: dimapihtar * add ability to reset schedule's max_steps and decay_steps Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * change scheduler's first step logic when using reset_lr Signed-off-by: dimapihtar * revert config Signed-off-by: dimapihtar * fix reset_lr logic Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * revert config Signed-off-by: dimapihtar * revert config Signed-off-by: dimapihtar * update reset_lr comments Signed-off-by: dimapihtar * add use cases for reset_lr 
feature Signed-off-by: dimapihtar --------- Signed-off-by: dimapihtar Signed-off-by: dimapihtar Co-authored-by: dimapihtar Signed-off-by: Tugrul Konuk * Add Python AIStore SDK to container and bump min Lhotse version (#9537) * Add Python AIStore SDK to requirements and bump min Lhotse version Signed-off-by: Piotr Żelasko * Move AIStore Python SDK to Dockerfile, remove matplotlib/ipywidgets deps Signed-off-by: Piotr Żelasko --------- Signed-off-by: Piotr Żelasko Signed-off-by: Tugrul Konuk * Adding 'use_dynamo' option for export to use onnx.dynamo_export() instead of onnx.export() (#9147) * Ininial WARs to implement dynamo option for export Signed-off-by: Boris Fomitchev * including weights in .onnx Signed-off-by: Boris Fomitchev * dynamo_export works for many small models Signed-off-by: Boris Fomitchev * External weights behaviour fixed Signed-off-by: Boris Fomitchev * Cleanup Signed-off-by: Boris Fomitchev * Apply isort and black reformatting Signed-off-by: borisfom * print cleaned up Signed-off-by: Boris Fomitchev * Added overloadable dynamic_shapes_for_export Signed-off-by: Boris Fomitchev * Addressing code review Signed-off-by: Boris Fomitchev * Fixing CI issues Signed-off-by: Boris Fomitchev * Fixing CI test failure Signed-off-by: Boris Fomitchev * Eliminated test cross-contamination Signed-off-by: Boris Fomitchev --------- Signed-off-by: Boris Fomitchev Signed-off-by: borisfom Co-authored-by: Eric Harper Co-authored-by: Somshubra Majumdar Signed-off-by: Tugrul Konuk * [NeMo-UX] Fix tokenizer IO (#9555) * Adding tokenizer to io-test + making it pass * Handling tokenizer correctly inside dump_io * Apply isort and black reformatting Signed-off-by: marcromeyn * Removing not used import --------- Signed-off-by: marcromeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * [NeMo UX] Move mistral_7b.py to mistral.py (#9545) * Move mistral_7b.py to mistral.py Signed-off-by: Alexandros Koumparoulis * rename MixtralConfig to MixtralConfig8x7B Signed-off-by: 
Alexandros Koumparoulis * mistral rename: mistralconfig7b & mistralmodel Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * Use closed-formula to round by multiple (#9307) * Use closed-formula to round by multiple Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa Co-authored-by: Pablo Garay Signed-off-by: Tugrul Konuk * ci: Do not attempt to send slack on fork (#9556) * ci: Do not attempt to send slack on fork Signed-off-by: Oliver Koenig * test Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Fix nemo export test (#9547) * fix minor import bug Signed-off-by: Onur Yilmaz * fix export test Signed-off-by: Onur Yilmaz * Apply isort and black reformatting Signed-off-by: oyilmaz-nvidia --------- Signed-off-by: Onur Yilmaz Signed-off-by: oyilmaz-nvidia Co-authored-by: oyilmaz-nvidia Co-authored-by: Pablo Garay Signed-off-by: Tugrul Konuk * Fix SDXL incorrect name in docs (#9534) Signed-off-by: Tugrul Konuk * GPU unit tests: Mark flaky tests to be fixed (#9559) Signed-off-by: Tugrul Konuk * Bump PTL version (#9557) Signed-off-by: Abhishree Signed-off-by: Tugrul Konuk * [Resiliency] Straggler detection (#9473) * Initial straggler det impl Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Fixed CI code checks Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Removed unused import Signed-off-by: Jacek Bieniusiewicz * remove submodule Signed-off-by: Maanu Grover * Updated documentation; Updated callback params; Cosmetic changes Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Fixed straggler det config; Added basic test 
Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Fixes in test_straggler_det.py Signed-off-by: Jacek Bieniusiewicz * Updated straggler callback API Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * stop_if_detected=False by default Signed-off-by: Jacek Bieniusiewicz --------- Signed-off-by: Jacek Bieniusiewicz Signed-off-by: jbieniusiewi Signed-off-by: Maanu Grover Co-authored-by: jbieniusiewi Co-authored-by: Maanu Grover Signed-off-by: Tugrul Konuk * switch to torch_dist as default dist checkpointing backend (#9541) Signed-off-by: ashors1 Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * [NeMo-UX] Checkpointing bug fixes (#9562) * fix checkpoint loading * fix * fixes * another fix * Apply isort and black reformatting Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Co-authored-by: ashors1 Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * Add tps and pps params to the export script (#9558) * fix minor import bug Signed-off-by: Onur Yilmaz * fix export test Signed-off-by: Onur Yilmaz * Apply isort and black reformatting Signed-off-by: oyilmaz-nvidia * remove n_gpus param Signed-off-by: Onur Yilmaz * add and fix parameters Signed-off-by: Onur Yilmaz * fix deploy script Signed-off-by: Onur Yilmaz * Apply isort and black reformatting Signed-off-by: oyilmaz-nvidia * rename tps and pps params Signed-off-by: Onur Yilmaz --------- Signed-off-by: Onur Yilmaz Signed-off-by: oyilmaz-nvidia Co-authored-by: oyilmaz-nvidia Signed-off-by: Tugrul Konuk * Consolidate gpt continue training script into pretraining script (#9413) * Consolidate gpt continue training with pretraining Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 * fix default config Signed-off-by: yaoyu-33 * Add github action cicd Signed-off-by: yaoyu-33 * extract _integrate_original_checkpoint_data as a method Signed-off-by: yaoyu-33 * Apply isort and 
black reformatting Signed-off-by: yaoyu-33 * fix getattr Signed-off-by: yaoyu-33 * Revert "Add github action cicd" This reverts commit a453f16ba2be6413db932623009da893208acdd5. * Update comments in nlp_overrides.py Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> --------- Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: yaoyu-33 Signed-off-by: Tugrul Konuk * Add support to change Multi task model prompt (#9542) * Add support to change Multi task model prompt Signed-off-by: smajumdar * Add support to change Multi task model prompt Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Update nemo/collections/common/prompts/formatter.py Co-authored-by: Piotr Żelasko Signed-off-by: Somshubra Majumdar * Address comments Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Address comments Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Signed-off-by: titu1994 Signed-off-by: Somshubra Majumdar Co-authored-by: Piotr Żelasko Signed-off-by: Tugrul Konuk * Add Multimodal Exporter (#9256) * Add video-neva TRT export * Add TRT inference * Change config * Apply isort and black reformatting Signed-off-by: meatybobby * Change export params * Remove unused import * Add neva export * Apply isort and black reformatting Signed-off-by: meatybobby * Change unpack nemo * Apply isort and black reformatting Signed-off-by: meatybobby * Add trt infer config * Fix neva trt inference * Apply isort and black reformatting Signed-off-by: meatybobby * Add exporter * Apply isort and black reformatting Signed-off-by: meatybobby * Fix infer * Add PyTriton * Apply isort and black reformatting Signed-off-by: meatybobby * Fix deploy wrong dim * Apply isort and black reformatting Signed-off-by: meatybobby * Change to pass PIL Image * Apply isort and black reformatting Signed-off-by: meatybobby * Fix video neva deploy * 
Change query * Change deploy * Remove unused import * Change ptuning * Change to mm exporter * Add script * Apply isort and black reformatting Signed-off-by: meatybobby * Fix script --------- Signed-off-by: meatybobby Co-authored-by: meatybobby Signed-off-by: Tugrul Konuk * Enable encoder adapters for Canary and MultiTaskAED models (#9409) * Fix assertions for adapter types Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Cleanup Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Finalize support for decoder adapters Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * fix the freeze/unfreeze problem by replacing as_frozen with torch.inference_mode * Apply isort and black reformatting Signed-off-by: weiqingw4ng * Update tests to new generic way of module update Signed-off-by: smajumdar * Finalize code for update module Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Fix variable name Signed-off-by: smajumdar * Finalize projection support for transformer mha adapters Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Correct implementation of freeze restore Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Corrects the implementation of replace_adapter_modules to limit to just the top level modules Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Remove registration of Transformer MHA Signed-off-by: smajumdar * Remove registration of Transformer MHA Signed-off-by: smajumdar * Address reviewer comments Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Signed-off-by: titu1994 Signed-off-by: weiqingw4ng Co-authored-by: Weiqing Wang Co-authored-by: weiqingw4ng Signed-off-by: Tugrul Konuk * pass option through (#9570) Signed-off-by: Maanu Grover Signed-off-by: Tugrul Konuk * PTQ refinements (#9574) * 
Rename megatron_gpt_quantization -> megatron_gpt_ptq Signed-off-by: Jan Lasek * Configure export.save_path as dir or tarball Signed-off-by: Jan Lasek * PTQ docs update Signed-off-by: Jan Lasek * Make model_type optional in case of quantized checkpoints Signed-off-by: Jan Lasek * Drop unused save_nemo_model_config argument Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek Signed-off-by: Tugrul Konuk * Audio model collection (#9263) * Audio model collection Signed-off-by: Ante Jukić * Apply isort and black reformatting Signed-off-by: anteju * Fix imports Signed-off-by: Ante Jukić * Addressed PR comments Signed-off-by: Ante Jukić * Apply isort and black reformatting Signed-off-by: anteju --------- Signed-off-by: Ante Jukić Signed-off-by: anteju Co-authored-by: anteju Signed-off-by: Tugrul Konuk * [NeMo-UX] Fix Trainer serialization (#9571) * Fix Trainer serialization * Apply isort and black reformatting Signed-off-by: marcromeyn --------- Signed-off-by: marcromeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * Update click version requirement (#9580) Signed-off-by: Dong Hyuk Chang Co-authored-by: Dong Hyuk Chang Signed-off-by: Tugrul Konuk * [Fault tolerance] Heartbeat detection (#9352) * Fault tolerance related changes Signed-off-by: Jacek Bieniusiewicz * Cosmetic changes in documentation Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Doc update round2 Signed-off-by: Jacek Bieniusiewicz --------- Signed-off-by: Jacek Bieniusiewicz Signed-off-by: jbieniusiewi Co-authored-by: Jacek Bieniusiewicz Co-authored-by: jbieniusiewi Co-authored-by: jbieniusiewi <152396322+jbieniusiewi@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Add ModelOpt QAT example for Llama2 SFT model (#9326) * add INT4 QAT example for Llama2 SFT model Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> * Add config parameter to control kv cache quantization Signed-off-by: Keval Morabia 
<28916987+kevalmorabia97@users.noreply.github.com> * Fix typo in cicd-main.yml for QAT test Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> * fix nlp_overrides.py Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> * address reviewer feedback Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> * quantize unwrapped model Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> * add compress export argument for qat config Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --------- Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Set TE flag in legacy -> mcore conversion script (#9585) * set TE flag Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: cuichenx Signed-off-by: Tugrul Konuk * [Nemo-UX] Add fabric-API for manual forward-pass (#9577) * First pass over fabric-API * Adding Trainer -> Fabric conversion * Some small fixes to get a forward-pass in Fabric working * Apply isort and black reformatting Signed-off-by: marcromeyn * Adding doc-string to Fabric.import_model * Adding track_io to io_init of Fabric * Fix Fabric.load_model + add doc-string * Apply isort and black reformatting Signed-off-by: marcromeyn * Remove unused import * Some small fixes * Fix failing test --------- Signed-off-by: marcromeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * [Nemo-UX] Add SDK-factories to llm-collection (#9589) * Adding sdk-factories to llm-collection * Removing _model from mistral + mixtral * Expose lr_scheduler inside lightning * Apply isort and black reformatting Signed-off-by: marcromeyn --------- Signed-off-by: marcromeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * Multimodal projection layer adapter fix for PP>1 (#9445) * enabling 
multimodal adapters to load in PP>1 Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons * parameterizing validate_access_integrity, set to false when PP>1 Signed-off-by: paul-gibbons formatting fix Signed-off-by: paul-gibbons Apply isort and black reformatting Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons * update nlp_model.py Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons * update modelPT with validate_access_integrity Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons * updating save_restore_connector w/ validate_access_integrity Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons * addressing comment Signed-off-by: paul-gibbons * adding validate_access_integrity to super().load_config_and_state_dict() Signed-off-by: paul-gibbons * testing reorder of validate_access_integrity for CI failures Signed-off-by: paul-gibbons --------- Signed-off-by: paul-gibbons Signed-off-by: paul-gibbons Co-authored-by: paul-gibbons Co-authored-by: Eric Harper Signed-off-by: Tugrul Konuk * Add offline quantization script for QLoRA deployment (#9455) * add qlora offline quantization script Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * clean Signed-off-by: Chen Cui * docstring Signed-off-by: Chen Cui --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: cuichenx Signed-off-by: Tugrul Konuk * qlora support more models (#9488) Signed-off-by: Chen Cui Signed-off-by: Tugrul Konuk * [NeMo-UX] Some improvements to NeMoLogger (#9591) Signed-off-by: Tugrul Konuk * Set n_gpu to None in nemo export (#9593) * fix minor import bug Signed-off-by: Onur Yilmaz * set ngpus to None Signed-off-by: Onur Yilmaz --------- Signed-off-by: Onur Yilmaz Signed-off-by: Tugrul Konuk * Inflight nemo model export support (#9527) * online 
model conversion and refit Signed-off-by: Jimmy Zhang * clean code Signed-off-by: Jimmy Zhang * cleanup Signed-off-by: Jimmy Zhang * add refit, cleanup code Signed-off-by: Jimmy Zhang * combine weight conversion functions Signed-off-by: Jimmy Zhang * cleanup code Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * remove debug print Signed-off-by: Jimmy Zhang * cleanup code Signed-off-by: Jimmy Zhang * fix single gpu and cleanup code Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 --------- Signed-off-by: JimmyZhang12 Signed-off-by: Tugrul Konuk * vLLM Export Improvements (#9596) * Separated the vLLM export functionality from the common deployment script into deploy_vllm_triton.py. Signed-off-by: Alexey Panteleev * Fixed vocab_size for LLAMA3. Signed-off-by: Alexey Panteleev * Export test: fixed deployment testing w/o Megatron, made functional tests optional, added --gpu_memory_utilization. Signed-off-by: Alexey Panteleev * Apply isort and black reformatting Signed-off-by: apanteleev * Addressing review and CodeQL comments. 
Signed-off-by: Alexey Panteleev --------- Signed-off-by: Alexey Panteleev Signed-off-by: apanteleev Co-authored-by: apanteleev Co-authored-by: Onur Yilmaz <35306097+oyilmaz-nvidia@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Set finalize_model_grads_func in on_fit_start instead to make sure it's being called (#9599) Signed-off-by: Tugrul Konuk * Set no_sync_func & grad_sync_fucn (#9601) * Set no_sync_func & grad_sync_fucn Signed-off-by: Alexandros Koumparoulis * set overlap_param_sync Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa Signed-off-by: Tugrul Konuk * small nemo logger bug fix (#9607) Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * fix the dict format returned by scheduler method (#9609) Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * [NeMo-UX] Dataloading enhancements and bug fixes (#9595) * fix dataloading + checkpoint restore * clean up data sampler * fix typo * support passing multiple paths to data module * fix validation dataloader * fix dataloader len when using gradient accumulation * fix progress bar * Apply isort and black reformatting Signed-off-by: ashors1 * fix step count in loggers * fix blended dataset * address comments * address comment * move step logging into strategy * Apply isort and black reformatting Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Co-authored-by: Marc Romeyn Co-authored-by: ashors1 Signed-off-by: Tugrul Konuk * Fix serialization of AutoResume (#9616) * fix serialization of autoresume * update undefined variables Signed-off-by: Tugrul Konuk * Chat template support for megatron_gpt_eval.py (#9354) * Bump PTL version (#9557) Signed-off-by: Abhishree Signed-off-by: Alexandros Koumparoulis * [Resiliency] Straggler detection (#9473) * Initial straggler det impl Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting 
Signed-off-by: jbieniusiewi * Fixed CI code checks Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Removed unused import Signed-off-by: Jacek Bieniusiewicz * remove submodule Signed-off-by: Maanu Grover * Updated documentation; Updated callback params; Cosmetic changes Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Fixed straggler det config; Added basic test Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * Fixes in test_straggler_det.py Signed-off-by: Jacek Bieniusiewicz * Updated straggler callback API Signed-off-by: Jacek Bieniusiewicz * Apply isort and black reformatting Signed-off-by: jbieniusiewi * stop_if_detected=False by default Signed-off-by: Jacek Bieniusiewicz --------- Signed-off-by: Jacek Bieniusiewicz Signed-off-by: jbieniusiewi Signed-off-by: Maanu Grover Co-authored-by: jbieniusiewi Co-authored-by: Maanu Grover Signed-off-by: Alexandros Koumparoulis * move model loading to separate function; call toContainer once; pad using closed formula Signed-off-by: Alexandros Koumparoulis * read prompts from file Signed-off-by: Alexandros Koumparoulis * If input prompt contains dict, apply model.tokenizer.chat_template Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa Signed-off-by: Alexandros Koumparoulis * apply @Gal Leibovich's patch Taken from: https://github.com/NVIDIA/NeMo/commit/17572905344db4692583e72799d55801a8860f35 Signed-off-by: Alexandros Koumparoulis * rename prompts_file to prompts_jsonl Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa Signed-off-by: Alexandros Koumparoulis * add chat_template param Signed-off-by: Alexandros Koumparoulis * Add ChatTemplateMixin to SentencePieceTokenizer Signed-off-by: Alexandros Koumparoulis * add chat-template to text-gen-strat Signed-off-by: Alexandros 
Koumparoulis * move load prompts to separate file Signed-off-by: Alexandros Koumparoulis * remove chat-template from text-gen-utils Signed-off-by: Alexandros Koumparoulis * make chat-template more generic Signed-off-by: Alexandros Koumparoulis * add assert message Signed-off-by: Alexandros Koumparoulis * small refactor for chat_template_mixin Signed-off-by: Alexandros Koumparoulis * undo ckpt conv changes Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa Signed-off-by: Alexandros Koumparoulis * move rounding to function Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Abhishree Signed-off-by: Alexandros Koumparoulis Signed-off-by: Jacek Bieniusiewicz Signed-off-by: jbieniusiewi Signed-off-by: Maanu Grover Signed-off-by: akoumpa Signed-off-by: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: jbieniusiewi <152396322+jbieniusiewi@users.noreply.github.com> Co-authored-by: jbieniusiewi Co-authored-by: Maanu Grover Co-authored-by: akoumpa Signed-off-by: Tugrul Konuk * Jsonl support (#9611) * Adding support to preprocess .jsonl and .jsonl.gz files in input directory Signed-off-by: adityavavre * Adding support to preprocess .jsonl and .jsonl.gz files in input directory Signed-off-by: adityavavre * Apply isort and black reformatting Signed-off-by: adityavavre --------- Signed-off-by: adityavavre Signed-off-by: adityavavre Co-authored-by: adityavavre Signed-off-by: Tugrul Konuk * [NeMo-UX] Add PEFT (#9490) * initial commit for PEFT in nemo2 * Apply isort and black reformatting Signed-off-by: cuichenx * address comments Signed-off-by: Chen Cui * make import easier Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * address comments Signed-off-by: Chen Cui * 
Update nemo/collections/llm/peft/lora.py Signed-off-by: Marc Romeyn * Some small fixes + adding more doc-strings * Apply isort and black reformatting Signed-off-by: marcromeyn * Adding ModelTransform callback * Apply isort and black reformatting Signed-off-by: marcromeyn * Fixing type-hint for model_transform * Apply isort and black reformatting Signed-off-by: marcromeyn * fix import Signed-off-by: Chen Cui * model transform for gemma llama Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * fix model transform Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * change lora target default to all linear modules Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * Small fix in mixtral * Apply isort and black reformatting Signed-off-by: marcromeyn * Integrating PEFT to the public-API + some fixes * Big refactor to allow to load adapter-states * Some fixes to support adapter_path * Apply isort and black reformatting Signed-off-by: marcromeyn * Disabling ckpt reloading when adapter_path is passed * Fix CLI * Apply isort and black reformatting Signed-off-by: marcromeyn * Remove commented-out code * Remove commented-out code * Remove un-used import * Fix callback imports * Apply isort and black reformatting Signed-off-by: marcromeyn * Fixing llm.pretrain * Some small fixes * Apply isort and black reformatting Signed-off-by: marcromeyn * Fix missing import + type-hint in finetune * Adding PreemptionCallback + some more tests * Apply isort and black reformatting Signed-off-by: marcromeyn * Clean up imports & clean up llm.api * Apply isort and black reformatting Signed-off-by: marcromeyn * Trying to fix failing tests * Remove __init__.py 2 * Apply isort and black reformatting Signed-off-by: marcromeyn * Fix failing test * Trying to fix last failing test --------- Signed-off-by: cuichenx Signed-off-by: Chen Cui Signed-off-by: Marc Romeyn Signed-off-by: marcromeyn 
Co-authored-by: cuichenx Co-authored-by: Marc Romeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * Akoumparouli/mistral import instruct chat template fix (#9567) * use bf16 by defualt mistral conv Signed-off-by: Alexandros Koumparoulis * add chat template Signed-off-by: Alexandros Koumparoulis * use capitalized role names Signed-off-by: Alexandros Koumparoulis --------- Signed-off-by: Alexandros Koumparoulis Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * Remove .cuda calls, use device isntead (#9602) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * fix converter defautl args (#9565) * fix converter defautl args Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa Signed-off-by: Tugrul Konuk * mixtral export (#9603) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * fix: remove non_blocking from PTL's .cuda call (#9618) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * Alit/mamba tmp (#9612) * adding mamba support * fix import mixins * rm convert jamba * Apply isort and black reformatting Signed-off-by: JRD971000 * more cleanups * use GPT text gen * Apply isort and black reformatting Signed-off-by: JRD971000 * fixing gbs in TP convetor * Apply isort and black reformatting Signed-off-by: JRD971000 * add reqs * add tutorial * minor fix to tutorial * moving finetuning files Signed-off-by: arendu * moving finetuning files Signed-off-by: arendu * address comments * Apply isort and black reformatting Signed-off-by: JRD971000 * address comments * Apply isort and black reformatting Signed-off-by: JRD971000 * add mamba_tmp * remove mamba import * Apply isort and black reformatting Signed-off-by: JRD971000 --------- Signed-off-by: JRD971000 Signed-off-by: arendu Co-authored-by: Ali Taghibakhshi Co-authored-by: JRD971000 Co-authored-by: arendu Signed-off-by: 
Tugrul Konuk * TitaNet Batch Verify Speaker (#9337) * add batch_inference for verify_speakers method Signed-off-by: msekoyan@nvidia.com * remove not used package Signed-off-by: msekoyan@nvidia.com * change batch inference logic Signed-off-by: msekoyan@nvidia.com * fixup Signed-off-by: msekoyan@nvidia.com * requested changes Signed-off-by: msekoyan@nvidia.com * add verify_speakers_batch to docs Signed-off-by: msekoyan@nvidia.com * handle None durations in manifest Signed-off-by: msekoyan@nvidia.com * change logging text Signed-off-by: msekoyan@nvidia.com * Apply isort and black reformatting Signed-off-by: monica-sekoyan * check duration presence Signed-off-by: msekoyan@nvidia.com * add channel_selector to dataset configs Signed-off-by: msekoyan@nvidia.com --------- Signed-off-by: msekoyan@nvidia.com Signed-off-by: monica-sekoyan Co-authored-by: monica-sekoyan Co-authored-by: Nithin Rao Signed-off-by: Tugrul Konuk * Enable MCore checkpointing optimizations (#9505) * Expose num processes in PyT Dist Signed-off-by: Mikołaj Błaż * Add parallel save/load optimizations from MCore Signed-off-by: Mikołaj Błaż * Remove async utils from MCore Signed-off-by: Mikołaj Błaż * Enable DistOpt paralell R/W Signed-off-by: Mikołaj Błaż * Enable PyT Dist caching Signed-off-by: Mikołaj Błaż * Small fixes Signed-off-by: Mikołaj Błaż * Make sure DistCkptIO is instantiated from config Signed-off-by: Mikołaj Błaż * Bump MCore version to v0.7 Signed-off-by: Mikołaj Błaż * Print load strategy Signed-off-by: Mikołaj Błaż * Forward MCore to model space DistOpt Signed-off-by: Mikołaj Błaż * Add separate flag to control DistOpt paralell R/W Signed-off-by: Mikołaj Błaż * Turn off parallel save by default Signed-off-by: Mikołaj Błaż --------- Signed-off-by: Mikołaj Błaż Signed-off-by: Tugrul Konuk * Change mixtral moe key name for trt-llm (#9620) * fix minor import bug Signed-off-by: Onur Yilmaz * change moe key values Signed-off-by: Onur Yilmaz * add weight to the key Signed-off-by: Onur Yilmaz 
--------- Signed-off-by: Onur Yilmaz Signed-off-by: Tugrul Konuk * fix ckpt load bug (#9621) * fix ckpt load bug Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar --------- Signed-off-by: dimapihtar Signed-off-by: dimapihtar Co-authored-by: dimapihtar Signed-off-by: Tugrul Konuk * NeVA Minor Fixes (#9608) * fix neva resume with empty param loaded for some pp stage Signed-off-by: yaoyu-33 * fix crop size check Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 --------- Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 Co-authored-by: yaoyu-33 Signed-off-by: Tugrul Konuk * fix pretrianing data sizes and weights (#9627) Signed-off-by: Chen Cui Signed-off-by: Tugrul Konuk * Alit/mamba (#9575) * adding mamba support * fix import mixins * rm convert jamba * Apply isort and black reformatting Signed-off-by: JRD971000 * more cleanups * use GPT text gen * Apply isort and black reformatting Signed-off-by: JRD971000 * fixing gbs in TP convetor * Apply isort and black reformatting Signed-off-by: JRD971000 * add reqs * add tutorial * minor fix to tutorial * moving finetuning files Signed-off-by: arendu * moving finetuning files Signed-off-by: arendu * address comments * Apply isort and black reformatting Signed-off-by: JRD971000 * address comments * Apply isort and black reformatting Signed-off-by: JRD971000 * address comments * add mamba dependancies * add mcore tag * modify dockerfile ci * modify dockerfile ci --------- Signed-off-by: JRD971000 Signed-off-by: arendu Co-authored-by: Ali Taghibakhshi Co-authored-by: JRD971000 Co-authored-by: arendu Signed-off-by: Tugrul Konuk * [NeMo-UX] async checkpointing support (#9466) * add async checkpointing support * fixes * Apply isort and black reformatting Signed-off-by: ashors1 * add parallel read/write support and other optimizations * Apply isort and black reformatting Signed-off-by: ashors1 * address comments, make dist checkpointing args configurable * Apply 
isort and black reformatting Signed-off-by: ashors1 * fix small typo Signed-off-by: ashors1 * Update default sharding type Co-authored-by: mikolajblaz Signed-off-by: Anna Shors <71393111+ashors1@users.noreply.github.com> * Update default sharding type Co-authored-by: mikolajblaz Signed-off-by: Anna Shors <71393111+ashors1@users.noreply.github.com> * Apply isort and black reformatting Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Signed-off-by: ashors1 Signed-off-by: Anna Shors <71393111+ashors1@users.noreply.github.com> Co-authored-by: ashors1 Co-authored-by: mikolajblaz Signed-off-by: Tugrul Konuk * Fix the arguments of forward_for_export function in msdd_models (#9624) * Fix the arguments of forward_for_export function Signed-off-by: Taejin Park * Apply isort and black reformatting Signed-off-by: tango4j --------- Signed-off-by: Taejin Park Signed-off-by: tango4j Co-authored-by: tango4j Signed-off-by: Tugrul Konuk * Change default parallel_save to False (#9632) Signed-off-by: Mikołaj Błaż Signed-off-by: Tugrul Konuk * Unwrap ckpt_io for model opt (async save) (#9622) Signed-off-by: Mikołaj Błaż Signed-off-by: Tugrul Konuk * MCore T5 support for NeMo - Training (#9432) * huvu/mcore_t5 first commit from local * removing DEBUGGING prints * cleaning megatron_lm_encoder_decoder_model.py code * cleaning code * adding Github action test * only run mcore T5 test * only run mcore T5 test * only run mcore T5 test * only run mcore T5 test * reset .github/workflows/cicd-main.yml * reset .github/workflows/cicd-main.yml * adding condition self.mcore_t5 when running self.build_transformer_config() * refractor megatron_lm_encoder_decoder_model.py to not use self.model * only run T5-related tests * remove all self.model * reset cicd file * reset cicd file * updating codes remove duplicate if/else; adding mcore/transformer_engine to config file * adjust +model.mcore_t5=True * Apply isort and black reformatting Signed-off-by: huvunvidia --------- Signed-off-by: huvunvidia 
Co-authored-by: Huy Vu2 Co-authored-by: huvunvidia Signed-off-by: Tugrul Konuk * [Nemo-UX] Expose transformer_layer_spec inside GPTConfig (#9592) * Expose transformer_layer_spec inside GPTConfig * Apply isort and black reformatting Signed-off-by: marcromeyn * Expose layer-specs * Apply isort and black reformatting Signed-off-by: marcromeyn --------- Signed-off-by: marcromeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * Update NeMo Clip to Use MCore Modules (#9594) * update clip model and config file Signed-off-by: yaoyu-33 * update clip for mcore Signed-off-by: yaoyu-33 * MCore CLIP Fix Signed-off-by: yaoyu-33 * fix no mask Signed-off-by: yaoyu-33 * few neva fixes Signed-off-by: yaoyu-33 * update siglip module Signed-off-by: yaoyu-33 * add siglip loss Signed-off-by: yaoyu-33 * fix Signed-off-by: yaoyu-33 * fix collate fn Signed-off-by: yaoyu-33 * update siglip conversion script Signed-off-by: yaoyu-33 * update siglip convert Signed-off-by: yaoyu-33 * clip fixes Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 * clean up script Signed-off-by: yaoyu-33 * clip fixes Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 * fix code styles Signed-off-by: yaoyu-33 * Update siglip_loss.py Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> --------- Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: yaoyu-33 Signed-off-by: Tugrul Konuk * Add REST API to deploy module (#9539) * Add REST API and FastAPI to deploy module Signed-off-by: Abhishree * Add NemoQuery and requirements Signed-off-by: Abhishree * Edit path for config.json Signed-off-by: Abhishree * Add modifications for REST API for the correct functionality Move service dir under deploy Use NeMoQueryLLM instead of NemoQuery Signed-off-by: Abhishree * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * Apply isort and black reformatting Signed-off-by: pre-commit-ci[bot] * Change default port for REST Service Change default port for REST service as Triton server also used the same port as default. Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> * Apply isort and black reformatting Signed-off-by: athitten --------- Signed-off-by: Abhishree Signed-off-by: pre-commit-ci[bot] Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: athitten Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] Co-authored-by: athitten Signed-off-by: Tugrul Konuk * Mistral + Mixtral Support for NeVa (#9459) * mistral template support Signed-off-by: paul-gibbons * get_specs neva fix Signed-off-by: paul-gibbons * mistral update Signed-off-by: paul-gibbons * fixed mistral tokenization Signed-off-by: paul-gibbons * text_gen_strategy add mistral support Signed-off-by: paul-gibbons * mistral text_gen fix Signed-off-by: paul-gibbons * Cleaning up neva config Signed-off-by: paul-gibbons * fix llama_2 default text_gen_strategy Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons * fix forward() to account for new embedding optimization in MCore Signed-off-by: paul-gibbons * Apply isort and black reformatting Signed-off-by: paul-gibbons --------- Signed-off-by: paul-gibbons Signed-off-by: paul-gibbons Co-authored-by: paul-gibbons Signed-off-by: Tugrul Konuk * ci: Timeout per step, not job (#9635) Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Adding support for mcore generate (#9566) * Adding support for mcore generate * Apply isort and black reformatting Signed-off-by: shanmugamr1992 * adding support * Apply isort and black reformatting Signed-off-by: shanmugamr1992 * adding support --------- Signed-off-by: shanmugamr1992 Signed-off-by: Shanmugam Ramasamy 
<111910568+shanmugamr1992@users.noreply.github.com> Co-authored-by: shanmugamr Co-authored-by: shanmugamr1992 Signed-off-by: Tugrul Konuk * Improve error messaging during trt-llm export (#9638) * fix minor import bug Signed-off-by: Onur Yilmaz * Raise error when number of query groups cannot be splitted by the tps Signed-off-by: Onur Yilmaz * moved the error message to the utils Signed-off-by: Onur Yilmaz --------- Signed-off-by: Onur Yilmaz Signed-off-by: Tugrul Konuk * Nemotron export - fixing megatron_export.py (#9625) * Nemotron ONNX export fixed Signed-off-by: Boris Fomitchev * Cleanup Signed-off-by: Boris Fomitchev * Addressing code review comments Signed-off-by: Boris Fomitchev --------- Signed-off-by: Boris Fomitchev Co-authored-by: Eric Harper Signed-off-by: Tugrul Konuk * support lora when kv_channel != hidden_size / num_heads (#9636) Signed-off-by: Tugrul Konuk * [Nemo CICD] Docker temp files auto-cleanup (#9642) * Docker cleanup Signed-off-by: Tugrul Konuk * Update Dockerfile.ci (#9651) Signed-off-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> Signed-off-by: Tugrul Konuk * SDXL improvements (and support for Draft+) [DRAFT PR] (#9543) * add slurm files to .gitignore * add differentiable decode to SDXL VAE * Optionally return predicted noise during the single step sampling process * also change `get_gamma` as a new function to use inside other functions which may interact with sampling (e.g. 
draft+) * debugging sdunet converter script * Added SD/SDXL conversion script from HF to NeMo * added 'from_nemo' config for VAE * tmp commit, please make changes (oci is super slow, cannot even run vim) * new inference yaml works * add logging to autoencoder * !(dont squash) Added enabling support for LinearWrapper for SDLoRA * added samples_per_batch and fsdp arguments to SDXL inference * added extra optionally wrapper to FSDP * remove unncessary comments * remove unnecessary comments * Apply isort and black reformatting Signed-off-by: yaoyu-33 --------- Signed-off-by: yaoyu-33 Co-authored-by: Rohit Jena Co-authored-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: yaoyu-33 Signed-off-by: Tugrul Konuk * Triton deployment improvements for in-framework models (#9600) * add NemoQueryLLMPyTorch class for triton query of in-framework models * nemo_export.py changes to better support in-framework models * separate out in-framework version of triton deploy script * add generate() function to MegatronLLMDeployable to allow for direct use in export tests * use NemoQueryLLMPyTorch in deploy tests * add warning message for when MegatronLLMDeployable overrides transformer_engine * remove enable_streaming argument from deploy_inframework_triton.py since MegatronLLMDeployable does not support streaming add query_inframework.py since original query.py does not work with in-framework deployments * Apply isort and black reformatting Signed-off-by: jukim-nv * skip trtllm support check if in_framework testing * remove unused imports * run_existing_checkpoints was passing wrong prompts argument for in-framework mode * fix unused import in query_inframework.py --------- Signed-off-by: jukim-nv Co-authored-by: jukim-nv Co-authored-by: Onur Yilmaz <35306097+oyilmaz-nvidia@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Use FP8 in GPT TP2 test (#9451) * Use FP8 in GPT TP2 test Signed-off-by: Jan Baczek * Add hydra options to use TE, TP overlap and FP8 
Signed-off-by: Jan Baczek * Override presence checks in hydra Signed-off-by: Jan Baczek * WIP: Add debug code Signed-off-by: Jan Baczek * Apply isort and black reformatting Signed-off-by: jbaczek * Add more debug code Signed-off-by: Jan Baczek * Apply isort and black reformatting Signed-off-by: jbaczek * Add more debug code Signed-off-by: Jan Baczek * Apply isort and black reformatting Signed-off-by: jbaczek * Remove debug code and change underlying transformer layer to TE Signed-off-by: Jan Baczek * Override hydra error Signed-off-by: Jan Baczek * Remove tp overlap from the test Signed-off-by: Jan Baczek * Change runner for fp8 tests Signed-off-by: Jan Baczek * fix Signed-off-by: Jan Baczek * Add tp overlap test Signed-off-by: Jan Baczek * Remove TP overlap from tests. It is unsupported in docker environment Signed-off-by: Jan Baczek * Adjust GPT PP2 test to use FP8. Change optimizer in TP2 test Signed-off-by: Jan Baczek * Remove env overrides form GPT PP2 test Signed-off-by: Jan Baczek --------- Signed-off-by: Jan Baczek Signed-off-by: jbaczek Co-authored-by: jbaczek Co-authored-by: Pablo Garay Signed-off-by: Tugrul Konuk * enables default data step in megatron parallel to operate on a wider variety of tensors (#9641) * enables default data step in megatron parallel to operate on a wider variety of tensors coming out of the dataloader * handles the case where a batch is empty * Apply isort and black reformatting Signed-off-by: jomitchellnv * Allows the default data step to operate on more types than just dictionaries Signed-off-by: Jonathan Mitchell --------- Signed-off-by: jomitchellnv Signed-off-by: Jonathan Mitchell Co-authored-by: jomitchellnv Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * Revert "enables default data step in megatron parallel to operate on a wider …" (#9666) Signed-off-by: Tugrul Konuk * Contrastive Reranker/Reward model (#9171) * wip contrastive reranker Signed-off-by: arendu * wip Signed-off-by: arendu * wip Signed-off-by: 
arendu * working reranker training and validation Signed-off-by: arendu * default peft for reranker Signed-off-by: arendu * validation time update Signed-off-by: arendu * reranker test Signed-off-by: arendu * reranker inference Signed-off-by: arendu * reranker inference Signed-off-by: arendu * Apply isort and black reformatting Signed-off-by: arendu * updates Signed-off-by: arendu * Apply isort and black reformatting Signed-off-by: arendu * updates Signed-off-by: arendu * Apply isort and black reformatting Signed-off-by: arendu * also can support rlhf style reward model loss Signed-off-by: arendu * Apply isort and black reformatting Signed-off-by: arendu * Apply isort and black reformatting Signed-off-by: arendu * typo in cicd Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: arendu Signed-off-by: Adi Renduchintala Co-authored-by: arendu Signed-off-by: Tugrul Konuk * unpin transformers version (#9606) * unpin transformers Signed-off-by: dimapihtar * guard deprecated imports Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * fix import guards Signed-off-by: dimapihtar * fix import guards Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * try fixing Signed-off-by: Chen Cui * disable HF tests Signed-off-by: Dmytro Pykhtar * try fixing Signed-off-by: Chen Cui * hard code model lists Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * hard code model lists Signed-off-by: Chen Cui --------- Signed-off-by: dimapihtar Signed-off-by: dimapihtar Signed-off-by: Chen Cui Signed-off-by: Dmytro Pykhtar Signed-off-by: cuichenx Co-authored-by: dimapihtar Co-authored-by: Chen Cui Co-authored-by: Dmytro Pykhtar Co-authored-by: cuichenx Signed-off-by: Tugrul Konuk * Added CPU offloading docs (#9479) * Added CPU offloading docs Signed-off-by: Selvaraj Anandaraj * Tech writer review Signed-off-by: Selvaraj Anandaraj --------- Signed-off-by: Selvaraj 
Anandaraj Co-authored-by: Selvaraj Anandaraj Co-authored-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Update llama-3 PEFT notebook to download model from NGC (#9667) * Update llama-3 PEFT notebook to download model from NGC Signed-off-by: Shashank Verma * Fix broken link in llama-3 PEFT tutorial README Signed-off-by: Shashank Verma * Fix broken code block in llama 3 PEFT tutorial README Signed-off-by: Shashank Verma * Copy-edits to Llama-3 8B PEFT tutorial README Signed-off-by: Shashank Verma * Fix broken link Signed-off-by: Shashank Verma * Minor formatting fixes Signed-off-by: Shashank Verma --------- Signed-off-by: Shashank Verma Signed-off-by: Tugrul Konuk * fix pipeline parallel dtype bug (#9637) (#9661) Signed-off-by: ashors1 Co-authored-by: Anna Shors <71393111+ashors1@users.noreply.github.com> Co-authored-by: Marc Romeyn Co-authored-by: ashors1 Signed-off-by: Tugrul Konuk * LITA integration (#9578) * add lita Signed-off-by: Slyne Deng * Apply isort and black reformatting Signed-off-by: Slyne * add part of the tutorial and fix format Signed-off-by: slyne deng * add tutorial Signed-off-by: slyne deng * fix Tutorial ckpt conversion Signed-off-by: slyne deng * Apply isort and black reformatting Signed-off-by: Slyne * update cicd Signed-off-by: Slyne Deng * add to CIICD test Signed-off-by: Slyne Deng * changes based on review comments Signed-off-by: Slyne Deng * fix bot warning Signed-off-by: Slyne Deng * update cicd main Signed-off-by: Slyne Deng * fix cicd ckpt conversion Signed-off-by: Slyne Deng --------- Signed-off-by: Slyne Deng Signed-off-by: Slyne Signed-off-by: slyne deng Co-authored-by: Slyne Deng Co-authored-by: Slyne Co-authored-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Parametrize FPS group (#9648) (#9669) * Parametrize FPS group * Apply isort and black reformatting * Change deafult to False * Add logic to new ckptIO * Turn on parallel save by default --------- 
Signed-off-by: Mikołaj Błaż Signed-off-by: mikolajblaz Co-authored-by: mikolajblaz Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Huvu/mcore t5 (#9677) * huvu/mcore_t5 first commit from local * removing DEBUGGING prints * cleaning megatron_lm_encoder_decoder_model.py code * cleaning code * adding Github action test * only run mcore T5 test * only run mcore T5 test * only run mcore T5 test * only run mcore T5 test * reset .github/workflows/cicd-main.yml * reset .github/workflows/cicd-main.yml * adding condition self.mcore_t5 when running self.build_transformer_config() * refractor megatron_lm_encoder_decoder_model.py to not use self.model * only run T5-related tests * remove all self.model * reset cicd file * reset cicd file * updating codes remove duplicate if/else; adding mcore/transformer_engine to config file * adjust +model.mcore_t5=True * fix training for non-mcore, bf16, O2 * reset cicd-main.yml --------- Co-authored-by: Huy Vu2 Signed-off-by: Tugrul Konuk * chore: Version bump NeMo (#9631) Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * add a bit more for timeout (#9702) Signed-off-by: Pablo Garay Signed-off-by: Tugrul Konuk * Alit/mamba (#9696) * adding mamba support * fix import mixins * rm convert jamba * Apply isort and black reformatting Signed-off-by: JRD971000 * more cleanups * use GPT text gen * Apply isort and black reformatting Signed-off-by: JRD971000 * fixing gbs in TP convetor * Apply isort and black reformatting Signed-off-by: JRD971000 * add reqs * add tutorial * minor fix to tutorial * moving finetuning files Signed-off-by: arendu * moving finetuning files Signed-off-by: arendu * address comments * Apply isort and black reformatting Signed-off-by: JRD971000 * address comments * Apply isort and black reformatting Signed-off-by: JRD971000 * address comments * add mamba dependancies * add mcore tag * modify dockerfile ci * modify dockerfile ci * fix TP>1 to TP1 * add 
inference, update based on latest mcore commits * Apply isort and black reformatting Signed-off-by: JRD971000 * minor fix * Apply isort and black reformatting Signed-off-by: JRD971000 * minor fix * Apply isort and black reformatting Signed-off-by: JRD971000 * bug fix, tutorial update --------- Signed-off-by: JRD971000 Signed-off-by: arendu Co-authored-by: Ali Taghibakhshi Co-authored-by: JRD971000 Co-authored-by: arendu Signed-off-by: Tugrul Konuk * NeMo performance feature documentation (#9482) Signed-off-by: Tugrul Konuk * [TTS] Add fullband mel codec checkpoints (#9704) Signed-off-by: Ryan Signed-off-by: Tugrul Konuk * Adding support for mcore T5 Eval - SFT - PEFT (#9679) * commit to eval/sft/peft * update MCORE_COMMIT * address Chen's comments, updating retro unit test * Apply isort and black reformatting Signed-off-by: huvunvidia --------- Signed-off-by: huvunvidia Co-authored-by: Huy Vu2 Co-authored-by: huvunvidia Signed-off-by: Tugrul Konuk * Allows non-strict load with distributed checkpoints (#9613) (#9715) * Allow non-strict load * Point to non-stric load MCore branch * Avoid module level StrictHandling * Use MCore fork * Update to MCore fix * Restore ackward compatibility * Update flag defaults * Update MCore tag * Update PyT Dist interface * Update to latest core_r0.8.0 --------- Signed-off-by: Mikołaj Błaż Co-authored-by: mikolajblaz Signed-off-by: Tugrul Konuk * refactor: Uniform BRANCH for notebooks (#9710) Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * fix legacy ds padding bug (#9716) * fix legacy ds padding bug Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * avoid code repetition Signed-off-by: dimapihtar * fix typo Signed-off-by: dimapihtar --------- Signed-off-by: dimapihtar Signed-off-by: dimapihtar Co-authored-by: dimapihtar Signed-off-by: Tugrul Konuk * enables default data step in megatron parallel to operate on a wider variety of tensors - second try (#9671) * enables default data step 
in megatron parallel to operate on a wider variety of tensors coming out of the dataloader Signed-off-by: Jonathan Mitchell * handles the case where a batch is empty Signed-off-by: Jonathan Mitchell * Apply isort and black reformatting Signed-off-by: jomitchellnv Signed-off-by: Jonathan Mitchell * Allows the default data step to operate on more types than just dictionaries Signed-off-by: Jonathan Mitchell * Apply isort and black reformatting Signed-off-by: jomitchellnv --------- Signed-off-by: Jonathan Mitchell Signed-off-by: jomitchellnv Co-authored-by: jomitchellnv Co-authored-by: John St. John Signed-off-by: Tugrul Konuk * [NeMo-UX] Fix when optimizers are setup for PEFT (#9619) (#9647) * Fix when optimizers are setup for PEFT * Apply isort and black reformatting * Init DDP inside PEFT * Apply isort and black reformatting * Some fixes, loss seems to become nan with peft for some reason * Apply isort and black reformatting * Loss goes down on fp32 * Apply isort and black reformatting * Simplifying FNMixin * Apply isort and black reformatting * Fix bug with new checkpoint-io * Apply isort and black reformatting * Fix failing test: test_peft_on_train_epoch_start_with_adapter * Apply isort and black reformatting --------- Signed-off-by: marcromeyn Signed-off-by: ashors1 Co-authored-by: Marc Romeyn Co-authored-by: marcromeyn Co-authored-by: Chen Cui Co-authored-by: ashors1 Signed-off-by: Tugrul Konuk * refactor: README (#9712) * refactor: README * refactor: Use new README in `setup.py` Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Remove mask if use fusion mask (#9723) * Remove mask if use fusion mask Signed-off-by: Cheng-Ping Hsieh * Apply isort and black reformatting Signed-off-by: hsiehjackson --------- Signed-off-by: Cheng-Ping Hsieh Signed-off-by: hsiehjackson Co-authored-by: hsiehjackson Signed-off-by: Tugrul Konuk * [NeMo-UX] Fix imports so local configuration of runs works again (#9690) (#9694) * Move tensorstore import inline * Moving 
AsyncFinalizableCheckpointIO import inline * Wrap AsyncCompatibleCheckpointIO in try/catch inside pl.py * Moving gpt_layer_specs import inline * Apply isort and black reformatting --------- Signed-off-by: marcromeyn Signed-off-by: ashors1 Co-authored-by: Marc Romeyn Co-authored-by: marcromeyn Signed-off-by: Tugrul Konuk * add contianer (#9731) * add contianer * modify tutorial * modify tutorial * modify tutorial --------- Co-authored-by: Ali Taghibakhshi Signed-off-by: Tugrul Konuk * update pretrained model text (#9724) (#9745) Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Signed-off-by: Tugrul Konuk * [Nemo-UX] Including all trainable-params in a PEFT-checkpoint (#9650) (#9691) * Nemotron export - fixing megatron_export.py (#9625) * Nemotron ONNX export fixed * Cleanup * Addressing code review comments --------- * Including all trainable-params in a PEFT-checkpoint * Apply isort and black reformatting * Small fixes to make model-importer work * Fixing failing tests --------- Signed-off-by: Boris Fomitchev Signed-off-by: marcromeyn Co-authored-by: Marc Romeyn Co-authored-by: Boris Fomitchev Co-authored-by: Eric Harper Co-authored-by: marcromeyn Co-authored-by: Chen Cui Co-authored-by: ashors1 Signed-off-by: Tugrul Konuk * [NeMo-UX] Make TE and Apex dependencies optional (#9732) * [NeMo-UX] Make TE and Apex dependencies optional (#9550) * Provide a pure pytorch/jit path to avoid required dependency on TE and Apex Signed-off-by: ashors1 * add missing file Signed-off-by: ashors1 * add minimal gpt pretraining example Signed-off-by: ashors1 * fix pre-training datamodule initialization Signed-off-by: ashors1 * add non-te/non-apex test Signed-off-by: ashors1 * add comment to pretraining script Signed-off-by: ashors1 * use microbatch calculator from mcore Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * fix nemo 2 test name Signed-off-by: ashors1 * update Mcore 
commit for CI Signed-off-by: ashors1 * replace apex microbatch calculator with megatron's in more places Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * fix missing import Signed-off-by: ashors1 * fix typo Signed-off-by: ashors1 * fix missed apex import Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 Signed-off-by: ashors1 * move imports Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 Signed-off-by: ashors1 * move imports Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * add types to command-line args Signed-off-by: ashors1 * bug fix Signed-off-by: ashors1 * fix path Signed-off-by: ashors1 * Disable distributed optimizer in nemo 2.0 test Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * fix optimizer config Signed-off-by: ashors1 * update checkpointing Signed-off-by: ashors1 * move import Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * fix failing unit test Signed-off-by: ashors1 * fix failing test Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * Updating num_weights check of RETRO due to underlying changes from mcore RETRO MLM Signed-off-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> * Apply isort and black reformatting Signed-off-by: huvunvidia * fix typo Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * remove stale warning Signed-off-by: ashors1 * fix lora notebook Signed-off-by: ashors1 * fix small typo Signed-off-by: ashors1 * add import guards to gemma2 Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Signed-off-by: ashors1 Signed-off-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> Signed-off-by: huvunvidia Co-authored-by: ashors1 Co-authored-by: Eric Harper Co-authored-by: huvunvidia 
<86480512+huvunvidia@users.noreply.github.com> Co-authored-by: huvunvidia * fix cherry-pick Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Signed-off-by: ashors1 Signed-off-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> Signed-off-by: huvunvidia Co-authored-by: ashors1 Co-authored-by: Eric Harper Co-authored-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> Co-authored-by: huvunvidia Signed-off-by: Tugrul Konuk * [NeMo-UX] Minor bug fix when TE/Apex not installed (#9749) * minor 2.0 bug fix when TE/Apex not installed Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Signed-off-by: ashors1 Co-authored-by: ashors1 Signed-off-by: Tugrul Konuk * make 'load_directly_on_device' configurable (#9657) (#9674) Signed-off-by: ashors1 Co-authored-by: Anna Shors <71393111+ashors1@users.noreply.github.com> Co-authored-by: Pablo Garay Co-authored-by: ashors1 Signed-off-by: Tugrul Konuk * TorchAudio installation workaround for incorrect `PYTORCH_VERSION` env variable (#9736) (#9750) Signed-off-by: Vladimir Bataev Co-authored-by: Vladimir Bataev Signed-off-by: Tugrul Konuk * Create __init__.py (#9755) Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Signed-off-by: Tugrul Konuk * Canary Adapters tutorial (#9670) * Fix issue with prompt_defaults Signed-off-by: smajumdar * Add core level support for grad map tracking Signed-off-by: smajumdar * Add core level support for grad map tracking Signed-off-by: smajumdar * Apply isort and black reformatting Signed-off-by: titu1994 * Add tutorial and update repr of formatters Signed-off-by: smajumdar * Update docs Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Signed-off-by: titu1994 Signed-off-by: Tugrul Konuk * match nemo 1's default behavior for drop_last and pad_samples_to_global_batch_size (#9707) (#9753) Signed-off-by: ashors1 
Co-authored-by: Anna Shors <71393111+ashors1@users.noreply.github.com> Co-authored-by: Marc Romeyn Signed-off-by: Tugrul Konuk * ci: Bump MCore tag (#9744) Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Fix the serialization of partial functions in nemo 2.0 (#9668) * fix serialization of partial function * update serialization to handle value.args Signed-off-by: srabhi * add unit test Signed-off-by: srabhi * remove redundant code from unit-test Signed-off-by: srabhi --------- Signed-off-by: srabhi Signed-off-by: Tugrul Konuk * ci: Add PAT to create-pullrequest action (#9769) Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Speeds up copying of necessary artifact files with SaveRestoreConnector (#9682) * Speeds up copying of neccesary artifact files with SaveRestoreConnector Previously, the SaveRestoreConnector would copy and untar entire checkpoints just to copy out a tokenizer. For models in the >100GB, this led to timeouts since only rank=0 did this work, while other ranks moved on and waited at an all-gather barrier (observed NCCL timeout at 10min). 
Signed-off-by: Terry Kong * cleanup Signed-off-by: Terry Kong * black formatting Signed-off-by: Terry Kong * Apply isort and black reformatting Signed-off-by: terrykong Signed-off-by: Terry Kong * restoring logic to previous tempdir logic Signed-off-by: Terry Kong * nlp overrides too Signed-off-by: Terry Kong * respect return_config Signed-off-by: Terry Kong * some unit tests Signed-off-by: Terry Kong * nodbg Signed-off-by: Terry Kong * Apply isort and black reformatting Signed-off-by: terrykong * correct typing Signed-off-by: Terry Kong * Fixes directory issue Signed-off-by: Terry Kong * Apply isort and black reformatting Signed-off-by: terrykong --------- Signed-off-by: Terry Kong Signed-off-by: terrykong Co-authored-by: terrykong Co-authored-by: Eric Harper Signed-off-by: Tugrul Konuk * ci: Remove ko3n1g from reviewers (#9773) Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * bump mcore commit in Dockerfile (#9766) Signed-off-by: ashors1 Signed-off-by: Tugrul Konuk * Yuya/add checkpoints section (#9329) * Add checkpoints section Signed-off-by: yaoyu-33 * Fix title Signed-off-by: yaoyu-33 * update Signed-off-by: yaoyu-33 * Add section on ".qnemo" checkpoints (#9503) * Add 'Quantized Checkpoints' section Signed-off-by: Jan Lasek * Address review comments Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek * Distributed checkpointing user guide (#9494) * Describe shardings and entrypoints Signed-off-by: Mikołaj Błaż * Strategies, optimizers, finalize entrypoints Signed-off-by: Mikołaj Błaż * Transformations Signed-off-by: Mikołaj Błaż * Integration Signed-off-by: Mikołaj Błaż * Add link from intro Signed-off-by: Mikołaj Błaż * Apply grammar suggestions Signed-off-by: Mikołaj Błaż * Explain the example Signed-off-by: Mikołaj Błaż * Apply review suggestions Signed-off-by: Mikołaj Błaż * Add zarr and torch_dist explanation --------- Signed-off-by: Mikołaj Błaż * add subsection Signed-off-by: yaoyu-33 * Update docs/source/checkpoints/intro.rst 
Co-authored-by: Chen Cui Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> * address comments Signed-off-by: yaoyu-33 * fix Signed-off-by: yaoyu-33 * fix code block Signed-off-by: yaoyu-33 * address comments Signed-off-by: yaoyu-33 * formatting Signed-off-by: yaoyu-33 * fix Signed-off-by: yaoyu-33 * fix Signed-off-by: yaoyu-33 --------- Signed-off-by: yaoyu-33 Signed-off-by: Jan Lasek Signed-off-by: Mikołaj Błaż Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: Jan Lasek Co-authored-by: mikolajblaz Co-authored-by: Chen Cui Signed-off-by: Tugrul Konuk * Release automation (#9687) * ci: Add workflow for code-freeze Signed-off-by: Oliver Koenig * ci: Add workflow for releasing NeMo Tookit Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Rename speech dockerfile appropriately (#9778) Signed-off-by: Pablo Garay Signed-off-by: Tugrul Konuk * Add option to convert PyTriton response to OpenAI format (#9726) * Option to convert response to OPenAI format Signed-off-by: Abhishree * Add OpenAI response arg and store_args_to_json method Signed-off-by: Abhishree * Apply isort and black reformatting Signed-off-by: athitten --------- Signed-off-by: Abhishree Signed-off-by: athitten Co-authored-by: athitten Signed-off-by: Tugrul Konuk * ci: Fix changelog-config (#9788) This fixes the template such that collapsable sections are properly rendered. Signed-off-by: Oliver Koenig Signed-off-by: Tugrul Konuk * Support configurable extra fields for LazyNeMoTarredIterator (#9548) * Support configurable extra fields for LazyNeMoTarredIterator Signed-off-by: Piotr Żelasko * Add tests and fixes Signed-off-by: Piotr Żelasko * Documentation, more tests Signed-off-by: Piotr Żelasko --------- Signed-off-by: Piotr Żelasko Signed-off-by: Tugrul Konuk * upper bound huggingface-hub version to 0.24.0 (exc.) 
(#9799) Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * CodeQL fixes Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * import guard Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * add tiktoken to requirements Signed-off-by: Alexandros Koumparoulis Signed-off-by: Tugrul Konuk * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Tugrul Konuk * Apply isort and black reformatting Signed-off-by: pre-commit-ci[bot] Signed-off-by: Tugrul Konuk * Apply isort and black reformatting Signed-off-by: ertkonuk Signed-off-by: Tugrul Konuk * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Tugrul Konuk Signed-off-by: Michal Futrega Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Signed-off-by: Chen Cui Signed-off-by: cuichenx Signed-off-by: ertkonuk Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> Signed-off-by: Alexey Panteleev Signed-off-by: apanteleev Signed-off-by: dimapihtar Signed-off-by: marcromeyn Signed-off-by: Jan Lasek Signed-off-by: janekl Signed-off-by: Piotr Żelasko Signed-off-by: dimapihtar Signed-off-by: Boris Fomitchev Signed-off-by: borisfom Signed-off-by: Oliver Koenig Signed-off-by: Onur Yilmaz Signed-off-by: oyilmaz-nvidia Signed-off-by: Abhishree Signed-off-by: Jacek Bieniusiewicz Signed-off-by: jbieniusiewi Signed-off-by: Maanu Grover Signed-off-by: ashors1 Signed-off-by: ashors1 Signed-off-by: smajumdar Signed-off-by: titu1994 Signed-off-by: Somshubra Majumdar Signed-off-by: meatybobby Signed-off-by: weiqingw4ng Signed-off-by: Ante Jukić Signed-off-by: anteju Signed-off-by: Dong Hyuk Chang Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Signed-off-by: paul-gibbons Signed-off-by: paul-gibbons Signed-off-by: JimmyZhang12 Signed-off-by: Alexandros Koumparoulis 
<153118171+akoumpa@users.noreply.github.com> Signed-off-by: adityavavre Signed-off-by: adityavavre Signed-off-by: Marc Romeyn Signed-off-by: JRD971000 Signed-off-by: arendu Signed-off-by: msekoyan@nvidia.com Signed-off-by: monica-sekoyan Signed-off-by: Mikołaj Błaż Signed-off-by: Anna Shors <71393111+ashors1@users.noreply.github.com> Signed-off-by: Taejin Park Signed-off-by: tango4j Signed-off-by: huvunvidia Signed-off-by: pre-commit-ci[bot] Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: athitten Signed-off-by: shanmugamr1992 Signed-off-by: Shanmugam Ramasamy <111910568+shanmugamr1992@users.noreply.github.com> Signed-off-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> Signed-off-by: jukim-nv Signed-off-by: Jan Baczek Signed-off-by: jbaczek Signed-off-by: jomitchellnv Signed-off-by: Jonathan Mitchell Signed-off-by: arendu Signed-off-by: Adi Renduchintala Signed-off-by: Dmytro Pykhtar Signed-off-by: Selvaraj Anandaraj Signed-off-by: Shashank Verma Signed-off-by: Slyne Deng Signed-off-by: Slyne Signed-off-by: slyne deng Signed-off-by: mikolajblaz Signed-off-by: Pablo Garay Signed-off-by: Ryan Signed-off-by: Cheng-Ping Hsieh Signed-off-by: hsiehjackson Signed-off-by: Elena Rastorgueva Signed-off-by: Vladimir Bataev Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Signed-off-by: srabhi Signed-off-by: Terry Kong Signed-off-by: terrykong Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: Marc Romeyn Co-authored-by: Michal Futrega Co-authored-by: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Co-authored-by: akoumpa Co-authored-by: Pablo Garay Co-authored-by: Chen Cui Co-authored-by: cuichenx Co-authored-by: ashors1 <71393111+ashors1@users.noreply.github.com> Co-authored-by: ertkonuk Co-authored-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: yaoyu-33 Co-authored-by: Alexey Panteleev 
Co-authored-by: apanteleev Co-authored-by: Onur Yilmaz <35306097+oyilmaz-nvidia@users.noreply.github.com> Co-authored-by: meatybobby Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: marcromeyn Co-authored-by: Jan Lasek Co-authored-by: janekl Co-authored-by: Piotr Żelasko Co-authored-by: dimapihtar Co-authored-by: Boris Fomitchev Co-authored-by: Eric Harper Co-authored-by: Somshubra Majumdar Co-authored-by: oliver könig Co-authored-by: oyilmaz-nvidia Co-authored-by: Ao Tang Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: jbieniusiewi <152396322+jbieniusiewi@users.noreply.github.com> Co-authored-by: jbieniusiewi Co-authored-by: Maanu Grover Co-authored-by: ashors1 Co-authored-by: meatybobby Co-authored-by: Weiqing Wang Co-authored-by: weiqingw4ng Co-authored-by: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Co-authored-by: anteju Co-authored-by: Dong Hyuk Chang Co-authored-by: Dong Hyuk Chang Co-authored-by: Jacek Bieniusiewicz Co-authored-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Co-authored-by: paul-gibbons <87940629+paul-gibbons@users.noreply.github.com> Co-authored-by: paul-gibbons Co-authored-by: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Co-authored-by: Sara Rabhi Co-authored-by: Aditya Vavre Co-authored-by: adityavavre Co-authored-by: Ali Taghibakhshi <71892896+JRD971000@users.noreply.github.com> Co-authored-by: Ali Taghibakhshi Co-authored-by: JRD971000 Co-authored-by: arendu Co-authored-by: monica-sekoyan <166123533+monica-sekoyan@users.noreply.github.com> Co-authored-by: monica-sekoyan Co-authored-by: Nithin Rao Co-authored-by: mikolajblaz Co-authored-by: Taejin Park Co-authored-by: tango4j Co-authored-by: huvunvidia <86480512+huvunvidia@users.noreply.github.com> Co-authored-by: Huy Vu2 Co-authored-by: huvunvidia Co-authored-by: 
pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] Co-authored-by: athitten Co-authored-by: Shanmugam Ramasamy <111910568+shanmugamr1992@users.noreply.github.com> Co-authored-by: shanmugamr Co-authored-by: shanmugamr1992 Co-authored-by: Rohit Jena Co-authored-by: Rohit Jena Co-authored-by: Justin Kim Co-authored-by: jukim-nv Co-authored-by: jbaczek <45043825+jbaczek@users.noreply.github.com> Co-authored-by: jbaczek Co-authored-by: jomitchellnv <148147880+jomitchellnv@users.noreply.github.com> Co-authored-by: jomitchellnv Co-authored-by: arendu Co-authored-by: Dmytro Pykhtar Co-authored-by: Selvaraj Anandaraj Co-authored-by: Selvaraj Anandaraj Co-authored-by: Shashank Verma Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: ashors1 Co-authored-by: Slyne Deng Co-authored-by: Slyne Deng Co-authored-by: Slyne Co-authored-by: Sangkug Lym Co-authored-by: Ryan Langman Co-authored-by: John St. 
John Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: hsiehjackson Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: Vladimir Bataev Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: Terry Kong Co-authored-by: terrykong Co-authored-by: Alexandros Koumparoulis --- .../collections/common/tokenizers/__init__.py | 1 + .../common/tokenizers/tiktoken_tokenizer.py | 200 ++++++++++++++++++ .../nlp/modules/common/tokenizer_utils.py | 5 + nemo/export/multimodal/run.py | 5 + requirements/requirements_nlp.txt | 1 + 5 files changed, 212 insertions(+) create mode 100644 nemo/collections/common/tokenizers/tiktoken_tokenizer.py diff --git a/nemo/collections/common/tokenizers/__init__.py b/nemo/collections/common/tokenizers/__init__.py index 6a71920bf6d4..4ba946cf9f76 100644 --- a/nemo/collections/common/tokenizers/__init__.py +++ b/nemo/collections/common/tokenizers/__init__.py @@ -19,6 +19,7 @@ from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer from nemo.collections.common.tokenizers.regex_tokenizer import RegExTokenizer from nemo.collections.common.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer +from nemo.collections.common.tokenizers.tiktoken_tokenizer import TiktokenTokenizer from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec from nemo.collections.common.tokenizers.word_tokenizer import WordTokenizer diff --git a/nemo/collections/common/tokenizers/tiktoken_tokenizer.py b/nemo/collections/common/tokenizers/tiktoken_tokenizer.py new file mode 100644 index 000000000000..4b1847051cdc --- /dev/null +++ b/nemo/collections/common/tokenizers/tiktoken_tokenizer.py @@ -0,0 +1,200 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import json +import os +from pathlib import Path +from typing import Dict, List, Optional + +try: + import tiktoken +except ImportError: + pass + +from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec + +__all__ = ['TiktokenTokenizer'] + + +def reload_mergeable_ranks( + path: str, + max_vocab: Optional[int] = None, +) -> Dict[bytes, int]: + """ + Reload the tokenizer JSON file and convert it to Tiktoken format. 
+ """ + assert path.endswith(".json") + + # reload vocab + with open(path, "r") as f: + vocab = json.load(f) + assert isinstance(vocab, list) + print(f"Vocab size: {len(vocab)}") + if max_vocab is not None: + vocab = vocab[:max_vocab] + print(f"Cutting vocab to first {len(vocab)} tokens.") + + # build ranks + ranks: Dict[bytes, int] = {} + for i, x in enumerate(vocab): + assert x.keys() == {"rank", "token_bytes", "token_str"} + assert x["rank"] == i + merge = base64.b64decode(x["token_bytes"]) + assert i >= 256 or merge == bytes([i]) + ranks[merge] = x["rank"] + + # sanity check + assert len(ranks) == len(vocab) + assert set(ranks.values()) == set(range(len(ranks))) + + return ranks + + +PATTERN_TIKTOKEN = "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" +DEFAULT_TIKTOKEN_MAX_VOCAB = 2**17 # 131072 +SPECIAL_TOKENS = ["", "", ""] +SPECIAL_TOKEN_TEMPLATE = "" + + +class TiktokenTokenizer(TokenizerSpec): + """ + TiktokenTokenizer https://github.com/openai/tiktoken. 
+ + Args: + model_path: path to tokenizer vocabulary + num_special_tokens: number of special tokens to generate + special_tokens: template for user-defined special tokens + pattern: Regex pattern to split the text + """ + + def __init__( + self, + vocab_file: str, + pattern: str = PATTERN_TIKTOKEN, + vocab_size: int = DEFAULT_TIKTOKEN_MAX_VOCAB, # 131072 + num_special_tokens: int = 1000, + special_tokens: Optional[List[str]] = None, + ): + if not vocab_file or not os.path.exists(vocab_file): + raise ValueError(f"vocab_file: {vocab_file} is invalid") + + if special_tokens is None: + special_tokens = SPECIAL_TOKENS.copy() + + assert len(special_tokens) == len(set(special_tokens)), f"Special tokens should be unique: {special_tokens}" + assert len(special_tokens) <= num_special_tokens < vocab_size + assert set(SPECIAL_TOKENS) <= set(special_tokens), f"Custom special tokens should include {SPECIAL_TOKENS}" + + self._unk_id = special_tokens.index("<unk>") + self._bos_id = special_tokens.index("<s>") + self._eos_id = special_tokens.index("</s>") + + self._vocab_size = vocab_size + print(f'{self._vocab_size = }') + self.num_special_tokens = num_special_tokens + special_filler = [SPECIAL_TOKEN_TEMPLATE.format(id=i) for i in range(len(special_tokens), num_special_tokens)] + if special_filler: + print(f"Adding special tokens {special_filler[0]}, ..., {special_filler[-1]}") + self.special_tokens = special_tokens + special_filler + assert len(set(self.special_tokens)) == len(self.special_tokens) == num_special_tokens, self.special_tokens + self.inner_vocab_size = vocab_size - num_special_tokens + + # reload vocab + self.token2id = reload_mergeable_ranks(vocab_file, max_vocab=self.inner_vocab_size) + self.id2token = {v: k for k, v in self.token2id.items()} + assert set(range(self.inner_vocab_size)) == set(self.id2token.keys()) + + self.shifted_id2token = {i: tok for i, tok in enumerate(self.special_tokens)} + for key, value in self.id2token.items(): + self.shifted_id2token[key +
self.num_special_tokens] = value + + self.tokenizer = tiktoken.Encoding( + name=Path(vocab_file).parent.name, + pat_str=pattern, + mergeable_ranks=self.token2id, + special_tokens={}, # special tokens are handled manually + ) + + def text_to_tokens(self, text: str): + token_ids = self.tokenizer.encode(text) + return [self.tokenizer.decode_single_token_bytes(token) for token in token_ids] + + def tokens_to_text(self, tokens: List[int]): + token_ids = [self.tokenizer.encode_single_token(tokens) for tokens in tokens] + return self.tokenizer.decode(token_ids) + + def token_to_id(self, token): + return self.tokenizer.encode_single_token(token) + + def tokens_to_ids(self, tokens): + return [self.tokenizer.encode_single_token(token) for token in tokens] + + def ids_to_tokens(self, token_ids): + tokens = [] + for token_id in token_ids: + if token_id < self.num_special_tokens: + tokens.append(self.special_tokens[token_id]) + else: + token_id -= self.num_special_tokens + token_bytes = self.tokenizer.decode_single_token_bytes(token_id) + tokens.append(token_bytes.decode('utf-8', errors='replace')) + return tokens + + def text_to_ids(self, text: str): + tokens = self.tokenizer.encode(text) + tokens = [t + self.num_special_tokens for t in tokens] + return tokens + + def ids_to_text(self, tokens: List[int]): + # Filter out special tokens and adjust the remaining tokens + adjusted_tokens = [ + t - self.num_special_tokens + for t in tokens + if t not in {self.bos, self.eos} and t >= self.num_special_tokens + ] + + # Decode only if there are tokens left after filtering + if adjusted_tokens: + return self.tokenizer.decode(adjusted_tokens) + else: + return "" # Return an empty string if all tokens were filtered out + + @property + def bos_id(self): + return self._bos_id + + @property + def eos_id(self): + return self._eos_id + + @property + def unk_id(self): + return self._unk_id + + @property + def vocab(self): + return self.token2id + + @property + def decoder(self): + return 
self.shifted_id2token + + @property + def encoder(self): + return self.vocab + + @property + def vocab_size(self) -> int: + return self._vocab_size diff --git a/nemo/collections/nlp/modules/common/tokenizer_utils.py b/nemo/collections/nlp/modules/common/tokenizer_utils.py index d3ee69f75b25..4cbadd87fe52 100644 --- a/nemo/collections/nlp/modules/common/tokenizer_utils.py +++ b/nemo/collections/nlp/modules/common/tokenizer_utils.py @@ -22,6 +22,7 @@ from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer from nemo.collections.common.tokenizers.regex_tokenizer import RegExTokenizer from nemo.collections.common.tokenizers.tabular_tokenizer import TabularTokenizer +from nemo.collections.common.tokenizers.tiktoken_tokenizer import TiktokenTokenizer from nemo.collections.common.tokenizers.word_tokenizer import WordTokenizer from nemo.collections.nlp.modules.common.huggingface.huggingface_utils import get_huggingface_pretrained_lm_models_list from nemo.collections.nlp.modules.common.lm_utils import get_pretrained_lm_models_list @@ -122,6 +123,8 @@ def get_tokenizer( legacy=True, chat_template=chat_template, ) + elif tokenizer_name == 'tiktoken': + return nemo.collections.common.tokenizers.tiktoken_tokenizer.TiktokenTokenizer(vocab_file=vocab_file) elif tokenizer_name == 'word': return WordTokenizer(vocab_file=vocab_file, **special_tokens_dict) elif tokenizer_name == 'char': @@ -221,6 +224,8 @@ def get_nmt_tokenizer( ) elif library == 'tabular': return TabularTokenizer(vocab_file, delimiter=delimiter) + elif library == 'tiktoken': + return TiktokenTokenizer(vocab_file=vocab_file) else: raise NotImplementedError( 'Currently we only support "huggingface", "sentencepiece", "megatron", and "byte-level" tokenizer' diff --git a/nemo/export/multimodal/run.py b/nemo/export/multimodal/run.py index f94c2e3f3944..07a2c08ff04c 100644 --- a/nemo/export/multimodal/run.py +++ b/nemo/export/multimodal/run.py @@ -74,6 +74,11 @@ def init_tokenizer(self, 
llm_engine_dir): self.tokenizer = AutoTokenizer.from_pretrained(os.path.join(llm_engine_dir, 'huggingface_tokenizer')) self.tokenizer.pad_token = self.tokenizer.eos_token + if self.model_type == 'vita': + self.tokenizer.im_start_id = self.tokenizer.convert_tokens_to_ids("<extra_id_4>") + self.tokenizer.im_end_id = self.tokenizer.convert_tokens_to_ids("<extra_id_5>") + self.tokenizer.vid_start_id = self.tokenizer.convert_tokens_to_ids("<extra_id_8>") + self.tokenizer.vid_end_id = self.tokenizer.convert_tokens_to_ids("<extra_id_9>") else: from sentencepiece import SentencePieceProcessor diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index a1dad5b64a8a..f98f7c318c56 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -20,4 +20,5 @@ rouge_score sacrebleu # manually install sacrebleu[ja] for Japanese support; MeCab is unsupported in Python 3.11+ sentence_transformers tensorstore<0.1.46 +tiktoken==0.7.0 zarr