diff --git a/README.rst b/README.rst index 7ac95b8cef70..0d3b23a964c0 100644 --- a/README.rst +++ b/README.rst @@ -132,8 +132,8 @@ Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training t Requirements ------------ -1) Python 3.8 or above -2) Pytorch 1.10.0 or above +1) Python 3.9 or above +2) Pytorch 1.13.1 or above 3) NVIDIA GPU for training Documentation diff --git a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index 0822ade0224c..b13dedca300f 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -124,7 +124,7 @@ Datasets .. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTDataset :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTChatDataset +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset.GPTSFTChatDataset :show-inheritance: .. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.retro_dataset.RETRODataset diff --git a/docs/source/starthere/intro.rst b/docs/source/starthere/intro.rst index 2e0e272c93f4..70426d3fe4a0 100644 --- a/docs/source/starthere/intro.rst +++ b/docs/source/starthere/intro.rst @@ -34,9 +34,9 @@ Prerequisites Before you begin using NeMo, it's assumed you meet the following prerequisites. -#. You have Python version 3.6, 3.7 or 3.8. +#. You have Python version 3.9 or 3.10. -#. You have Pytorch version 1.8.1. +#. You have Pytorch version 1.13.1 or 2.0+. #. You have access to an NVIDIA GPU for training. 
diff --git a/nemo/collections/asr/losses/rnnt.py b/nemo/collections/asr/losses/rnnt.py index a884f7d3cc68..894be6319c99 100644 --- a/nemo/collections/asr/losses/rnnt.py +++ b/nemo/collections/asr/losses/rnnt.py @@ -99,7 +99,7 @@ class RNNTLossConfig: min_version='0.53.0', is_available=NUMBA_RNNT_AVAILABLE, installation_msg=NUMBA_INSTALLATION_MESSAGE, - force_float32=not numba_utils.NUMBA_FP16_SUPPORTED, + force_float32=False, # This is only temporarily false, will be dynamically updated during resolution ), "pytorch": RNNTLossConfig( loss_name="pytorch", @@ -258,6 +258,9 @@ def resolve_rnnt_loss(loss_name: str, blank_idx: int, loss_kwargs: dict = None) _warn_unused_additional_kwargs(loss_name, loss_kwargs) elif loss_name == 'warprnnt_numba': + # Update loss config's forced float32 flag if set to None + loss_config.force_float32 = not numba_utils.is_numba_cuda_fp16_supported() + fastemit_lambda = loss_kwargs.pop('fastemit_lambda', 0.0) clamp = loss_kwargs.pop('clamp', -1.0) loss_func = RNNTLossNumba(blank=blank_idx, reduction='none', fastemit_lambda=fastemit_lambda, clamp=clamp) @@ -444,7 +447,7 @@ def forward(self, log_probs, targets, input_lengths, target_lengths): max_targets_len = target_lengths.max() # Force cast joint to float32 - if not self._force_float32 and numba_utils.NUMBA_FP16_SUPPORTED: + if not self._force_float32 and numba_utils.is_numba_cuda_fp16_supported(): # Execute the kernel in fp16 pass elif self._force_float32 and log_probs.dtype != torch.float32: diff --git a/nemo/core/utils/numba_utils.py b/nemo/core/utils/numba_utils.py index 04010a2f7db4..9117b2ea1010 100644 --- a/nemo/core/utils/numba_utils.py +++ b/nemo/core/utils/numba_utils.py @@ -29,9 +29,6 @@ __NUMBA_MINIMUM_VERSION__ = os.environ.get("NEMO_NUMBA_MINVER", __NUMBA_DEFAULT_MINIMUM_VERSION__) __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__ = "0.57.0" -NUMBA_FP16_SUPPORTED = model_utils.check_lib_version( - 'numba', __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__, operator=operator.ge -)[0] 
NUMBA_INSTALLATION_MESSAGE = ( @@ -171,12 +168,16 @@ def is_numba_cuda_fp16_supported(return_reason: bool = False) -> Union[bool, Tup use_nvidia_binding = False reason += "Env variable `NUMBA_CUDA_USE_NVIDIA_BINDING` is not available or has not set to `1`." - if NUMBA_FP16_SUPPORTED: + numba_fp16_version_correct = model_utils.check_lib_version( + 'numba', __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__, operator=operator.ge + )[0] + + if numba_fp16_version_correct: reason += f"Numba CUDA FP16 is supported in installed numba version." else: reason += f"Numba CUDA FP16 is not supported in installed numba version." - result = use_nvidia_binding and NUMBA_FP16_SUPPORTED + result = use_nvidia_binding and numba_fp16_version_correct if return_reason: return result, reason diff --git a/nemo/utils/model_utils.py b/nemo/utils/model_utils.py index 211ffdcdf11e..42a0b108944d 100644 --- a/nemo/utils/model_utils.py +++ b/nemo/utils/model_utils.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import importlib import os from dataclasses import dataclass, is_dataclass from enum import Enum @@ -554,7 +555,7 @@ def check_lib_version(lib_name: str, checked_version: str, operator) -> Tuple[Op if '.' in lib_name: mod = import_class_by_path(lib_name) else: - mod = __import__(lib_name) + mod = importlib.import_module(lib_name) if hasattr(mod, '__version__'): lib_ver = version.Version(mod.__version__)