diff --git a/nemo/collections/tts/models/audio_codec.py b/nemo/collections/tts/models/audio_codec.py index be38406d3fa1..6414fa20e52d 100644 --- a/nemo/collections/tts/models/audio_codec.py +++ b/nemo/collections/tts/models/audio_codec.py @@ -13,7 +13,6 @@ # limitations under the License. import itertools -import random from pathlib import Path from typing import List, Tuple @@ -34,7 +33,7 @@ from nemo.collections.tts.parts.utils.helpers import get_batch_size, get_num_workers from nemo.core import ModelPT from nemo.core.classes.common import PretrainedModelInfo, typecheck -from nemo.core.neural_types.elements import AudioSignal, EncodedRepresentation, Index, LengthsType +from nemo.core.neural_types.elements import AudioSignal, EncodedRepresentation, LengthsType, TokenIndex from nemo.core.neural_types.neural_type import NeuralType from nemo.core.optim.lr_scheduler import compute_max_steps, prepare_lr_scheduler from nemo.utils import logging, model_utils @@ -168,7 +167,7 @@ def decode_audio(self, inputs: torch.Tensor, input_len: torch.Tensor) -> Tuple[t "encoded": NeuralType(('B', 'D', 'T_encoded'), EncodedRepresentation()), "encoded_len": NeuralType(tuple('B'), LengthsType()), }, - output_types={"tokens": NeuralType(('B', 'C', 'T_encoded'), Index())}, + output_types={"tokens": NeuralType(('B', 'C', 'T_encoded'), TokenIndex())}, ) def quantize(self, encoded: torch.Tensor, encoded_len: torch.Tensor) -> torch.Tensor: """Quantize the continuous encoded representation into a discrete @@ -192,7 +191,7 @@ def quantize(self, encoded: torch.Tensor, encoded_len: torch.Tensor) -> torch.Te @typecheck( input_types={ - "tokens": NeuralType(('B', 'C', 'T_encoded'), Index()), + "tokens": NeuralType(('B', 'C', 'T_encoded'), TokenIndex()), "tokens_len": NeuralType(tuple('B'), LengthsType()), }, output_types={"dequantized": NeuralType(('B', 'D', 'T_encoded'), EncodedRepresentation()),}, @@ -221,7 +220,7 @@ def dequantize(self, tokens: torch.Tensor, tokens_len: torch.Tensor) -> torch.Te "audio_len": NeuralType(tuple('B'), LengthsType()), }, output_types={ - "tokens": NeuralType(('B', 'C', 'T_encoded'), Index()), + "tokens": NeuralType(('B', 'C', 'T_encoded'), TokenIndex()), "tokens_len": NeuralType(tuple('B'), LengthsType()), }, ) @@ -244,7 +243,7 @@ def encode(self, audio: torch.Tensor, audio_len: torch.Tensor) -> Tuple[torch.Te @typecheck( input_types={ - "tokens": NeuralType(('B', 'C', 'T_encoded'), Index()), + "tokens": NeuralType(('B', 'C', 'T_encoded'), TokenIndex()), "tokens_len": NeuralType(tuple('B'), LengthsType()), }, output_types={ diff --git a/nemo/collections/tts/modules/vector_quantization.py b/nemo/collections/tts/modules/vector_quantization.py deleted file mode 100644 index 5ba2e6e58e3b..000000000000 --- a/nemo/collections/tts/modules/vector_quantization.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple - -import torch -import torch.nn as nn -import torch.nn.functional as F -from einops import rearrange, repeat -from torch import Tensor - -from nemo.collections.tts.losses.audio_codec_loss import MaskedMSELoss -from nemo.collections.tts.parts.utils.distributed import broadcast_tensors -from nemo.collections.tts.parts.utils.helpers import mask_sequence_tensor -from nemo.core.classes.common import typecheck -from nemo.core.classes.module import NeuralModule -from nemo.core.neural_types.elements import EncodedRepresentation, Index, LengthsType, LossType -from nemo.core.neural_types.neural_type import NeuralType -from nemo.utils.decorators import experimental