Skip to content

Commit

Permalink
Automatically zero-pad loaded samples in segment.py when files have different lengths
Browse files Browse the repository at this point in the history
  • Loading branch information
popcornell committed Feb 5, 2024
1 parent 2ea5d6b commit 17a567d
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions nemo/collections/asr/parts/preprocessing/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,15 @@ def from_file_list(
else:
# Check the dimensions match
if len(a_samples) != len(samples):
raise RuntimeError(
f'Loaded samples need to have identical length: {a_samples.shape} != {samples.shape}'
)

logging.warning(f"Loaded samples for {a_file}, offset {offset}, duration {duration} have different length from previously loaded ones: {a_samples.shape} != {samples.shape}")
logging.warning("Using zero padding !")
maxlen = max(len(a_samples), len(samples))
a_samples = np.pad(a_samples, ((0, maxlen - len(a_samples)), (0, 0)))
samples = np.pad(samples, ((0, maxlen - len(samples)), (0, 0)))
#raise RuntimeError(
# f'Loaded samples need to have identical length: {a_samples.shape} != {samples.shape}'
#)

# Concatenate along channel dimension
samples = np.concatenate([samples, a_samples], axis=1)
Expand Down

0 comments on commit 17a567d

Please sign in to comment.