diff --git a/audiotools/__init__.py b/audiotools/__init__.py index 46f72e97..07c4be1e 100644 --- a/audiotools/__init__.py +++ b/audiotools/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.3.10" +__version__ = "0.3.11" from .core import AudioSignal, STFTParams, Meter, util from . import metrics from . import data diff --git a/audiotools/core/audio_signal.py b/audiotools/core/audio_signal.py index e426076b..9f251ff5 100644 --- a/audiotools/core/audio_signal.py +++ b/audiotools/core/audio_signal.py @@ -111,16 +111,18 @@ def excerpt(cls, audio_path, offset=None, duration=None, state=None, **kwargs): def salient_excerpt( cls, audio_path, loudness_cutoff=None, num_tries=None, state=None, **kwargs ): - loudness_cutoff = -np.inf if loudness_cutoff is None else loudness_cutoff state = util.random_state(state) - loudness = -np.inf - num_try = 0 - while loudness <= loudness_cutoff: + if loudness_cutoff is None: excerpt = cls.excerpt(audio_path, state=state, **kwargs) - loudness = excerpt.loudness() - num_try += 1 - if num_tries is not None and num_try >= num_tries: - break + else: + loudness = -np.inf + num_try = 0 + while loudness <= loudness_cutoff: + excerpt = cls.excerpt(audio_path, state=state, **kwargs) + loudness = excerpt.loudness() + num_try += 1 + if num_tries is not None and num_try >= num_tries: + break return excerpt @classmethod diff --git a/audiotools/core/effects.py b/audiotools/core/effects.py index ab82eb4a..82618ca0 100644 --- a/audiotools/core/effects.py +++ b/audiotools/core/effects.py @@ -78,15 +78,16 @@ def convolve(self, other, start_at_max=True): delta = torch.zeros_like(other.audio_data) delta[..., 0] = 1 - delta_fft = torch.fft.rfft(delta) - other_fft = torch.fft.rfft(other.audio_data) - self_fft = torch.fft.rfft(self.audio_data) + length = self.signal_length + delta_fft = torch.fft.rfft(delta, length) + other_fft = torch.fft.rfft(other.audio_data, length) + self_fft = torch.fft.rfft(self.audio_data, length) convolved_fft = other_fft * self_fft - convolved_audio = torch.fft.irfft(convolved_fft) + convolved_audio = torch.fft.irfft(convolved_fft, length) delta_convolved_fft = other_fft * delta_fft - delta_audio = torch.fft.irfft(delta_convolved_fft) + delta_audio = torch.fft.irfft(delta_convolved_fft, length) # Use the delta to rescale the audio exactly as needed. delta_max = delta_audio.abs().max(dim=-1, keepdims=True)[0] diff --git a/audiotools/data/transforms.py b/audiotools/data/transforms.py index fee8b10c..659f76c0 100644 --- a/audiotools/data/transforms.py +++ b/audiotools/data/transforms.py @@ -71,7 +71,7 @@ def __call__(self, *args, **kwargs): def instantiate( self, - state: RandomState, + state: RandomState = None, signal: AudioSignal = None, ): state = util.random_state(state) @@ -105,7 +105,7 @@ def instantiate( def batch_instantiate( self, - states: list, + states: list = None, signal: AudioSignal = None, ): kwargs = [] diff --git a/setup.py b/setup.py index 1bbf0b95..fb3fb685 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="audiotools", - version="0.3.10", + version="0.3.11", classifiers=[ "Intended Audience :: Developers", "Intended Audience :: Education", diff --git a/tests/core/test_audio_signal.py b/tests/core/test_audio_signal.py index 93669b94..e8221e0c 100644 --- a/tests/core/test_audio_signal.py +++ b/tests/core/test_audio_signal.py @@ -128,6 +128,11 @@ def test_salient_excerpt(loudness_cutoff): signal = AudioSignal.salient_excerpt( f.name, loudness_cutoff=np.inf, duration=1, num_tries=10 ) + signal = AudioSignal.salient_excerpt( + f.name, + loudness_cutoff=None, + duration=1, + ) def test_arithmetic():