Skip to content

Commit

Permalink
Merge pull request #211 from marciomazza/seek_step_for_detect_silence
Browse files Browse the repository at this point in the history
Add seek step to enable faster silence detection
  • Loading branch information
jiaaro authored Aug 29, 2017
2 parents 908c2e2 + 2a48cba commit 5950ece
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 15 deletions.
35 changes: 21 additions & 14 deletions pydub/silence.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .utils import (
db_to_float,
)
import itertools

from .utils import db_to_float

def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16):

def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
seg_len = len(audio_segment)

# you can't have a silent portion of a sound that is longer than the sound
Expand All @@ -16,11 +16,18 @@ def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16):
# find silence and add start and end indices to the to_cut list
silence_starts = []

# check every (1 sec by default) chunk of sound for silence
slice_starts = seg_len - min_silence_len
# check successive (1 sec by default) chunks of sound for silence
# try a chunk at every "seek step" (or every chunk for a seek step == 1)
last_slice_start = seg_len - min_silence_len
slice_starts = range(0, last_slice_start + 1, seek_step)

# guarantee last_slice_start is included in the range
# to make sure the last portion of the audio is searched
if last_slice_start % seek_step:
slice_starts = itertools.chain(slice_starts, [last_slice_start])

for i in range(slice_starts + 1):
audio_slice = audio_segment[i:i+min_silence_len]
for i in slice_starts:
audio_slice = audio_segment[i:i + min_silence_len]
if audio_slice.rms < silence_thresh:
silence_starts.append(i)

Expand Down Expand Up @@ -54,8 +61,8 @@ def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16):
return silent_ranges


def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16):
silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh)
def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
len_seg = len(audio_segment)

# if there is no silence, the whole thing is nonsilent
Expand All @@ -81,8 +88,8 @@ def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16):
return nonsilent_ranges



def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100):
def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
seek_step=1):
"""
audio_segment - original pydub.AudioSegment() object
Expand All @@ -97,7 +104,7 @@ def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, ke
abruptly cut off. (default: 100ms)
"""

not_silence_ranges = detect_nonsilent(audio_segment, min_silence_len, silence_thresh)
not_silence_ranges = detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step)

chunks = []
for start_i, end_i in not_silence_ranges:
Expand All @@ -106,4 +113,4 @@ def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, ke

chunks.append(audio_segment[start_i:end_i])

return chunks
return chunks
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
[wheel]
universal = 1
universal = 1

[pep8]
max-line-length = 100
5 changes: 5 additions & 0 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,11 @@ def test_detect_silence_seg1(self):
silent_ranges = detect_silence(self.seg1, min_silence_len=500, silence_thresh=-20)
self.assertEqual(silent_ranges, [[0, 775], [3141, 4033], [5516, 6051]])

def test_detect_silence_seg1_with_seek_split(self):
silent_ranges = detect_silence(self.seg1, min_silence_len=500, silence_thresh=-20,
seek_step=10)
self.assertEqual(silent_ranges, [[0, 770], [3150, 4030], [5520, 6050]])

def test_realistic_audio(self):
silent_ranges = detect_silence(self.seg4, min_silence_len=1000, silence_thresh=self.seg4.dBFS)

Expand Down

0 comments on commit 5950ece

Please sign in to comment.