From 1ee573413becfe7142d7b91b52d6eb21fabc62f7 Mon Sep 17 00:00:00 2001 From: Zack Scholl Date: Fri, 6 Aug 2021 10:54:58 -0700 Subject: [PATCH 1/4] try using aubio --- src/aubio/aubio.go | 66 ++++++++++++++++++++++++++++++++ src/audiosegment/audiosegment.go | 12 ++++-- src/convert/convert.go | 4 +- src/ffmpeg/ffmpeg.go | 7 +++- 4 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 src/aubio/aubio.go diff --git a/src/aubio/aubio.go b/src/aubio/aubio.go new file mode 100644 index 0000000..75079b4 --- /dev/null +++ b/src/aubio/aubio.go @@ -0,0 +1,66 @@ +package aubio + +import ( + "fmt" + "os/exec" + "strconv" + "strings" + + "github.com/schollz/logger" + "github.com/schollz/teoperator/src/ffmpeg" + "github.com/schollz/teoperator/src/models" +) + +// SplitOnSilence splits any audio file based on its silence +func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, correction float64) (segments []models.AudioSegment, err error) { + cmd := fmt.Sprintf("-s 30 %s", fname) + logger.Debug(cmd) + out, err := exec.Command("aubioonset", strings.Fields(cmd)...).CombinedOutput() + if err != nil { + return + } + logger.Debugf("aubio output: %s", out) + + numSamples, sampleRate, err := ffmpeg.NumSamples(fname) + if err != nil { + return + } + duration := float64(numSamples) / float64(sampleRate) + + var segment models.AudioSegment + segment.Start = 0 + segment.Filename = fname + for _, line := range strings.Split(string(out), "\n") { + seconds, err := strconv.ParseFloat(line, 64) + if err != nil { + continue + } + if seconds == 0 { + continue + } + segment.End = seconds + segment.Duration = segment.End - segment.Start + segments = append(segments, segment) + segment.Start = segment.End + } + if segment.Start < duration { + segment.End = duration + segment.Duration = segment.End - segment.Start + segments = append(segments, segment) + } + + newSegments := make([]models.AudioSegment, len(segments)) + i := 0 + for _, segment := range segments { + if segment.Duration > 0.1 { + newSegments[i] = segment + i++ + } + } + if i == 0 { + err = fmt.Errorf("could not find any segments") + return + } + newSegments = newSegments[:i] + return newSegments, nil +} diff --git a/src/audiosegment/audiosegment.go b/src/audiosegment/audiosegment.go index a40c036..1a5f1c7 100644 --- a/src/audiosegment/audiosegment.go +++ b/src/audiosegment/audiosegment.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/schollz/logger" + "github.com/schollz/teoperator/src/aubio" "github.com/schollz/teoperator/src/ffmpeg" "github.com/schollz/teoperator/src/models" "github.com/schollz/teoperator/src/op1" @@ -89,11 +90,14 @@ func SplitEqual(fname string, secondsMax float64, secondsOverlap float64, splice } if splices == 0 { - r.segments, r.err = ffmpeg.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) + r.segments, r.err = aubio.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) if r.err != nil { - logger.Error(r.err) - results <- r - continue + r.segments, r.err = ffmpeg.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) + if r.err != nil { + logger.Error(r.err) + results <- r + continue + } } } else { r.segments = make([]models.AudioSegment, splices) diff --git a/src/convert/convert.go b/src/convert/convert.go index e2dada6..0ed3042 100644 --- a/src/convert/convert.go +++ b/src/convert/convert.go @@ -71,7 +71,7 @@ func ToDrumSplice(fname string, slices int) (err error) { } } else { var totalSamples int64 - totalSamples, err = ffmpeg.NumSamples(fname2) + totalSamples, _, err = ffmpeg.NumSamples(fname2) if err != nil { return } @@ -112,7 +112,7 @@ func ToDrum(fnames []string, slices int) (err error) { return } _, fnames2[i] = filepath.Split(fname2) - sampleEnd[i], err = ffmpeg.NumSamples(fname2) + sampleEnd[i], _, err = ffmpeg.NumSamples(fname2) if err != nil { return } diff --git a/src/ffmpeg/ffmpeg.go b/src/ffmpeg/ffmpeg.go index 3a9d70f..07d3a2b 100644 --- a/src/ffmpeg/ffmpeg.go +++ b/src/ffmpeg/ffmpeg.go @@ -28,12 +28,17 @@ func IsInstalled() bool { return true } -func NumSamples(fname string) (numSamples int64, err error) { +func NumSamples(fname string) (numSamples int64, sampleRate int64, err error) { file, err := os.Open(fname) if err != nil { return } reader := wav.NewReader(file) + format, err := reader.Format() + if err != nil { + return + } + sampleRate = int64(format.SampleRate) defer file.Close() From bc85fc30b343489f5e972e728735ba22d79ac461 Mon Sep 17 00:00:00 2001 From: Zack Scholl Date: Fri, 6 Aug 2021 11:18:47 -0700 Subject: [PATCH 2/4] update --- src/ffmpeg/ffmpeg.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ffmpeg/ffmpeg.go b/src/ffmpeg/ffmpeg.go index 07d3a2b..32d3f61 100644 --- a/src/ffmpeg/ffmpeg.go +++ b/src/ffmpeg/ffmpeg.go @@ -114,7 +114,7 @@ func Normalize(fname string, fnameout string) (err error) { if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) index := bytes.LastIndex(out, []byte("{")) var n Normalization err = json.Unmarshal(out[index:], &n) @@ -142,7 +142,7 @@ func Normalize(fname string, fnameout string) (err error) { if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) return @@ -156,7 +156,7 @@ func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) // if !strings.Contains(string(out), "silence_end") { // err = fmt.Errorf("could not find silence") // logger.Error(err) @@ -237,7 +237,7 @@ func RemoveSilence(fnameIn, fnameOut string) (err error) { if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) return } From 7b9a8e18d5e7da228a64663bd537561945cc200e Mon Sep 17 00:00:00 2001 From: Zack Scholl Date: Fri, 6 Aug 2021 11:27:42 -0700 Subject: [PATCH 3/4] works better --- src/aubio/aubio.go | 6 ++++-- src/audiosegment/audiosegment.go | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/aubio/aubio.go b/src/aubio/aubio.go index 75079b4..e212a7d 100644 --- a/src/aubio/aubio.go +++ b/src/aubio/aubio.go @@ -13,7 +13,7 @@ import ( // SplitOnSilence splits any audio file based on its silence func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, correction float64) (segments []models.AudioSegment, err error) { - cmd := fmt.Sprintf("-s 30 %s", fname) + cmd := fmt.Sprintf("-s -30 %s", fname) logger.Debug(cmd) out, err := exec.Command("aubioonset", strings.Fields(cmd)...).CombinedOutput() if err != nil { @@ -31,8 +31,9 @@ func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, segment.Start = 0 segment.Filename = fname for _, line := range strings.Split(string(out), "\n") { - seconds, err := strconv.ParseFloat(line, 64) + seconds, err := strconv.ParseFloat(strings.TrimSpace(line), 64) if err != nil { + logger.Error(line, err) continue } if seconds == 0 { @@ -48,6 +49,7 @@ func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, segment.Duration = segment.End - segment.Start segments = append(segments, segment) } + logger.Debugf("segments: %+v", segments) newSegments := make([]models.AudioSegment, len(segments)) i := 0 diff --git a/src/audiosegment/audiosegment.go b/src/audiosegment/audiosegment.go index 1a5f1c7..afa4ba0 100644 --- a/src/audiosegment/audiosegment.go +++ b/src/audiosegment/audiosegment.go @@ -90,8 +90,10 @@ func SplitEqual(fname string, secondsMax float64, secondsOverlap float64, splice } if splices == 0 { + logger.Debug("-- splitting on silence --") r.segments, r.err = aubio.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) if r.err != nil { + logger.Debug("-- splitting on silence w/ ffmpeg --") r.segments, r.err = ffmpeg.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) if r.err != nil { logger.Error(r.err) From 3d843b35a576dfe1b669d203232a386209b9094b Mon Sep 17 00:00:00 2001 From: Zack Scholl Date: Fri, 6 Aug 2021 11:47:39 -0700 Subject: [PATCH 4/4] use ffmpeg more --- src/audiosegment/audiosegment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/audiosegment/audiosegment.go b/src/audiosegment/audiosegment.go index afa4ba0..25771e3 100644 --- a/src/audiosegment/audiosegment.go +++ b/src/audiosegment/audiosegment.go @@ -92,7 +92,7 @@ func SplitEqual(fname string, secondsMax float64, secondsOverlap float64, splice if splices == 0 { logger.Debug("-- splitting on silence --") r.segments, r.err = aubio.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) - if r.err != nil { + if r.err != nil || len(r.segments) > 20 { logger.Debug("-- splitting on silence w/ ffmpeg --") r.segments, r.err = ffmpeg.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) if r.err != nil {