diff --git a/src/aubio/aubio.go b/src/aubio/aubio.go new file mode 100644 index 0000000..e212a7d --- /dev/null +++ b/src/aubio/aubio.go @@ -0,0 +1,68 @@ +package aubio + +import ( + "fmt" + "os/exec" + "strconv" + "strings" + + "github.com/schollz/logger" + "github.com/schollz/teoperator/src/ffmpeg" + "github.com/schollz/teoperator/src/models" +) + +// SplitOnSilence splits any audio file based on its silence +func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, correction float64) (segments []models.AudioSegment, err error) { + cmd := fmt.Sprintf("-s -30 %s", fname) + logger.Debug(cmd) + out, err := exec.Command("aubioonset", strings.Fields(cmd)...).CombinedOutput() + if err != nil { + return + } + logger.Debugf("aubio output: %s", out) + + numSamples, sampleRate, err := ffmpeg.NumSamples(fname) + if err != nil { + return + } + duration := float64(numSamples) / float64(sampleRate) + + var segment models.AudioSegment + segment.Start = 0 + segment.Filename = fname + for _, line := range strings.Split(string(out), "\n") { + seconds, err := strconv.ParseFloat(strings.TrimSpace(line), 64) + if err != nil { + logger.Error(line, err) + continue + } + if seconds == 0 { + continue + } + segment.End = seconds + segment.Duration = segment.End - segment.Start + segments = append(segments, segment) + segment.Start = segment.End + } + if segment.Start < duration { + segment.End = duration + segment.Duration = segment.End - segment.Start + segments = append(segments, segment) + } + logger.Debugf("segments: %+v", segments) + + newSegments := make([]models.AudioSegment, len(segments)) + i := 0 + for _, segment := range segments { + if segment.Duration > 0.1 { + newSegments[i] = segment + i++ + } + } + if i == 0 { + err = fmt.Errorf("could not find any segments") + return + } + newSegments = newSegments[:i] + return newSegments, nil +} diff --git a/src/audiosegment/audiosegment.go b/src/audiosegment/audiosegment.go index a40c036..25771e3 100644 --- a/src/audiosegment/audiosegment.go +++ b/src/audiosegment/audiosegment.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/schollz/logger" + "github.com/schollz/teoperator/src/aubio" "github.com/schollz/teoperator/src/ffmpeg" "github.com/schollz/teoperator/src/models" "github.com/schollz/teoperator/src/op1" @@ -89,11 +90,16 @@ func SplitEqual(fname string, secondsMax float64, secondsOverlap float64, splice } if splices == 0 { - r.segments, r.err = ffmpeg.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) - if r.err != nil { - logger.Error(r.err) - results <- r - continue + logger.Debug("-- splitting on silence --") + r.segments, r.err = aubio.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) + if r.err != nil || len(r.segments) > 20 { + logger.Debug("-- splitting on silence w/ ffmpeg --") + r.segments, r.err = ffmpeg.SplitOnSilence(fnameTrunc, -22, 0.2, -0.2) + if r.err != nil { + logger.Error(r.err) + results <- r + continue + } } } else { r.segments = make([]models.AudioSegment, splices) diff --git a/src/convert/convert.go b/src/convert/convert.go index e2dada6..0ed3042 100644 --- a/src/convert/convert.go +++ b/src/convert/convert.go @@ -71,7 +71,7 @@ func ToDrumSplice(fname string, slices int) (err error) { } } else { var totalSamples int64 - totalSamples, err = ffmpeg.NumSamples(fname2) + totalSamples, _, err = ffmpeg.NumSamples(fname2) if err != nil { return } @@ -112,7 +112,7 @@ func ToDrum(fnames []string, slices int) (err error) { return } _, fnames2[i] = filepath.Split(fname2) - sampleEnd[i], err = ffmpeg.NumSamples(fname2) + sampleEnd[i], _, err = ffmpeg.NumSamples(fname2) if err != nil { return } diff --git a/src/ffmpeg/ffmpeg.go b/src/ffmpeg/ffmpeg.go index 3a9d70f..32d3f61 100644 --- a/src/ffmpeg/ffmpeg.go +++ b/src/ffmpeg/ffmpeg.go @@ -28,12 +28,17 @@ func IsInstalled() bool { return true } -func NumSamples(fname string) (numSamples int64, err error) { +func NumSamples(fname string) (numSamples int64, sampleRate int64, err error) { file, err := os.Open(fname) if err != nil { return } reader := wav.NewReader(file) + format, err := reader.Format() + if err != nil { + return + } + sampleRate = int64(format.SampleRate) defer file.Close() @@ -109,7 +114,7 @@ func Normalize(fname string, fnameout string) (err error) { if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) index := bytes.LastIndex(out, []byte("{")) var n Normalization err = json.Unmarshal(out[index:], &n) @@ -137,7 +142,7 @@ func Normalize(fname string, fnameout string) (err error) { if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) return @@ -151,7 +156,7 @@ func SplitOnSilence(fname string, silenceDB int, silenceMinimumSeconds float64, if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) // if !strings.Contains(string(out), "silence_end") { // err = fmt.Errorf("could not find silence") // logger.Error(err) @@ -232,7 +237,7 @@ func RemoveSilence(fnameIn, fnameOut string) (err error) { if err != nil { return } - logger.Debugf("ffmpeg output: %s", out) + logger.Tracef("ffmpeg output: %s", out) return }