Skip to content

Commit

Permalink
Merge pull request #3 from emotechlab/docs/super-guide-time
Browse files Browse the repository at this point in the history
Tutorial stuff
  • Loading branch information
xd009642 authored Jan 31, 2024
2 parents b533aa2 + ac9dd0c commit 96ed4a5
Show file tree
Hide file tree
Showing 26 changed files with 354 additions and 45 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: Checks

on:
push:
branches: ["master"]
pull_request:
jobs:
build_and_test:
name: Rust project
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt
- name: Cargo build
run: cargo build
- name: Cargo test
run: cargo test
- name: Rustfmt
run: cargo fmt --check
6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,26 @@ edition = "2021"

[features]
default = ["ndarray-linalg/intel-mkl"]
# Do not use me unless you want a lot of npy files dumped in your CWD
debug_dump = ["ndarray-npy"]

[dependencies]
anyhow = "1.0.71"
lbfgsb = "0.1.0"
ndarray = { version = "0.15.6", features = ["approx", "rayon"] }
ndarray-linalg = "0.16.0"
ndarray-npy = { version = "0.8.1", optional = true }
ndarray-rand = "0.14.0"
ndarray-stats = "0.5.1"
rand = "0.8.5"
rand_isaac = "0.3.0"
realfft = "3.3.0"
thiserror = "1.0.40"
tracing = "0.1.37"

[dev-dependencies]
approx = "0.5.1"
clap = { version = "4.4.7", features = ["derive"] }
float-cmp = "0.9.0"
hound = "3.5.1"
ndarray-npy = "0.8.1"
rand_isaac = "0.3.0"
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ audio.

To learn more about this algorithm:

* [Our own tutorial](./tutorial/README.md)
* [Papers with Code](https://paperswithcode.com/method/griffin-lim-algorithm)
90 changes: 63 additions & 27 deletions examples/run_griffin_lim.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,75 @@
use clap::Parser;
use griffin_lim::GriffinLim;
use hound::{SampleFormat, WavSpec, WavWriter};
use ndarray::prelude::*;
use ndarray_npy::read_npy;
use std::error::Error;
use std::path::PathBuf;
use std::result::Result;
use std::time::Instant;

/// Reconstruct audio from a mel spectrogram using the Griffin-Lim vocoder.
#[derive(Parser, Debug)]
pub struct Args {
    /// Path of the `.npy` file containing the input mel spectrogram
    // Kept as a `PathBuf` (like `output`) so non-UTF-8 paths are accepted.
    #[clap(long, short, default_value = "example_spectrogram.npy")]
    input: PathBuf,
    /// Path of the WAV file to write the reconstructed audio to
    #[clap(short, long, default_value = "output.wav")]
    output: PathBuf,
    /// Sample rate used for the mel filter bank and the output WAV file
    #[clap(long, default_value = "22050")]
    sample_rate: u32,
    /// FFT size used to build the mel filter bank (overlap is `ffts - hop_length`)
    #[clap(long, default_value = "1024")]
    ffts: usize,
    /// Number of mel bands in the filter bank
    #[clap(long, default_value = "80")]
    mels: usize,
    /// Hop length between frames; must not exceed `ffts`
    #[clap(long, default_value = "256")]
    hop_length: usize,
    /// Maximum frequency of the mel filter bank
    #[clap(long, default_value = "8000.0")]
    max_frequency: f32,
    /// Power the mel spectrogram was raised to (undone before inversion)
    #[clap(long, default_value = "1.7")]
    power: f32,
    /// Number of Griffin-Lim iterations to run
    #[clap(long, default_value = "10")]
    iters: usize,
}

fn main() -> Result<(), Box<dyn Error>> {
let spectrogram: Array2<f32> = read_npy("resources/example_spectrogram.npy")?;

let mel_basis = griffin_lim::mel::create_mel_filter_bank(22050.0, 1024, 80, 0.0, Some(8000.0));

for iter in [0, 1, 2, 5, 10] {
let timer = Instant::now();
let mut vocoder = GriffinLim::new(mel_basis.clone(), 1024 - 256, 1.5, 1, 0.99)?;
vocoder.iter = iter;
let audio = vocoder.infer(&spectrogram)?;
let duration = Instant::now().duration_since(timer);
let rtf = duration.as_secs_f32() / (audio.len() as f32 / 22050_f32);
println!("Iterations: {}, rtf: {}", iter, rtf);
let spec = WavSpec {
channels: 1,
sample_rate: 22050,
bits_per_sample: 32,
sample_format: SampleFormat::Float,
};

let mut writer = WavWriter::create(format!("audio_output_griffinlim_{}.wav", iter), spec)?;

for sample in audio {
writer.write_sample(sample)?;
}

writer.finalize()?;
println!("Saved audio_output_griffinlim_{}.wav", iter);
let args = Args::parse();
let spectrogram: Array2<f32> = read_npy(args.input)?;

let mel_basis = griffin_lim::mel::create_mel_filter_bank(
args.sample_rate as f32,
args.ffts,
args.mels,
0.0,
Some(args.max_frequency),
);

let timer = Instant::now();
let vocoder = GriffinLim::new(
mel_basis.clone(),
args.ffts - args.hop_length,
args.power,
args.iters,
0.99,
)?;
let audio = vocoder.infer(&spectrogram)?;
let duration = Instant::now().duration_since(timer);
let rtf = duration.as_secs_f32() / (audio.len() as f32 / args.sample_rate as f32);
println!("Iterations: {}, rtf: {}", args.iters, rtf);

let spec = WavSpec {
channels: 1,
sample_rate: args.sample_rate,
bits_per_sample: 16,
sample_format: SampleFormat::Int,
};

let mut wav_writer = WavWriter::create(&args.output, spec)?;

let mut i16_writer = wav_writer.get_i16_writer(audio.len() as u32);
for sample in &audio {
i16_writer.write_sample((*sample * i16::MAX as f32) as i16);
}
i16_writer.flush()?;

println!("Saved {}", args.output.display());
Ok(())
}
56 changes: 39 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,39 @@ use ndarray::{par_azip, prelude::*, ScalarOperand};
use ndarray_linalg::error::LinalgError;
use ndarray_linalg::svd::SVD;
use ndarray_linalg::{Lapack, Scalar};
#[cfg(feature = "debug_dump")]
use ndarray_npy::WritableElement;
use ndarray_rand::rand_distr::uniform::SampleUniform;
use ndarray_rand::{rand_distr::Uniform, RandomExt};
use ndarray_stats::errors::MinMaxError;
use ndarray_stats::QuantileExt;
use num_traits::{Float, FloatConst, FromPrimitive};
use rand::SeedableRng;
use rand_isaac::isaac64::Isaac64Rng;
use realfft::num_complex::Complex;
use realfft::num_traits;
use realfft::num_traits::AsPrimitive;
use realfft::RealFftPlanner;
use std::fmt::Display;
use tracing::warn;

/// Writes `$array` to the `.npy` file named by `$file` when the `debug_dump` feature is enabled.
///
/// With the feature disabled the whole expansion is compiled out, so neither argument is
/// evaluated. A failed write is reported through `tracing::error!` instead of being propagated,
/// so dumping never changes the control flow of the caller.
macro_rules! debug_dump_array {
    ($file:expr, $array:expr) => {
        #[cfg(feature = "debug_dump")]
        {
            // Evaluate the path expression exactly once: callers may pass `format!(..)`, and the
            // path is needed both for the write and for the error message.
            let file = $file;
            if let Err(e) = ndarray_npy::write_npy(&file, &$array.view()) {
                tracing::error!("Failed to write '{:?}': {}", file, e);
            }
        }
    };
}

/// Stand-in for `ndarray_npy::WritableElement`, compiled only when the `debug_dump` feature is
/// disabled.
///
/// Do not use this in any real way. Because we can't cfg on trait bounds and want to ensure the
/// matrices are dump-able via trait bounds we need to remove the cfg from the trait bound and push
/// it up to here. I tried doing this via trait inheritance but this didn't work for the `Complex<T>`
/// bound and this seemed the only reliable way.
#[cfg(not(feature = "debug_dump"))]
pub trait WritableElement {}

// Blanket impl: with dumping disabled every type satisfies the bound, so it constrains nothing.
#[cfg(not(feature = "debug_dump"))]
impl<T> WritableElement for T {}

pub mod mel;

pub struct GriffinLim {
Expand All @@ -36,6 +55,7 @@ impl GriffinLim {
iter: usize,
momentum: f32,
) -> anyhow::Result<Self> {
debug_dump_array!("mel_basis.npy", mel_basis);
let nfft = 2 * (mel_basis.dim().1 - 1);
if noverlap >= nfft {
bail!(
Expand Down Expand Up @@ -63,6 +83,7 @@ impl GriffinLim {
}

pub fn infer(&self, mel_spec: &Array2<f32>) -> anyhow::Result<Array1<f32>> {
debug_dump_array!("mel_spectrogram.npy", mel_spec);
// mel_basis has dims (nmel, nfft)
// lin_spec has dims (nfft, time)
// mel_spec has dims (nmel, time)
Expand All @@ -71,6 +92,7 @@ impl GriffinLim {

// correct for "power" parameter of mel-spectrogram
lin_spec.mapv_inplace(|x| x.powf(1.0 / self.power));
debug_dump_array!("linear_spectrogram.npy", lin_spec);

let params = Parameters {
momentum: self.momentum,
Expand Down Expand Up @@ -106,7 +128,6 @@ impl GriffinLim {
/// Parameters to provide to the griffin-lim vocoder
pub struct Parameters<T> {
momentum: T,
seed: u64,
iter: usize,
init_random: bool,
}
Expand All @@ -128,7 +149,6 @@ where
pub fn new() -> Self {
Self {
momentum: T::from_f32(0.99).unwrap(),
seed: 42,
iter: 32,
init_random: true,
}
Expand All @@ -145,12 +165,6 @@ where
self
}

/// A random seed to use for initializing the phase.
pub fn seed(mut self, seed: u64) -> Self {
self.seed = seed;
self
}

/// Number of iterations to run - default value is 32.
pub fn iter(mut self, iter: usize) -> Self {
self.iter = iter;
Expand Down Expand Up @@ -273,8 +287,8 @@ pub fn griffin_lim<T>(
noverlap: usize,
) -> anyhow::Result<Array1<T>>
where
T: realfft::FftNum + Float + FloatConst + Display + SampleUniform,
Complex<T>: ScalarOperand,
T: realfft::FftNum + Float + FloatConst + Display + SampleUniform + WritableElement,
Complex<T>: ScalarOperand + WritableElement,
{
griffin_lim_with_params(spectrogram, nfft, noverlap, Parameters::new())
}
Expand All @@ -288,11 +302,10 @@ pub fn griffin_lim_with_params<T>(
params: Parameters<T>,
) -> anyhow::Result<Array1<T>>
where
T: realfft::FftNum + Float + FloatConst + Display + SampleUniform,
Complex<T>: ScalarOperand,
T: realfft::FftNum + Float + FloatConst + Display + SampleUniform + WritableElement,
Complex<T>: ScalarOperand + WritableElement,
{
// set up griffin lim parameters
let mut rng = Isaac64Rng::seed_from_u64(params.seed);
if params.momentum > T::one() || params.momentum < T::zero() {
bail!("Momentum is {}, should be in range [0,1]", params.momentum);
}
Expand All @@ -302,6 +315,7 @@ where

// Initialise estimate
let mut estimate = if params.init_random {
let mut rng = rand::thread_rng();
let mut angles = Array2::<T>::random_using(
spectrogram.raw_dim(),
Uniform::from(-T::PI()..T::PI()),
Expand All @@ -314,8 +328,11 @@ where
} else {
spectrogram.clone()
};
let mut _est_i = 1;
debug_dump_array!("estimate_spec_0.npy", estimate);

// TODO: Pre-allocate inverse and rebuilt and use `.assign` instead of `=`
// this requires some fighting with the borow checker
// this requires some fighting with the borrow checker
let mut inverse: Array1<T>;
let mut rebuilt: Array2<Complex<T>>;
let mut tprev: Option<Array2<Complex<T>>> = None;
Expand All @@ -335,12 +352,15 @@ where
} else {
tprev = Some(rebuilt);
}
// Get angles from estimate and apply to magnitueds
// Get angles from estimate and apply to magnitudes
let eps = T::min_positive_value();
// get angles from new estimate
estimate.mapv_inplace(|x| x / (x.norm() + eps));
// enforce magnitudes
estimate.assign(&(&estimate * &spectrogram));

debug_dump_array!(format!("estimate_spec_{}.npy", _est_i), estimate);
_est_i += 1;
}
let mut signal = istft(&estimate, &window, planner, nfft, noverlap);
let norm = T::from(nfft).unwrap();
Expand Down Expand Up @@ -490,6 +510,8 @@ where
mod tests {
use float_cmp::assert_approx_eq;
use ndarray_npy::read_npy;
use rand::SeedableRng;
use rand_isaac::isaac64::Isaac64Rng;

use super::*;

Expand Down
1 change: 1 addition & 0 deletions tutorial/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
estimate_spec*
Loading

0 comments on commit 96ed4a5

Please sign in to comment.