multitrack_musicvae.py

# -*- coding: utf-8 -*-
"""Multitrack MusicVAE.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/github/magenta/magenta-demos/blob/master/colab-notebooks/Multitrack_MusicVAE.ipynb

# Multitrack MusicVAE: Learning a Latent Space of Multitrack Measures
### ___Ian Simon, Adam Roberts, Colin Raffel, Jesse Engel, Curtis Hawthorne, Douglas Eck___

[MusicVAE](https://g.co/magenta/music-vae) learns a latent space of musical sequences.  Here we apply the MusicVAE framework to single measures of multi-instrument General MIDI, a symbolic music representation that uses a standard set of 128 instrument sounds.

The models in this notebook are capable of encoding and decoding single measures of up to 8 tracks, optionally conditioned on an underlying chord.  Encoding transforms a single measure into a vector in a latent space, and decoding transforms a latent vector back into a measure.  Both encoding and decoding are performed hierarchically, with one level operating on tracks and another operating on the notes (and choice of instrument) in each track.

See our [arXiv paper](https://arxiv.org/abs/1806.00195) for more details, along with our [blog post](http://g.co/magenta/multitrack) with links to JavaScript CodePens.

# Environment Setup
"""

#@title Setup Environment

print('Copying checkpoints and modified SGM SoundFont (https://sites.google.com/site/soundfonts4u) from GCS.')
print('This will take a few minutes...')
!gsutil -q -m cp gs://download.magenta.tensorflow.org/models/music_vae/multitrack/* /content/
!gsutil -q -m cp gs://download.magenta.tensorflow.org/soundfonts/SGM-v2.01-Sal-Guit-Bass-V1.3.sf2 /content/

print('Installing dependencies...')
!apt-get update -qq && apt-get install -qq libfluidsynth1 build-essential libasound2-dev libjack-dev
!pip install -qU magenta pyfluidsynth pretty_midi

print('Importing libraries...')

import numpy as np
import os
import tensorflow.compat.v1 as tf

from google.colab import files

import magenta.music as mm
from magenta.music.sequences_lib import concatenate_sequences
from magenta.models.music_vae import configs
from magenta.models.music_vae.trained_model import TrainedModel

tf.disable_v2_behavior()
print('Done!')

#@title Definitions

BATCH_SIZE = 4
Z_SIZE = 512
TOTAL_STEPS = 512
BAR_SECONDS = 2.0
CHORD_DEPTH = 49

SAMPLE_RATE = 44100
SF2_PATH = '/content/SGM-v2.01-Sal-Guit-Bass-V1.3.sf2'

# Play sequence using SoundFont.
def play(note_sequences):
  if not isinstance(note_sequences, list):
    note_sequences = [note_sequences]
  for ns in note_sequences:
    mm.play_sequence(ns, synth=mm.fluidsynth, sf2_path=SF2_PATH)
  
# Spherical linear interpolation.
def slerp(p0, p1, t):
  """Spherical linear interpolation."""
  omega = np.arccos(np.dot(np.squeeze(p0/np.linalg.norm(p0)), np.squeeze(p1/np.linalg.norm(p1))))
  so = np.sin(omega)
  return np.sin((1.0-t)*omega) / so * p0 + np.sin(t*omega)/so * p1

# Download sequence.
def download(note_sequence, filename):
  mm.sequence_proto_to_midi_file(note_sequence, filename)
  files.download(filename)

# Chord encoding tensor.
def chord_encoding(chord):
  index = mm.TriadChordOneHotEncoding().encode_event(chord)
  c = np.zeros([TOTAL_STEPS, CHORD_DEPTH])
  c[0,0] = 1.0
  c[1:,index] = 1.0
  return c

# Trim sequences to exactly one bar.
def trim_sequences(seqs, num_seconds=BAR_SECONDS):
  for i in range(len(seqs)):
    seqs[i] = mm.extract_subsequence(seqs[i], 0.0, num_seconds)
    seqs[i].total_time = num_seconds

# Consolidate instrument numbers by MIDI program.
def fix_instruments_for_concatenation(note_sequences):
  instruments = {}
  for i in range(len(note_sequences)):
    for note in note_sequences[i].notes:
      if not note.is_drum:
        if note.program not in instruments:
          if len(instruments) >= 8:
            instruments[note.program] = len(instruments) + 2
          else:
            instruments[note.program] = len(instruments) + 1
        note.instrument = instruments[note.program]
      else:
        note.instrument = 9

"""# Chord-Conditioned Model"""

#@title Load Checkpoint

config = configs.CONFIG_MAP['hier-multiperf_vel_1bar_med_chords']
model = TrainedModel(
    config, batch_size=BATCH_SIZE,
    checkpoint_dir_or_path='/content/model_chords_fb64.ckpt')

#@title Same Chord, Random Styles

chord = 'C' #@param {type:"string"}
temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}
seqs = model.sample(n=BATCH_SIZE, length=TOTAL_STEPS, temperature=temperature,
                    c_input=chord_encoding(chord))

trim_sequences(seqs)
play(seqs)

#@title Same Style, Chord Progression

chord_1 = 'C' #@param {type:"string"}
chord_2 = 'Caug' #@param {type:"string"}
chord_3 = 'Am' #@param {type:"string"}
chord_4 = 'E' #@param {type:"string"}
chords = [chord_1, chord_2, chord_3, chord_4]

temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}
z = np.random.normal(size=[1, Z_SIZE])
seqs = [
    model.decode(length=TOTAL_STEPS, z=z, temperature=temperature,
                 c_input=chord_encoding(c))[0]
    for c in chords
]

trim_sequences(seqs)
fix_instruments_for_concatenation(seqs)
prog_ns = concatenate_sequences(seqs)

play(prog_ns)
mm.plot_sequence(prog_ns)

#@title (Optional) Save Arrangement to MIDI
download(prog_ns, '_'.join(chords) + '.mid')

#@title Style Interpolation, Repeating Chord Progression

chord_1 = 'Dm' #@param {type:"string"}
chord_2 = 'F' #@param {type:"string"}
chord_3 = 'Am' #@param {type:"string"}
chord_4 = 'G' #@param {type:"string"}
chords = [chord_1, chord_2, chord_3, chord_4]

num_bars = 32 #@param {type:"slider", min:4, max:64, step:4}
temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}

z1 = np.random.normal(size=[Z_SIZE])
z2 = np.random.normal(size=[Z_SIZE])
z = np.array([slerp(z1, z2, t)
              for t in np.linspace(0, 1, num_bars)])

seqs = [
    model.decode(length=TOTAL_STEPS, z=z[i:i+1, :], temperature=temperature,
                 c_input=chord_encoding(chords[i % 4]))[0]
    for i in range(num_bars)
]

trim_sequences(seqs)
fix_instruments_for_concatenation(seqs)
prog_interp_ns = concatenate_sequences(seqs)

play(prog_interp_ns)
mm.plot_sequence(prog_interp_ns)

#@title (Optional) Save to MIDI
download(prog_interp_ns, 'interp_' + '_'.join(chords) + '.mid')

"""# Unconditioned Model"""

#@title Load Checkpoint

config = configs.CONFIG_MAP['hier-multiperf_vel_1bar_med']
model = TrainedModel(
    config, batch_size=BATCH_SIZE,
    checkpoint_dir_or_path='/content/model_fb256.ckpt')
model._config.data_converter._max_tensors_per_input = None

#@title Random Samples

temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}
seqs = model.sample(n=BATCH_SIZE, length=TOTAL_STEPS, temperature=temperature)

trim_sequences(seqs)
play(seqs)

#@title Interpolation Between Random Samples

num_bars = 32 #@param {type:"slider", min:4, max:64, step:1}
temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}

z1 = np.random.normal(size=[Z_SIZE])
z2 = np.random.normal(size=[Z_SIZE])
z = np.array([slerp(z1, z2, t)
              for t in np.linspace(0, 1, num_bars)])

seqs = model.decode(length=TOTAL_STEPS, z=z, temperature=temperature)

trim_sequences(seqs)
fix_instruments_for_concatenation(seqs)
interp_ns = concatenate_sequences(seqs)

play(interp_ns)
mm.plot_sequence(interp_ns)

#@title (Optional) Save to MIDI
download(interp_ns, 'interp.mid')

#@title Upload MIDI Files to Reconstruct
midi_files = files.upload().values()
seqs = [mm.midi_to_sequence_proto(midi) for midi in midi_files]

uploaded_seqs = []
for seq in seqs:
  _, tensors, _, _ = model._config.data_converter.to_tensors(seq)
  uploaded_seqs.extend(model._config.data_converter.from_tensors(tensors))
  
trim_sequences(uploaded_seqs)

print('Parsed %d measures' % len(uploaded_seqs))

#@title Encode and Decode

index = 0 #@param {type:"integer"}
temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}

z, _, _ = model.encode([uploaded_seqs[index]])
reconstructed_seq = model.decode(z, length=TOTAL_STEPS,
                                 temperature=temperature)[0]

trim_sequences([reconstructed_seq])

print('Original')
play(uploaded_seqs[index])
mm.plot_sequence(uploaded_seqs[index])

print('Reconstructed')
play(reconstructed_seq)
mm.plot_sequence(reconstructed_seq)

#@title Interpolation Between Encodings

index_1 = 0 #@param {type:"integer"}
index_2 = 1 #@param {type:"integer"}

num_bars = 32 #@param {type:"slider", min:4, max:64, step:4}
temperature = 0.2 #@param {type:"slider", min:0.01, max:1.5, step:0.01}

z1, _, _ = model.encode([uploaded_seqs[index_1]])
z2, _, _ = model.encode([uploaded_seqs[index_2]])
z = np.array([slerp(np.squeeze(z1), np.squeeze(z2), t)
              for t in np.linspace(0, 1, num_bars)])

seqs = model.decode(length=TOTAL_STEPS, z=z, temperature=temperature)

trim_sequences(seqs)
fix_instruments_for_concatenation(seqs)
recon_interp_ns = concatenate_sequences(seqs)

play(recon_interp_ns)
mm.plot_sequence(recon_interp_ns)

#@title (Optional) Save to MIDI
download(recon_interp_ns, 'recon_interp.mid')