A complete implementation of Mogrifier, a circuit for enhancing LSTMs and potentially other networks. It allows two vectors to modulate each other by having each gate the other in an interleaved, iterative fashion.
$ pip install mogrifier
import torch
from mogrifier import Mogrifier
# Configure the Mogrifier for a 512-dim input and a 256-dim hidden state.
mogrify = Mogrifier(
    dim=512,
    dim_hidden=256,
    iters=5,  # number of iterations; defaults to 5, as the paper recommends for LSTMs
    factorize_k=16,  # if specified, factorize weight matrices into (dim x k) and (k x dim)
)

# Example tensors whose last dimensions match `dim` and `dim_hidden` above.
x = torch.randn(1, 16, 512)
h = torch.randn(1, 16, 256)

# The two tensors modulate each other; a pair of tensors is returned.
out, hidden_out = mogrify(x, h)  # (1, 16, 512), (1, 16, 256)

# Each output keeps the shape of its corresponding input.
assert out.shape == x.shape
assert hidden_out.shape == h.shape
@inproceedings{Melis2020Mogrifier,
title = {Mogrifier LSTM},
author = {Gábor Melis and Tomáš Kočiský and Phil Blunsom},
booktitle = {International Conference on Learning Representations},
year = {2020},
url = {https://openreview.net/forum?id=SJe5P6EYvS}
}