From bb263ee9dd43c3cacce93cd688647550492a107e Mon Sep 17 00:00:00 2001
From: Eli
Date: Thu, 15 Feb 2024 13:29:17 -0500
Subject: [PATCH 1/6] Add a simple PyTorch training example

---
 examples/svi_torch.py         | 108 ++++++++++++++++++++++++++++++++++
 tutorial/source/index.rst     |   4 +-
 tutorial/source/svi_torch.rst |  15 +++++
 3 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 examples/svi_torch.py
 create mode 100644 tutorial/source/svi_torch.rst

diff --git a/examples/svi_torch.py b/examples/svi_torch.py
new file mode 100644
index 0000000000..fb05130105
--- /dev/null
+++ b/examples/svi_torch.py
@@ -0,0 +1,108 @@
+# Copyright Contributors to the Pyro project.
+# SPDX-License-Identifier: Apache-2.0
+
+# Using vanilla PyTorch to perform optimization in SVI.
+#
+# This tutorial demonstrates how to use standard PyTorch optimizers, dataloaders, and training loops
+# to perform optimization in SVI. This is useful when you want to use custom optimizers,
+# learning rate schedules, dataloaders, or other advanced training techniques,
+# or just to simplify integration with other elements of the PyTorch ecosystem.
+
+from typing import Callable
+import argparse
+
+import torch
+
+import pyro
+import pyro.distributions as dist
+from pyro.infer import Trace_ELBO
+from pyro.infer.autoguide import AutoNormal
+from pyro.nn import PyroModule
+
+
+# We define a model as usual. This model is data parallel and supports subsampling.
+class Model(PyroModule):
+    def __init__(self, size):
+        super().__init__()
+        self.size = size
+        # We register a buffer for a constant scalar tensor to represent zero.
+        # This is useful for making priors that do not depend on inputs
+        # or learnable parameters compatible with the Module.to() method
+        # for setting the device or dtype of a module and its parameters.
+        self.register_buffer("zero", torch.tensor(0.0))
+
+    def forward(self, covariates, data=None):
+        # Sample parameters from priors that make use of the zero buffer trick
+        coeff = pyro.sample("coeff", dist.Normal(self.zero, 1))
+        bias = pyro.sample("bias", dist.Normal(self.zero, 1))
+        scale = pyro.sample("scale", dist.LogNormal(self.zero, 1))
+
+        # Since we'll use a PyTorch dataloader during training, we need to
+        # manually pass minibatches of (covariates, data) that are smaller than
+        # the full self.size, rather than relying on pyro.plate to automatically subsample.
+        with pyro.plate("data", self.size, len(covariates)):
+            loc = bias + coeff * covariates
+            return pyro.sample("obs", dist.Normal(loc, scale), obs=data)
+
+
+def main(args):
+    # Make PyroModule parameters local (like ordinary torch.nn.Parameters),
+    # rather than shared by name through Pyro's global parameter store.
+    # This is highly recommended whenever models can be written without pyro.param().
+    pyro.settings.set(module_local_params=True)
+
+    # set seed for reproducibility
+    pyro.set_rng_seed(args.seed)
+
+    # Create a synthetic dataset from a randomly initialized model.
+    with torch.no_grad():
+        covariates = torch.randn(args.size)
+        data = Model(args.size)(covariates)
+
+    # Create a model and a guide, both as (Pyro)Modules.
+    model: torch.nn.Module = Model(args.size)
+    guide: torch.nn.Module = AutoNormal(model)
+
+    # Create a loss function as a Module that includes model and guide parameters.
+    # All Pyro ELBO estimators can be called with a (model, guide) pair as arguments
+    # to return a loss function Module that takes the same arguments as the model and guide
+    # and exposes all of their torch.nn.Parameters and pyro.nn.PyroParam parameters.
+    elbo: Callable[[torch.nn.Module, torch.nn.Module], torch.nn.Module] = Trace_ELBO()
+    loss_fn: torch.nn.Module = elbo(model, guide)
+
+    # Create a dataloader.
+    dataset = torch.utils.data.TensorDataset(covariates, data)
+    dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size)
+
+    # All relevant parameters need to be initialized before an optimizer can be created.
+    # Since we used an AutoNormal guide, our parameters have not been initialized yet.
+    # Therefore we initialize the model and guide by running one mini-batch through the loss.
+    mini_batch = dataset[: args.batch_size]
+    loss_fn(*mini_batch)
+
+    # Create a PyTorch optimizer for the parameters of the model and guide in loss_fn.
+    optimizer = torch.optim.Adam(loss_fn.parameters(), lr=args.learning_rate)
+
+    # Run stochastic variational inference using PyTorch optimizers from torch.optim
+    for epoch in range(args.max_epochs):
+        for batch in dataloader:
+            optimizer.zero_grad()
+            loss = loss_fn(*batch)
+            loss.backward()
+            optimizer.step()
+        print(f"epoch {epoch} loss = {loss}")
+
+
+if __name__ == "__main__":
+    assert pyro.__version__.startswith("1.8.6")
+    parser = argparse.ArgumentParser(
+        description="Using vanilla PyTorch to perform optimization in SVI"
+    )
+    parser.add_argument("--size", default=10000, type=int)
+    parser.add_argument("--batch_size", default=100, type=int)
+    parser.add_argument("--learning_rate", default=0.01, type=float)
+    parser.add_argument("--seed", default=20200723, type=int)
+    # pl.Trainer arguments.
+    parser.add_argument("--max_epochs", default=10, type=int)
+    args = parser.parse_args()
+    main(args)
diff --git a/tutorial/source/index.rst b/tutorial/source/index.rst
index 442cb74878..1320c63283 100644
--- a/tutorial/source/index.rst
+++ b/tutorial/source/index.rst
@@ -22,7 +22,8 @@
 and look carefully through the series :ref:`practical-pyro-and-pytorch`,
 especially the :doc:`first Bayesian regression tutorial <bayesian_regression>`.
 This tutorial goes step-by-step through solving a simple Bayesian machine learning problem with Pyro,
 grounding the concepts from the introductory tutorials in runnable code.
-Industry users interested in serving predictions from a trained model in C++ should also read :doc:`the PyroModule tutorial <modules>`.
+Users interested in integrating with existing PyTorch training and serving infrastructure or serving predictions from a trained model in C++ should also read :doc:`the PyroModule tutorial <modules>`
+and look at the :doc:`SVI with PyTorch <svi_torch>` and :doc:`SVI with Lightning <svi_lightning>` examples.
 Most users who reach this point will also find our :doc:`guide to tensor shapes in Pyro <tensor_shapes>` essential reading.
 Pyro makes extensive use of the behavior of `"array broadcasting" <https://numpy.org/doc/stable/user/basics.broadcasting.html>`_
@@ -95,6 +96,7 @@ List of Tutorials
    workflow
    prior_predictive
    jit
+   svi_torch
    svi_horovod
    svi_lightning
    svi_flow_guide
diff --git a/tutorial/source/svi_torch.rst b/tutorial/source/svi_torch.rst
new file mode 100644
index 0000000000..f916d41e4e
--- /dev/null
+++ b/tutorial/source/svi_torch.rst
@@ -0,0 +1,15 @@
+Example: using vanilla PyTorch to perform optimization in SVI
+=============================================================
+
+This script uses argparse arguments to construct a PyTorch optimizer and dataloader, for example::
+
+    $ python examples/svi_torch.py --size 10000 --batch_size 100 --max_epochs 100
+
+`View svi_torch.py on github`__
+
+.. _github: https://github.com/pyro-ppl/pyro/blob/dev/examples/svi_torch.py
+
+__ github_
+
+.. literalinclude:: ../../examples/svi_torch.py
+   :language: python

From af4af909b4c8e1ef31962c8af5065449a98b3293 Mon Sep 17 00:00:00 2001
From: Eli
Date: Thu, 15 Feb 2024 13:33:40 -0500
Subject: [PATCH 2/6] isort

---
 examples/svi_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/svi_torch.py b/examples/svi_torch.py
index fb05130105..de756286ee 100644
--- a/examples/svi_torch.py
+++ b/examples/svi_torch.py
@@ -8,8 +8,8 @@
 # learning rate schedules, dataloaders, or other advanced training techniques,
 # or just to simplify integration with other elements of the PyTorch ecosystem.
 
-from typing import Callable
 import argparse
+from typing import Callable
 
 import torch

From 5839685253f9fd765aa2c897a9de34899b1dc92a Mon Sep 17 00:00:00 2001
From: Eli
Date: Thu, 15 Feb 2024 13:37:07 -0500
Subject: [PATCH 3/6] tweak intro

---
 tutorial/source/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorial/source/index.rst b/tutorial/source/index.rst
index 1320c63283..0c4f0153c2 100644
--- a/tutorial/source/index.rst
+++ b/tutorial/source/index.rst
@@ -22,7 +22,7 @@
 and look carefully through the series :ref:`practical-pyro-and-pytorch`,
 especially the :doc:`first Bayesian regression tutorial <bayesian_regression>`.
 This tutorial goes step-by-step through solving a simple Bayesian machine learning problem with Pyro,
 grounding the concepts from the introductory tutorials in runnable code.
-Users interested in integrating with existing PyTorch training and serving infrastructure or serving predictions from a trained model in C++ should also read :doc:`the PyroModule tutorial <modules>`
+Users interested in integrating with existing PyTorch training and serving infrastructure should also read :doc:`the PyroModule tutorial <modules>`
 and look at the :doc:`SVI with PyTorch <svi_torch>` and :doc:`SVI with Lightning <svi_lightning>` examples.
 Most users who reach this point will also find our :doc:`guide to tensor shapes in Pyro <tensor_shapes>` essential reading.

From 56bc6629518e23b5244f9d4964c773957098dca7 Mon Sep 17 00:00:00 2001
From: Eli
Date: Thu, 15 Feb 2024 13:45:56 -0500
Subject: [PATCH 4/6] add cuda and add to test_examples

---
 examples/svi_torch.py         | 7 ++++---
 tests/test_examples.py        | 2 ++
 tutorial/source/svi_torch.rst | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/examples/svi_torch.py b/examples/svi_torch.py
index de756286ee..f791dac24c 100644
--- a/examples/svi_torch.py
+++ b/examples/svi_torch.py
@@ -69,6 +69,7 @@
     # and exposes all of their torch.nn.Parameters and pyro.nn.PyroParam parameters.
     elbo: Callable[[torch.nn.Module, torch.nn.Module], torch.nn.Module] = Trace_ELBO()
     loss_fn: torch.nn.Module = elbo(model, guide)
+    loss_fn.to(device=torch.device("cuda" if args.cuda else "cpu"))
 
     # Create a dataloader.
     dataset = torch.utils.data.TensorDataset(covariates, data)
     dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size)
@@ -84,7 +85,7 @@
     optimizer = torch.optim.Adam(loss_fn.parameters(), lr=args.learning_rate)
 
     # Run stochastic variational inference using PyTorch optimizers from torch.optim
-    for epoch in range(args.max_epochs):
+    for epoch in range(args.num_epochs):
         for batch in dataloader:
             optimizer.zero_grad()
             loss = loss_fn(*batch)
             loss.backward()
             optimizer.step()
@@ -102,7 +103,7 @@
     parser.add_argument("--batch_size", default=100, type=int)
     parser.add_argument("--learning_rate", default=0.01, type=float)
     parser.add_argument("--seed", default=20200723, type=int)
-    # pl.Trainer arguments.
-    parser.add_argument("--max_epochs", default=10, type=int)
+    parser.add_argument("--num_epochs", default=10, type=int)
+    parser.add_argument("--cuda", action="store_true", default=False)
     args = parser.parse_args()
     main(args)
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 8e62a7f770..731665cd72 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -110,6 +110,7 @@
     "sparse_gamma_def.py --num-epochs=2 --eval-particles=2 --eval-frequency=1 --guide custom",
     "sparse_gamma_def.py --num-epochs=2 --eval-particles=2 --eval-frequency=1 --guide auto",
     "sparse_gamma_def.py --num-epochs=2 --eval-particles=2 --eval-frequency=1 --guide easy",
+    "svi_torch.py --num-epochs=2 --size=400",
     "svi_horovod.py --num-epochs=2 --size=400 --no-horovod",
     pytest.param(
         "svi_lightning.py --max_epochs=2 --size=400 --accelerator cpu --devices 1",
@@ -181,6 +182,7 @@
     "sir_hmc.py -t=2 -w=2 -n=4 -d=2 -m=1 --enum --cuda",
     "sir_hmc.py -t=2 -w=2 -n=4 -d=2 -p=10000 --sequential --cuda",
     "sir_hmc.py -t=2 -w=2 -n=4 -d=100 -p=10000 --cuda",
+    "svi_torch.py --num-epochs=2 --size=400 --cuda",
     "svi_horovod.py --num-epochs=2 --size=400 --cuda --no-horovod",
     pytest.param(
         "svi_lightning.py --max_epochs=2 --size=400 --accelerator gpu --devices 1",
diff --git a/tutorial/source/svi_torch.rst b/tutorial/source/svi_torch.rst
index f916d41e4e..559e75c24b 100644
--- a/tutorial/source/svi_torch.rst
+++ b/tutorial/source/svi_torch.rst
@@ -3,7 +3,7 @@ Example: using vanilla PyTorch to perform optimization in SVI
 
 This script uses argparse arguments to construct a PyTorch optimizer and dataloader, for example::
 
-    $ python examples/svi_torch.py --size 10000 --batch_size 100 --max_epochs 100
+    $ python examples/svi_torch.py --size 10000 --batch_size 100 --num-epochs 100
 
 `View svi_torch.py on github`__

From 1610e919185acd4bb485405a14972fb28e7031a6 Mon Sep 17 00:00:00 2001
From: Eli
Date: Thu, 15 Feb 2024 13:47:50 -0500
Subject: [PATCH 5/6] fix cuda

---
 examples/svi_torch.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/svi_torch.py b/examples/svi_torch.py
index f791dac24c..2ef0143bd4 100644
--- a/examples/svi_torch.py
+++ b/examples/svi_torch.py
@@ -58,6 +58,8 @@
     with torch.no_grad():
         covariates = torch.randn(args.size)
         data = Model(args.size)(covariates)
+    covariates = covariates.to(device=torch.device("cuda" if args.cuda else "cpu"))
+    data = data.to(device=torch.device("cuda" if args.cuda else "cpu"))
 
     # Create a model and a guide, both as (Pyro)Modules.
     model: torch.nn.Module = Model(args.size)
     guide: torch.nn.Module = AutoNormal(model)

From 93c264a304a2344b6d9d59b8e73ed6911ba56712 Mon Sep 17 00:00:00 2001
From: Eli
Date: Thu, 15 Feb 2024 13:50:20 -0500
Subject: [PATCH 6/6] arg consistency

---
 examples/svi_torch.py         | 6 +++---
 tutorial/source/svi_torch.rst | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/svi_torch.py b/examples/svi_torch.py
index 2ef0143bd4..5cf17393aa 100644
--- a/examples/svi_torch.py
+++ b/examples/svi_torch.py
@@ -102,10 +102,10 @@
         description="Using vanilla PyTorch to perform optimization in SVI"
     )
     parser.add_argument("--size", default=10000, type=int)
-    parser.add_argument("--batch_size", default=100, type=int)
-    parser.add_argument("--learning_rate", default=0.01, type=float)
+    parser.add_argument("--batch-size", default=100, type=int)
+    parser.add_argument("--learning-rate", default=0.01, type=float)
     parser.add_argument("--seed", default=20200723, type=int)
-    parser.add_argument("--num_epochs", default=10, type=int)
+    parser.add_argument("--num-epochs", default=10, type=int)
     parser.add_argument("--cuda", action="store_true", default=False)
     args = parser.parse_args()
     main(args)
diff --git a/tutorial/source/svi_torch.rst b/tutorial/source/svi_torch.rst
index 559e75c24b..a0f74718fb 100644
--- a/tutorial/source/svi_torch.rst
+++ b/tutorial/source/svi_torch.rst
@@ -3,7 +3,7 @@ Example: using vanilla PyTorch to perform optimization in SVI
 
 This script uses argparse arguments to construct a PyTorch optimizer and dataloader, for example::
 
-    $ python examples/svi_torch.py --size 10000 --batch_size 100 --num-epochs 100
+    $ python examples/svi_torch.py --size 10000 --batch-size 100 --num-epochs 100
 
 `View svi_torch.py on github`__
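
The loop in examples/svi_torch.py uses a fixed learning rate, but because the ELBO loss is an ordinary torch.nn.Module and the optimizer is an ordinary torch.optim.Adam, any scheduler from torch.optim.lr_scheduler can be dropped in. A minimal sketch of this, assuming the Model class, dataset, dataloader, and args from examples/svi_torch.py above are in scope; the StepLR settings are illustrative, not part of the example::

    # Sketch: the same SVI training loop as examples/svi_torch.py, extended
    # with a PyTorch learning-rate scheduler. Model, dataset, dataloader, and
    # args are assumed to be defined exactly as in the example above.
    import torch
    import pyro
    from pyro.infer import Trace_ELBO
    from pyro.infer.autoguide import AutoNormal

    pyro.settings.set(module_local_params=True)
    model = Model(args.size)
    guide = AutoNormal(model)
    loss_fn = Trace_ELBO()(model, guide)  # a torch.nn.Module exposing all parameters
    loss_fn(*dataset[: args.batch_size])  # run one mini-batch to initialize parameters

    optimizer = torch.optim.Adam(loss_fn.parameters(), lr=args.learning_rate)
    # Illustrative choice: halve the learning rate every 5 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    for epoch in range(args.num_epochs):
        for batch in dataloader:
            optimizer.zero_grad()
            loss_fn(*batch).backward()
            optimizer.step()
        scheduler.step()  # advance the schedule once per epoch

Stepping the scheduler once per epoch, outside the inner minibatch loop, matches the usual usage of torch.optim.lr_scheduler classes.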