From b694832789c7e32c3d011418e067216dca8eabcb Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 12:23:22 +0300
Subject: [PATCH 01/13] Fix: amp_recipe.py fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 recipes_source/recipes/amp_recipe.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 2cdd37c803..99c6fa2e0e 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -79,8 +79,8 @@ def make_model(in_size, out_size, num_layers):
 # Creates data in default precision.
 # The same data is used for both default and mixed precision trials below.
 # You don't need to manually change inputs' ``dtype`` when enabling mixed precision.
-data = [torch.randn(batch_size, in_size, device="cuda") for _ in range(num_batches)]
-targets = [torch.randn(batch_size, out_size, device="cuda") for _ in range(num_batches)]
+data = [torch.randn(batch_size, in_size, device=torch.device('cuda')) for _ in range(num_batches)]
+targets = [torch.randn(batch_size, out_size, device=torch.device('cuda')) for _ in range(num_batches)]
 
 loss_fn = torch.nn.MSELoss().cuda()
 
@@ -116,7 +116,7 @@ def make_model(in_size, out_size, num_layers):
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
         # Runs the forward pass under ``autocast``.
-        with torch.autocast(device_type='cuda', dtype=torch.float16):
+        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16):
             output = net(input)
             # output is float16 because linear layers ``autocast`` to float16.
             assert output.dtype is torch.float16
@@ -151,7 +151,7 @@ def make_model(in_size, out_size, num_layers):
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(device_type='cuda', dtype=torch.float16):
+        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
 
@@ -184,7 +184,7 @@ def make_model(in_size, out_size, num_layers):
 start_timer()
 for epoch in range(epochs):
     for input, target in zip(data, targets):
-        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
+        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16, enabled=use_amp):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()
@@ -202,7 +202,7 @@ def make_model(in_size, out_size, num_layers):
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(device_type='cuda', dtype=torch.float16):
+        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()
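Note on PATCH 01: ``torch.autocast`` documents ``device_type`` as a string such
as ``'cuda'`` or ``'cpu'``, not a ``torch.device`` object, which is why later
patches in this series back this change out again (PATCH 02 drops the argument
and PATCH 13 settles on a plain string). A minimal sketch of the string form,
with a CPU fallback added here purely for illustration::

    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    x = torch.randn(8, 8, device=device)
    w = torch.randn(8, 8, device=device)

    # float16 autocast is a GPU feature, so only enable it on CUDA.
    with torch.autocast(device_type=device, dtype=torch.float16,
                        enabled=(device == 'cuda')):
        y = x @ w  # the matmul runs in float16 while autocast is active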
From aa6f573e0428eb98a723a057fdc1e18a0005eb4d Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 12:27:35 +0300
Subject: [PATCH 02/13] Fix: amp_recipe fixed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 recipes_source/recipes/amp_recipe.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 99c6fa2e0e..8fb024f229 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -76,11 +76,14 @@ def make_model(in_size, out_size, num_layers):
 num_batches = 50
 epochs = 3
 
+device = torch.device('cuda')
+torch.set_default_device(device)
+
 # Creates data in default precision.
 # The same data is used for both default and mixed precision trials below.
 # You don't need to manually change inputs' ``dtype`` when enabling mixed precision.
-data = [torch.randn(batch_size, in_size, device=torch.device('cuda')) for _ in range(num_batches)]
-targets = [torch.randn(batch_size, out_size, device=torch.device('cuda')) for _ in range(num_batches)]
+data = [torch.randn(batch_size, in_size) for _ in range(num_batches)]
+targets = [torch.randn(batch_size, out_size) for _ in range(num_batches)]
 
 loss_fn = torch.nn.MSELoss().cuda()
 
@@ -116,7 +119,7 @@ def make_model(in_size, out_size, num_layers):
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
         # Runs the forward pass under ``autocast``.
-        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16):
+        with torch.autocast(dtype=torch.float16):
             output = net(input)
             # output is float16 because linear layers ``autocast`` to float16.
             assert output.dtype is torch.float16
@@ -151,7 +154,7 @@ def make_model(in_size, out_size, num_layers):
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16):
+        with torch.autocast(dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
 
@@ -184,7 +187,7 @@ def make_model(in_size, out_size, num_layers):
 start_timer()
 for epoch in range(epochs):
     for input, target in zip(data, targets):
-        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16, enabled=use_amp):
+        with torch.autocast(dtype=torch.float16, enabled=use_amp):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()
@@ -202,7 +205,7 @@ def make_model(in_size, out_size, num_layers):
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(device_type=torch.device('cuda'), dtype=torch.float16):
+        with torch.autocast(dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()

From a92053403fd7c91cc34599421020dfd047f8a15c Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 12:34:05 +0300
Subject: [PATCH 03/13] Fix: beginner/examples_autograd/polynomial_autograd.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 recipes_source/recipes/amp_recipe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 8fb024f229..94a68285f9 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -76,7 +76,7 @@ def make_model(in_size, out_size, num_layers):
 num_batches = 50
 epochs = 3
 
-device = torch.device('cuda')
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 torch.set_default_device(device)
 
 # Creates data in default precision.
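The idiom PATCH 02 and PATCH 03 converge on, ``torch.set_default_device``
(available since PyTorch 2.0) plus factory calls without an explicit
``device=``, behaves as in this standalone sketch (the tensor names are
illustrative only)::

    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    torch.set_default_device(device)

    a = torch.randn(4, 4)                # allocated on the default device
    b = torch.randn(4, 4, device='cpu')  # an explicit device= still wins
    print(a.device, b.device)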
From 340cbd9b2e348e4d60ab70fc97175bbe1ca1b26f Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 16:57:12 +0300
Subject: [PATCH 04/13] Polynomial autograd fixed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 .../examples_autograd/polynomial_autograd.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/beginner_source/examples_autograd/polynomial_autograd.py b/beginner_source/examples_autograd/polynomial_autograd.py
index 05744ff560..9c992d2ca4 100755
--- a/beginner_source/examples_autograd/polynomial_autograd.py
+++ b/beginner_source/examples_autograd/polynomial_autograd.py
@@ -18,23 +18,23 @@
 import math
 
 dtype = torch.float
-device = torch.device("cpu")
-# device = torch.device("cuda:0")  # Uncomment this to run on GPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch.set_default_device(device)
 
 # Create Tensors to hold input and outputs.
 # By default, requires_grad=False, which indicates that we do not need to
 # compute gradients with respect to these Tensors during the backward pass.
-x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
 y = torch.sin(x)
 
 # Create random Tensors for weights. For a third order polynomial, we need
 # 4 weights: y = a + b x + c x^2 + d x^3
 # Setting requires_grad=True indicates that we want to compute gradients with
 # respect to these Tensors during the backward pass.
-a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
-b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
-c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
-d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+a = torch.randn((), dtype=dtype, requires_grad=True)
+b = torch.randn((), dtype=dtype, requires_grad=True)
+c = torch.randn((), dtype=dtype, requires_grad=True)
+d = torch.randn((), dtype=dtype, requires_grad=True)
 
 learning_rate = 1e-6
 for t in range(2000):

From 5f3b837f534a637d68e584148286c2ab72af2957 Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 17:01:11 +0300
Subject: [PATCH 05/13] Fix tuning_guide
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 recipes_source/recipes/tuning_guide.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes_source/recipes/tuning_guide.py b/recipes_source/recipes/tuning_guide.py
index 7c8aa135b1..0f82fb76d3 100644
--- a/recipes_source/recipes/tuning_guide.py
+++ b/recipes_source/recipes/tuning_guide.py
@@ -357,7 +357,7 @@ def fused_gelu(x):
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Instead of calling ``torch.rand(size).cuda()`` to generate a random tensor,
 # produce the output directly on the target device:
-# ``torch.rand(size, device=torch.device('cuda'))``.
+# ``torch.rand(size, device='cuda')``.
 #
 # This is applicable to all functions which create new tensors and accept
 # ``device`` argument:
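The tuning-guide hunk above is about avoiding a round trip through host
memory; as a standalone comparison (a sketch, assuming a CUDA device is
present)::

    import torch

    size = (1024, 1024)

    t_slow = torch.rand(size).cuda()          # CPU allocation, then a copy to GPU
    t_fast = torch.rand(size, device='cuda')  # allocated directly on the GPU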
From 7173e8b01cf297b37348e6ae0956527cf2cc0db0 Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 17:05:52 +0300
Subject: [PATCH 06/13] Fix nestedtensor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 prototype_source/nestedtensor.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/prototype_source/nestedtensor.py b/prototype_source/nestedtensor.py
index 0d2898cc4a..582f19c433 100644
--- a/prototype_source/nestedtensor.py
+++ b/prototype_source/nestedtensor.py
@@ -25,6 +25,7 @@
 import torch.nn.functional as F
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+torch.set_default_device(device)
 
 ######################################################################
 # NestedTensor Initialization
@@ -35,7 +36,7 @@
 # From the Python frontend, a nestedtensor can be created from a list of tensors.
 # We denote nt[i] as the ith tensor component of a nestedtensor.
 nt = torch.nested.nested_tensor([torch.arange(12).reshape(
-    2, 6), torch.arange(18).reshape(3, 6)], dtype=torch.float, device=device)
+    2, 6), torch.arange(18).reshape(3, 6)], dtype=torch.float)
 print(f"{nt=}")
 
 ######################################################################
@@ -111,7 +112,7 @@
 # Applying the operation on a nestedtensor is equivalent to
 # applying the operation to the underlying tensor components,
 # with the result being a nestedtensor as well.
-nt_mm = torch.nested.nested_tensor([torch.randn((2, 3, 4)), torch.randn((2, 3, 5))], device=device)
+nt_mm = torch.nested.nested_tensor([torch.randn((2, 3, 4)), torch.randn((2, 3, 5))])
 nt3 = torch.matmul(nt_transposed, nt_mm)
 print(f"Result of Matmul:\n {nt3}")
 
@@ -318,7 +319,7 @@ def mha_padded(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, nhea
 
     # Have to manipulate masks in order to apply them to the attention weights
     key_padding_mask = attn_mask_q.view(N, 1, 1, L_t).expand(-1, nheads, -1, -1).reshape(N*nheads, 1, L_t).to(device=device)
-    attn_mask = torch.zeros(key_padding_mask.shape, device=device, dtype=torch.float32)
+    attn_mask = torch.zeros(key_padding_mask.shape, dtype=torch.float32)
     attn_mask = attn_mask.masked_fill_(key_padding_mask, float("-inf"))
 
     # Zero out the attention weights where the mask is True by adding -inf prior to softmax
@@ -384,10 +385,10 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 # create inputs
 
 # create parameters
-W_q, b_q = torch.randn((E_total, E_q), device=device), torch.randn(E_total, device=device)
-W_k, b_k = torch.randn((E_total, E_k), device=device), torch.randn(E_total, device=device)
-W_v, b_v = torch.randn((E_total, E_v), device=device), torch.randn(E_total, device=device)
-W_out, b_out = torch.randn((E_out, E_total), device=device), torch.randn(E_out, device=device)
+W_q, b_q = torch.randn((E_total, E_q)), torch.randn(E_total)
+W_k, b_k = torch.randn((E_total, E_k)), torch.randn(E_total)
+W_v, b_v = torch.randn((E_total, E_v)), torch.randn(E_total)
+W_out, b_out = torch.randn((E_out, E_total)), torch.randn(E_out)
 
 # create nested input
 queries = []
@@ -396,9 +397,9 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 for i in range(N):
     l = sentence_lengths[i]
     s = l
-    queries.append(torch.randn((l, E_q), device=device))
-    keys   .append(torch.randn((s, E_k), device=device))
-    values .append(torch.randn((s, E_v), device=device))
+    queries.append(torch.randn((l, E_q)))
+    keys   .append(torch.randn((s, E_k)))
+    values .append(torch.randn((s, E_v)))
 query = torch.nested.nested_tensor(queries)
 key = torch.nested.nested_tensor(keys)
 value = torch.nested.nested_tensor(values)
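For reference, nested tensor construction also accepts an explicit
``device=``, the form this patch removes in favor of the default device and
that PATCH 11 below reinstates; a minimal sketch::

    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    nt = torch.nested.nested_tensor(
        [torch.arange(12, dtype=torch.float).reshape(2, 6),
         torch.arange(18, dtype=torch.float).reshape(3, 6)],
        device=device)
    print(nt)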
From cf21c1de062fb57413c7590864db7434b0135647 Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 17:26:35 +0300
Subject: [PATCH 07/13] Fix polynomial tensor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 advanced_source/cpp_extension.rst | 1 +
 prototype_source/nestedtensor.py  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/advanced_source/cpp_extension.rst b/advanced_source/cpp_extension.rst
index cb0e990797..6f6fa02f0e 100644
--- a/advanced_source/cpp_extension.rst
+++ b/advanced_source/cpp_extension.rst
@@ -553,6 +553,7 @@ creation time or using ``.to(cuda_device)`` after creation::
 
     import torch
     assert torch.cuda.is_available()
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     cuda_device = torch.device("cuda")  # device object representing GPU
 
     batch_size = 16
diff --git a/prototype_source/nestedtensor.py b/prototype_source/nestedtensor.py
index 582f19c433..15bfb51b32 100644
--- a/prototype_source/nestedtensor.py
+++ b/prototype_source/nestedtensor.py
@@ -454,7 +454,7 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 
 # embeddings are assumed to be the same
 E = E_total
-mha_lib = torch.nn.MultiheadAttention(E, nheads, batch_first=True, device=device)
+mha_lib = torch.nn.MultiheadAttention(E, nheads, batch_first=True)
 mha_lib.eval()
 
 ######################################################################

From 3c19f993f6be4f2a209adeee56f3b88ee0b03dce Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 17:30:42 +0300
Subject: [PATCH 08/13] Fix neural-style tutorial
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 advanced_source/neural_style_tutorial.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py
index 3d84fc508b..d606fa09be 100644
--- a/advanced_source/neural_style_tutorial.py
+++ b/advanced_source/neural_style_tutorial.py
@@ -72,6 +72,7 @@
 # method is used to move tensors or modules to a desired device.
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+torch.set_default_device(device)
 
 ######################################################################
 # Loading the Images
@@ -107,7 +108,7 @@ def image_loader(image_name):
     image = Image.open(image_name)
     # fake batch dimension required to fit network's input dimensions
     image = loader(image).unsqueeze(0)
-    return image.to(device, torch.float)
+    return image
 
 
 style_img = image_loader("./data/images/neural-style/picasso.jpg")
@@ -263,7 +264,7 @@ def forward(self, input):
 # network to evaluation mode using ``.eval()``.
 #
 
-cnn = models.vgg19(pretrained=True).features.to(device).eval()
+cnn = models.vgg19(pretrained=True).features.eval()
 
 
 
@@ -273,8 +274,8 @@ def forward(self, input):
 # We will use them to normalize the image before sending it into the network.
 #
 
-cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
-cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)
+cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406])
+cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225])
 
 # create a module to normalize input image so we can easily put it in a
 # ``nn.Sequential``
@@ -310,7 +311,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                                content_layers=content_layers_default,
                                style_layers=style_layers_default):
     # normalization module
-    normalization = Normalization(normalization_mean, normalization_std).to(device)
+    normalization = Normalization(normalization_mean, normalization_std)
 
     # just in order to have an iterable access to or list of content/style
     # losses
@@ -375,7 +376,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 #
 # ::
 #
-#    input_img = torch.randn(content_img.data.size(), device=device)
+#    input_img = torch.randn(content_img.data.size())
 
 # add the original input image to the figure:
 plt.figure()
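A detail PATCH 08 leans on and PATCH 09 revisits: ``Tensor.to`` accepts a
device and a dtype in one call, which is how ``image_loader`` moves and casts
in a single step. A standalone sketch with an illustrative tensor::

    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    t = torch.randint(0, 256, (3, 128, 128), dtype=torch.uint8)
    t = t.to(device, torch.float)  # moves to ``device`` and casts to float32
    print(t.device, t.dtype)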
From 671960cd883d798b3267e6c1dda774a2d35261f1 Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 18:38:41 +0300
Subject: [PATCH 09/13] Fix cpp_extension.rst
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 advanced_source/cpp_extension.rst        | 1 -
 advanced_source/neural_style_tutorial.py | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/advanced_source/cpp_extension.rst b/advanced_source/cpp_extension.rst
index 6f6fa02f0e..cb0e990797 100644
--- a/advanced_source/cpp_extension.rst
+++ b/advanced_source/cpp_extension.rst
@@ -553,7 +553,6 @@ creation time or using ``.to(cuda_device)`` after creation::
 
     import torch
     assert torch.cuda.is_available()
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     cuda_device = torch.device("cuda")  # device object representing GPU
 
     batch_size = 16
diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py
index d606fa09be..9c7f0f8487 100644
--- a/advanced_source/neural_style_tutorial.py
+++ b/advanced_source/neural_style_tutorial.py
@@ -14,7 +14,7 @@
 developed by Leon A. Gatys, Alexander S. Ecker and Matthias Bethge.
 Neural-Style, or Neural-Transfer, allows you to take an image and
 reproduce it with a new artistic style. The algorithm takes three images,
-an input image, a content-image, and a style-image, and changes the input 
+an input image, a content-image, and a style-image, and changes the input
 to resemble the content of the content-image and the artistic style of the
 style-image.
 
@@ -107,8 +107,7 @@ def image_loader(image_name):
     image = Image.open(image_name)
     # fake batch dimension required to fit network's input dimensions
-    image = loader(image).unsqueeze(0)
-    return image
+    return image.to(device, torch.float)
 
 
 style_img = image_loader("./data/images/neural-style/picasso.jpg")
 
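The ``cpp_extension.rst`` line removed here was redundant rather than wrong:
the snippet asserts ``torch.cuda.is_available()`` on the line above it, so the
CPU branch of the fallback could never be taken. After the revert the snippet
keeps its original CUDA-only contract (condensed sketch; the final tensor is
illustrative)::

    import torch

    assert torch.cuda.is_available()
    cuda_device = torch.device("cuda")  # device object representing GPU
    x = torch.randn(16, 32, device=cuda_device)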
From 2a34c3ccc94cff3db37e21413927c23b7c1fc7bf Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 18:39:25 +0300
Subject: [PATCH 10/13] Fix neural_style_tutorial
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 advanced_source/neural_style_tutorial.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py
index 9c7f0f8487..9b76b1bbcf 100644
--- a/advanced_source/neural_style_tutorial.py
+++ b/advanced_source/neural_style_tutorial.py
@@ -107,6 +107,7 @@ def image_loader(image_name):
     image = Image.open(image_name)
     # fake batch dimension required to fit network's input dimensions
+    image = loader(image).unsqueeze(0)
     return image.to(device, torch.float)
 
 
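Taken together, PATCH 09 and PATCH 10 leave ``image_loader`` in the following
shape (reassembled here for readability; ``imsize`` and ``loader`` are the
tutorial's own definitions, reproduced as an assumption)::

    from PIL import Image
    import torch
    import torchvision.transforms as transforms

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    imsize = 512 if torch.cuda.is_available() else 128
    loader = transforms.Compose([transforms.Resize(imsize), transforms.ToTensor()])

    def image_loader(image_name):
        image = Image.open(image_name)
        # fake batch dimension required to fit network's input dimensions
        image = loader(image).unsqueeze(0)
        return image.to(device, torch.float)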
From bc65968d1220d9257294fa96f48521fdee95688f Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 18:54:39 +0300
Subject: [PATCH 11/13] Fix nested style
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 prototype_source/nestedtensor.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/prototype_source/nestedtensor.py b/prototype_source/nestedtensor.py
index 15bfb51b32..4385e963d3 100644
--- a/prototype_source/nestedtensor.py
+++ b/prototype_source/nestedtensor.py
@@ -25,7 +25,6 @@
 import torch.nn.functional as F
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-torch.set_default_device(device)
 
 ######################################################################
 # NestedTensor Initialization
@@ -36,7 +35,7 @@
 # From the Python frontend, a nestedtensor can be created from a list of tensors.
 # We denote nt[i] as the ith tensor component of a nestedtensor.
 nt = torch.nested.nested_tensor([torch.arange(12).reshape(
-    2, 6), torch.arange(18).reshape(3, 6)], dtype=torch.float)
+    2, 6), torch.arange(18).reshape(3, 6)], dtype=torch.float, device=device)
 print(f"{nt=}")
 
 ######################################################################
@@ -112,7 +111,7 @@
 # Applying the operation on a nestedtensor is equivalent to
 # applying the operation to the underlying tensor components,
 # with the result being a nestedtensor as well.
-nt_mm = torch.nested.nested_tensor([torch.randn((2, 3, 4)), torch.randn((2, 3, 5))])
+nt_mm = torch.nested.nested_tensor([torch.randn((2, 3, 4)), torch.randn((2, 3, 5))], device=device)
 nt3 = torch.matmul(nt_transposed, nt_mm)
 print(f"Result of Matmul:\n {nt3}")
 
@@ -319,7 +318,7 @@ def mha_padded(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, nhea
 
     # Have to manipulate masks in order to apply them to the attention weights
     key_padding_mask = attn_mask_q.view(N, 1, 1, L_t).expand(-1, nheads, -1, -1).reshape(N*nheads, 1, L_t).to(device=device)
-    attn_mask = torch.zeros(key_padding_mask.shape, dtype=torch.float32)
+    attn_mask = torch.zeros(key_padding_mask.shape, device=device, dtype=torch.float32)
     attn_mask = attn_mask.masked_fill_(key_padding_mask, float("-inf"))
 
     # Zero out the attention weights where the mask is True by adding -inf prior to softmax
@@ -385,10 +384,10 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 # create inputs
 
 # create parameters
-W_q, b_q = torch.randn((E_total, E_q)), torch.randn(E_total)
-W_k, b_k = torch.randn((E_total, E_k)), torch.randn(E_total)
-W_v, b_v = torch.randn((E_total, E_v)), torch.randn(E_total)
-W_out, b_out = torch.randn((E_out, E_total)), torch.randn(E_out)
+W_q, b_q = torch.randn((E_total, E_q), device=device), torch.randn(E_total, device=device)
+W_k, b_k = torch.randn((E_total, E_k), device=device), torch.randn(E_total, device=device)
+W_v, b_v = torch.randn((E_total, E_v), device=device), torch.randn(E_total, device=device)
+W_out, b_out = torch.randn((E_out, E_total), device=device), torch.randn(E_out, device=device)
 
 # create nested input
 queries = []
@@ -397,9 +396,9 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 for i in range(N):
     l = sentence_lengths[i]
     s = l
-    queries.append(torch.randn((l, E_q)))
-    keys   .append(torch.randn((s, E_k)))
-    values .append(torch.randn((s, E_v)))
+    queries.append(torch.randn((l, E_q), device=device))
+    keys   .append(torch.randn((s, E_k), device=device))
+    values .append(torch.randn((s, E_v), device=device))
 query = torch.nested.nested_tensor(queries)
 key = torch.nested.nested_tensor(keys)
 value = torch.nested.nested_tensor(values)
@@ -454,7 +453,7 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 
 # embeddings are assumed to be the same
 E = E_total
-mha_lib = torch.nn.MultiheadAttention(E, nheads, batch_first=True)
+mha_lib = torch.nn.MultiheadAttention(E, nheads, batch_first=True, device=device) 
 mha_lib.eval()
 
 ######################################################################

From b1a589de64da6f06d8c2c16eaec8eb3abff481ea Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 18:55:08 +0300
Subject: [PATCH 12/13] fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 prototype_source/nestedtensor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prototype_source/nestedtensor.py b/prototype_source/nestedtensor.py
index 4385e963d3..0d2898cc4a 100644
--- a/prototype_source/nestedtensor.py
+++ b/prototype_source/nestedtensor.py
@@ -453,7 +453,7 @@ def zipf_sentence_lengths(alpha: float, batch_size: int) -> np.ndarray:
 
 # embeddings are assumed to be the same
 E = E_total
-mha_lib = torch.nn.MultiheadAttention(E, nheads, batch_first=True, device=device) 
+mha_lib = torch.nn.MultiheadAttention(E, nheads, batch_first=True, device=device)
 mha_lib.eval()
 
 ######################################################################

From a76c9546033bd0da4306698966a39fd8a5f6f1a0 Mon Sep 17 00:00:00 2001
From: Onur Berk Töre
Date: Sat, 10 Jun 2023 19:05:43 +0300
Subject: [PATCH 13/13] Fix amp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Onur Berk Töre
---
 recipes_source/recipes/amp_recipe.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 94a68285f9..141bc41a03 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -119,7 +119,7 @@ def make_model(in_size, out_size, num_layers):
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
         # Runs the forward pass under ``autocast``.
-        with torch.autocast(dtype=torch.float16):
+        with torch.autocast(device_type=device, dtype=torch.float16):
             output = net(input)
             # output is float16 because linear layers ``autocast`` to float16.
             assert output.dtype is torch.float16
@@ -154,7 +154,7 @@ def make_model(in_size, out_size, num_layers):
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(dtype=torch.float16):
+        with torch.autocast(device_type=device, dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
 
@@ -187,7 +187,7 @@ def make_model(in_size, out_size, num_layers):
 start_timer()
 for epoch in range(epochs):
     for input, target in zip(data, targets):
-        with torch.autocast(dtype=torch.float16, enabled=use_amp):
+        with torch.autocast(device_type=device, dtype=torch.float16, enabled=use_amp):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()
@@ -205,7 +205,7 @@ def make_model(in_size, out_size, num_layers):
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(dtype=torch.float16):
+        with torch.autocast(device_type=device, dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()
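One API detail worth keeping in mind when reading PATCH 02 and PATCH 13 side
by side: ``device_type`` is the first positional parameter of
``torch.autocast`` and has no default, so the intermediate form
``torch.autocast(dtype=torch.float16)`` raises a ``TypeError``; passing the
device string back in, as PATCH 13 does, is what makes the recipe run again.
The shape the series settles on, condensed into one runnable sketch (``net``,
``opt``, and the sizes are stand-ins for the recipe's own definitions)::

    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    torch.set_default_device(device)

    net = torch.nn.Linear(64, 64)   # parameters land on the default device
    opt = torch.optim.SGD(net.parameters(), lr=1e-3)
    scaler = torch.cuda.amp.GradScaler(enabled=(device == 'cuda'))
    loss_fn = torch.nn.MSELoss()

    data = [torch.randn(16, 64) for _ in range(4)]
    targets = [torch.randn(16, 64) for _ in range(4)]

    for input, target in zip(data, targets):
        with torch.autocast(device_type=device, dtype=torch.float16,
                            enabled=(device == 'cuda')):
            output = net(input)
            loss = loss_fn(output, target)
        # Gradient scaling and the backward pass happen outside the autocast region.
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()
        opt.zero_grad(set_to_none=True)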