From f88e34063b511b936cf63cf0acb414a7a46144d1 Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 08:13:09 +0100 Subject: [PATCH 01/10] Adjust the API in anticipation of future overloading by making assignment of data to torch_tensor a subroutine instead of a function. Co-authored-by: Joe Wallwork <22053413+jwallwork23@users.noreply.github.com> --- src/ftorch.f90 | 334 ++++++++++++++++++++++++++---------------------- src/ftorch.fypp | 81 ++++++------ 2 files changed, 222 insertions(+), 193 deletions(-) diff --git a/src/ftorch.f90 b/src/ftorch.f90 index ac4a8468..4c5478a3 100644 --- a/src/ftorch.f90 +++ b/src/ftorch.f90 @@ -97,17 +97,18 @@ end function torch_from_blob_c contains !> Returns a tensor filled with the scalar value 0. - function torch_tensor_zeros(ndims, tensor_shape, dtype, device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_zeros(tensor, ndims, tensor_shape, dtype, & + device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_int64_t - integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor - integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor - integer(c_int), intent(in) :: dtype !! Data type of the tensor - integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) + type(torch_tensor), intent(out) :: tensor !! Returned tensor + integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor + integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor + integer(c_int), intent(in) :: dtype !! Data type of the tensor + integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical(c_bool), optional, intent(in) :: requires_grad_opt !! 
Whether gradients need to be computed for the created tensor - type(torch_tensor) :: tensor !! Returned tensor - integer(c_int) :: device_index_value !! device index used - logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor + integer(c_int) :: device_index_value !! device index used + logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor interface function torch_zeros_c(ndims, tensor_shape, dtype, device_type, device_index, requires_grad) result(tensor) & @@ -139,20 +140,21 @@ end function torch_zeros_c end if tensor%p = torch_zeros_c(ndims, tensor_shape, dtype, device_type, device_index_value, requires_grad) - end function torch_tensor_zeros + end subroutine torch_tensor_zeros !> Returns a tensor filled with the scalar value 1. - function torch_tensor_ones(ndims, tensor_shape, dtype, device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_ones(tensor, ndims, tensor_shape, dtype, & + device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_int64_t - integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor - integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor - integer(c_int), intent(in) :: dtype !! Data type of the tensor - integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) + type(torch_tensor), intent(out) :: tensor !! Returned tensor + integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor + integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor + integer(c_int), intent(in) :: dtype !! Data type of the tensor + integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! 
device index to use for `torch_kCUDA` case logical(c_bool), optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - type(torch_tensor) :: tensor !! Returned tensor - integer(c_int) :: device_index_value !! device index used - logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor + integer(c_int) :: device_index_value !! device index used + logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor interface function torch_ones_c(ndims, tensor_shape, dtype, device_type, device_index, requires_grad) result(tensor) & @@ -184,29 +186,29 @@ end function torch_ones_c end if tensor%p = torch_ones_c(ndims, tensor_shape, dtype, device_type, device_index_value, requires_grad) - end function torch_tensor_ones + end subroutine torch_tensor_ones ! Torch Tensor API !| Exposes the given data as a tensor without taking ownership of the original data. ! This routine will take an (i, j, k) array and return an (k, j, i) tensor. - function torch_tensor_from_blob(data, ndims, tensor_shape, layout, dtype, & - device_type, device_index, & - requires_grad_opt) result(tensor) + subroutine torch_tensor_from_blob(tensor, data, ndims, tensor_shape, layout, dtype, & + device_type, device_index, & + requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_int64_t, c_ptr - type(c_ptr), intent(in) :: data !! Pointer to data - integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor - integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor - integer(c_int), intent(in) :: layout(*) !! Layout for strides for accessing data - integer(c_int), intent(in) :: dtype !! Data type of the tensor - integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) + type(torch_tensor), intent(out) :: tensor !! Returned tensor + type(c_ptr), intent(in) :: data !! 
Pointer to data + integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor + integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor + integer(c_int), intent(in) :: layout(*) !! Layout for strides for accessing data + integer(c_int), intent(in) :: dtype !! Data type of the tensor + integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical(c_bool), optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - type(torch_tensor) :: tensor !! Returned tensor - integer(c_int) :: i !! loop index - integer(c_int64_t) :: strides(ndims) !! Strides for accessing data - integer(c_int) :: device_index_value !! device index used - logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor + integer(c_int) :: i !! loop index + integer(c_int64_t) :: strides(ndims) !! Strides for accessing data + integer(c_int) :: device_index_value !! device index used + logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor if (.not. present(requires_grad_opt)) then requires_grad = logical(.false., c_bool) @@ -229,7 +231,7 @@ function torch_tensor_from_blob(data, ndims, tensor_shape, layout, dtype, & endif tensor%p = torch_from_blob_c(data, ndims, tensor_shape, strides, dtype, device_type, device_index_value, requires_grad) - end function torch_tensor_from_blob + end subroutine torch_tensor_from_blob !> Prints the contents of a tensor. subroutine torch_tensor_print(tensor) @@ -249,7 +251,7 @@ end subroutine torch_tensor_print !> Determines the device index of a tensor. function torch_tensor_get_device_index(tensor) result(device_index) use, intrinsic :: iso_c_binding, only : c_int - type(torch_tensor), intent(in) :: tensor !! 
Input tensor + type(torch_tensor), value, intent(in) :: tensor !! Input tensor integer(c_int) :: device_index !! Device index of tensor interface @@ -409,10 +411,14 @@ end subroutine torch_jit_module_delete_c end subroutine torch_module_delete !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `int8` - function torch_tensor_from_array_int8_1d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int8_1d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int8 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int8), intent(in), target :: data_in(:) !! Input data that tensor will point at integer, intent(in) :: layout(1) !! Control order of indices @@ -420,9 +426,6 @@ function torch_tensor_from_array_int8_1d(data_in, layout, c_device_type, device_ integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(1) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt8 !! 
Data type @@ -459,13 +462,17 @@ function torch_tensor_from_array_int8_1d(data_in, layout, c_device_type, device_ device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int8_1d + end subroutine torch_tensor_from_array_int8_1d !> Return a Torch tensor pointing to data_in array of rank 2 containing data of type `int8` - function torch_tensor_from_array_int8_2d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int8_2d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int8 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int8), intent(in), target :: data_in(:,:) !! Input data that tensor will point at integer, intent(in) :: layout(2) !! Control order of indices @@ -473,9 +480,6 @@ function torch_tensor_from_array_int8_2d(data_in, layout, c_device_type, device_ integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(2) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt8 !! 
Data type @@ -512,13 +516,17 @@ function torch_tensor_from_array_int8_2d(data_in, layout, c_device_type, device_ device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int8_2d + end subroutine torch_tensor_from_array_int8_2d !> Return a Torch tensor pointing to data_in array of rank 3 containing data of type `int8` - function torch_tensor_from_array_int8_3d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int8_3d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int8 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int8), intent(in), target :: data_in(:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(3) !! Control order of indices @@ -526,9 +534,6 @@ function torch_tensor_from_array_int8_3d(data_in, layout, c_device_type, device_ integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(3) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt8 !! 
Data type @@ -565,13 +570,17 @@ function torch_tensor_from_array_int8_3d(data_in, layout, c_device_type, device_ device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int8_3d + end subroutine torch_tensor_from_array_int8_3d !> Return a Torch tensor pointing to data_in array of rank 4 containing data of type `int8` - function torch_tensor_from_array_int8_4d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int8_4d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int8 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int8), intent(in), target :: data_in(:,:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(4) !! Control order of indices @@ -579,9 +588,6 @@ function torch_tensor_from_array_int8_4d(data_in, layout, c_device_type, device_ integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(4) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt8 !! 
Data type @@ -618,13 +624,17 @@ function torch_tensor_from_array_int8_4d(data_in, layout, c_device_type, device_ device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int8_4d + end subroutine torch_tensor_from_array_int8_4d !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `int16` - function torch_tensor_from_array_int16_1d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int16_1d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int16 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int16), intent(in), target :: data_in(:) !! Input data that tensor will point at integer, intent(in) :: layout(1) !! Control order of indices @@ -632,9 +642,6 @@ function torch_tensor_from_array_int16_1d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(1) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt16 !! 
Data type @@ -671,13 +678,17 @@ function torch_tensor_from_array_int16_1d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int16_1d + end subroutine torch_tensor_from_array_int16_1d !> Return a Torch tensor pointing to data_in array of rank 2 containing data of type `int16` - function torch_tensor_from_array_int16_2d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int16_2d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int16 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int16), intent(in), target :: data_in(:,:) !! Input data that tensor will point at integer, intent(in) :: layout(2) !! Control order of indices @@ -685,9 +696,6 @@ function torch_tensor_from_array_int16_2d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(2) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt16 !! 
Data type @@ -724,13 +732,17 @@ function torch_tensor_from_array_int16_2d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int16_2d + end subroutine torch_tensor_from_array_int16_2d !> Return a Torch tensor pointing to data_in array of rank 3 containing data of type `int16` - function torch_tensor_from_array_int16_3d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int16_3d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int16 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int16), intent(in), target :: data_in(:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(3) !! Control order of indices @@ -738,9 +750,6 @@ function torch_tensor_from_array_int16_3d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(3) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt16 !! 
Data type @@ -777,13 +786,17 @@ function torch_tensor_from_array_int16_3d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int16_3d + end subroutine torch_tensor_from_array_int16_3d !> Return a Torch tensor pointing to data_in array of rank 4 containing data of type `int16` - function torch_tensor_from_array_int16_4d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int16_4d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int16 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int16), intent(in), target :: data_in(:,:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(4) !! Control order of indices @@ -791,9 +804,6 @@ function torch_tensor_from_array_int16_4d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(4) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt16 !! 
Data type @@ -830,13 +840,17 @@ function torch_tensor_from_array_int16_4d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int16_4d + end subroutine torch_tensor_from_array_int16_4d !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `int32` - function torch_tensor_from_array_int32_1d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int32_1d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int32), intent(in), target :: data_in(:) !! Input data that tensor will point at integer, intent(in) :: layout(1) !! Control order of indices @@ -844,9 +858,6 @@ function torch_tensor_from_array_int32_1d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(1) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt32 !! 
Data type @@ -883,13 +894,17 @@ function torch_tensor_from_array_int32_1d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int32_1d + end subroutine torch_tensor_from_array_int32_1d !> Return a Torch tensor pointing to data_in array of rank 2 containing data of type `int32` - function torch_tensor_from_array_int32_2d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int32_2d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int32), intent(in), target :: data_in(:,:) !! Input data that tensor will point at integer, intent(in) :: layout(2) !! Control order of indices @@ -897,9 +912,6 @@ function torch_tensor_from_array_int32_2d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(2) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt32 !! 
Data type @@ -936,13 +948,17 @@ function torch_tensor_from_array_int32_2d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int32_2d + end subroutine torch_tensor_from_array_int32_2d !> Return a Torch tensor pointing to data_in array of rank 3 containing data of type `int32` - function torch_tensor_from_array_int32_3d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int32_3d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int32), intent(in), target :: data_in(:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(3) !! Control order of indices @@ -950,9 +966,6 @@ function torch_tensor_from_array_int32_3d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(3) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt32 !! 
Data type @@ -989,13 +1002,17 @@ function torch_tensor_from_array_int32_3d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int32_3d + end subroutine torch_tensor_from_array_int32_3d !> Return a Torch tensor pointing to data_in array of rank 4 containing data of type `int32` - function torch_tensor_from_array_int32_4d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int32_4d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int32), intent(in), target :: data_in(:,:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(4) !! Control order of indices @@ -1003,9 +1020,6 @@ function torch_tensor_from_array_int32_4d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(4) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt32 !! 
Data type @@ -1042,13 +1056,17 @@ function torch_tensor_from_array_int32_4d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int32_4d + end subroutine torch_tensor_from_array_int32_4d !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `int64` - function torch_tensor_from_array_int64_1d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int64_1d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int64), intent(in), target :: data_in(:) !! Input data that tensor will point at integer, intent(in) :: layout(1) !! Control order of indices @@ -1056,9 +1074,6 @@ function torch_tensor_from_array_int64_1d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(1) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt64 !! 
Data type @@ -1095,13 +1110,17 @@ function torch_tensor_from_array_int64_1d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int64_1d + end subroutine torch_tensor_from_array_int64_1d !> Return a Torch tensor pointing to data_in array of rank 2 containing data of type `int64` - function torch_tensor_from_array_int64_2d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int64_2d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int64), intent(in), target :: data_in(:,:) !! Input data that tensor will point at integer, intent(in) :: layout(2) !! Control order of indices @@ -1109,9 +1128,6 @@ function torch_tensor_from_array_int64_2d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(2) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt64 !! 
Data type @@ -1148,13 +1164,17 @@ function torch_tensor_from_array_int64_2d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int64_2d + end subroutine torch_tensor_from_array_int64_2d !> Return a Torch tensor pointing to data_in array of rank 3 containing data of type `int64` - function torch_tensor_from_array_int64_3d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int64_3d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int64), intent(in), target :: data_in(:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(3) !! Control order of indices @@ -1162,9 +1182,6 @@ function torch_tensor_from_array_int64_3d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(3) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt64 !! 
Data type @@ -1201,13 +1218,17 @@ function torch_tensor_from_array_int64_3d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int64_3d + end subroutine torch_tensor_from_array_int64_3d !> Return a Torch tensor pointing to data_in array of rank 4 containing data of type `int64` - function torch_tensor_from_array_int64_4d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_int64_4d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : int64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs integer(kind=int64), intent(in), target :: data_in(:,:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(4) !! Control order of indices @@ -1215,9 +1236,6 @@ function torch_tensor_from_array_int64_4d(data_in, layout, c_device_type, device integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(4) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kInt64 !! 
Data type @@ -1254,13 +1272,17 @@ function torch_tensor_from_array_int64_4d(data_in, layout, c_device_type, device device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_int64_4d + end subroutine torch_tensor_from_array_int64_4d !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `real32` - function torch_tensor_from_array_real32_1d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real32_1d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real32), intent(in), target :: data_in(:) !! Input data that tensor will point at integer, intent(in) :: layout(1) !! Control order of indices @@ -1268,9 +1290,6 @@ function torch_tensor_from_array_real32_1d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(1) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat32 !! 
Data type @@ -1307,13 +1326,17 @@ function torch_tensor_from_array_real32_1d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real32_1d + end subroutine torch_tensor_from_array_real32_1d !> Return a Torch tensor pointing to data_in array of rank 2 containing data of type `real32` - function torch_tensor_from_array_real32_2d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real32_2d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real32), intent(in), target :: data_in(:,:) !! Input data that tensor will point at integer, intent(in) :: layout(2) !! Control order of indices @@ -1321,9 +1344,6 @@ function torch_tensor_from_array_real32_2d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(2) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat32 !! 
Data type @@ -1360,13 +1380,17 @@ function torch_tensor_from_array_real32_2d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real32_2d + end subroutine torch_tensor_from_array_real32_2d !> Return a Torch tensor pointing to data_in array of rank 3 containing data of type `real32` - function torch_tensor_from_array_real32_3d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real32_3d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real32), intent(in), target :: data_in(:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(3) !! Control order of indices @@ -1374,9 +1398,6 @@ function torch_tensor_from_array_real32_3d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(3) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat32 !! 
Data type @@ -1413,13 +1434,17 @@ function torch_tensor_from_array_real32_3d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real32_3d + end subroutine torch_tensor_from_array_real32_3d !> Return a Torch tensor pointing to data_in array of rank 4 containing data of type `real32` - function torch_tensor_from_array_real32_4d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real32_4d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real32 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real32), intent(in), target :: data_in(:,:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(4) !! Control order of indices @@ -1427,9 +1452,6 @@ function torch_tensor_from_array_real32_4d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(4) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat32 !! 
Data type @@ -1466,13 +1488,17 @@ function torch_tensor_from_array_real32_4d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real32_4d + end subroutine torch_tensor_from_array_real32_4d !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `real64` - function torch_tensor_from_array_real64_1d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real64_1d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real64), intent(in), target :: data_in(:) !! Input data that tensor will point at integer, intent(in) :: layout(1) !! Control order of indices @@ -1480,9 +1506,6 @@ function torch_tensor_from_array_real64_1d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(1) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat64 !! 
Data type @@ -1519,13 +1542,17 @@ function torch_tensor_from_array_real64_1d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real64_1d + end subroutine torch_tensor_from_array_real64_1d !> Return a Torch tensor pointing to data_in array of rank 2 containing data of type `real64` - function torch_tensor_from_array_real64_2d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real64_2d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real64), intent(in), target :: data_in(:,:) !! Input data that tensor will point at integer, intent(in) :: layout(2) !! Control order of indices @@ -1533,9 +1560,6 @@ function torch_tensor_from_array_real64_2d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(2) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat64 !! 
Data type @@ -1572,13 +1596,17 @@ function torch_tensor_from_array_real64_2d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real64_2d + end subroutine torch_tensor_from_array_real64_2d !> Return a Torch tensor pointing to data_in array of rank 3 containing data of type `real64` - function torch_tensor_from_array_real64_3d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real64_3d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real64), intent(in), target :: data_in(:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(3) !! Control order of indices @@ -1586,9 +1614,6 @@ function torch_tensor_from_array_real64_3d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(3) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat64 !! 
Data type @@ -1625,13 +1650,17 @@ function torch_tensor_from_array_real64_3d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real64_3d + end subroutine torch_tensor_from_array_real64_3d !> Return a Torch tensor pointing to data_in array of rank 4 containing data of type `real64` - function torch_tensor_from_array_real64_4d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_real64_4d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : real64 + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs real(kind=real64), intent(in), target :: data_in(:,:,:,:) !! Input data that tensor will point at integer, intent(in) :: layout(4) !! Control order of indices @@ -1639,9 +1668,6 @@ function torch_tensor_from_array_real64_4d(data_in, layout, c_device_type, devic integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(4) !! Shape of the tensor integer(c_int), parameter :: c_dtype = torch_kFloat64 !! 
Data type @@ -1678,7 +1704,7 @@ function torch_tensor_from_array_real64_4d(data_in, layout, c_device_type, devic device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_real64_4d + end subroutine torch_tensor_from_array_real64_4d end module ftorch diff --git a/src/ftorch.fypp b/src/ftorch.fypp index 225b0b50..161a421d 100644 --- a/src/ftorch.fypp +++ b/src/ftorch.fypp @@ -95,17 +95,18 @@ module ftorch contains !> Returns a tensor filled with the scalar value 0. - function torch_tensor_zeros(ndims, tensor_shape, dtype, device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_zeros(tensor, ndims, tensor_shape, dtype, & + device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_int64_t - integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor - integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor - integer(c_int), intent(in) :: dtype !! Data type of the tensor - integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) + type(torch_tensor), intent(out) :: tensor !! Returned tensor + integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor + integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor + integer(c_int), intent(in) :: dtype !! Data type of the tensor + integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical(c_bool), optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - type(torch_tensor) :: tensor !! Returned tensor - integer(c_int) :: device_index_value !! device index used - logical(c_bool) :: requires_grad !! 
Whether gradients need to be computed for the created tensor + integer(c_int) :: device_index_value !! device index used + logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor interface function torch_zeros_c(ndims, tensor_shape, dtype, device_type, device_index, requires_grad) result(tensor) & @@ -137,20 +138,21 @@ contains end if tensor%p = torch_zeros_c(ndims, tensor_shape, dtype, device_type, device_index_value, requires_grad) - end function torch_tensor_zeros + end subroutine torch_tensor_zeros !> Returns a tensor filled with the scalar value 1. - function torch_tensor_ones(ndims, tensor_shape, dtype, device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_ones(tensor, ndims, tensor_shape, dtype, & + device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_int64_t - integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor - integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor - integer(c_int), intent(in) :: dtype !! Data type of the tensor - integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) + type(torch_tensor), intent(out) :: tensor !! Returned tensor + integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor + integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor + integer(c_int), intent(in) :: dtype !! Data type of the tensor + integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical(c_bool), optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - type(torch_tensor) :: tensor !! Returned tensor - integer(c_int) :: device_index_value !! device index used - logical(c_bool) :: requires_grad !! 
Whether gradients need to be computed for the created tensor + integer(c_int) :: device_index_value !! device index used + logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor interface function torch_ones_c(ndims, tensor_shape, dtype, device_type, device_index, requires_grad) result(tensor) & @@ -182,29 +184,29 @@ contains end if tensor%p = torch_ones_c(ndims, tensor_shape, dtype, device_type, device_index_value, requires_grad) - end function torch_tensor_ones + end subroutine torch_tensor_ones ! Torch Tensor API !| Exposes the given data as a tensor without taking ownership of the original data. ! This routine will take an (i, j, k) array and return an (k, j, i) tensor. - function torch_tensor_from_blob(data, ndims, tensor_shape, layout, dtype, & - device_type, device_index, & - requires_grad_opt) result(tensor) + subroutine torch_tensor_from_blob(tensor, data, ndims, tensor_shape, layout, dtype, & + device_type, device_index, & + requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_int64_t, c_ptr - type(c_ptr), intent(in) :: data !! Pointer to data - integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor - integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor - integer(c_int), intent(in) :: layout(*) !! Layout for strides for accessing data - integer(c_int), intent(in) :: dtype !! Data type of the tensor - integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) + type(torch_tensor), intent(out) :: tensor !! Returned tensor + type(c_ptr), intent(in) :: data !! Pointer to data + integer(c_int), intent(in) :: ndims !! Number of dimensions of the tensor + integer(c_int64_t), intent(in) :: tensor_shape(*) !! Shape of the tensor + integer(c_int), intent(in) :: layout(*) !! Layout for strides for accessing data + integer(c_int), intent(in) :: dtype !! 
Data type of the tensor + integer(c_int), intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical(c_bool), optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - type(torch_tensor) :: tensor !! Returned tensor - integer(c_int) :: i !! loop index - integer(c_int64_t) :: strides(ndims) !! Strides for accessing data - integer(c_int) :: device_index_value !! device index used - logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor + integer(c_int) :: i !! loop index + integer(c_int64_t) :: strides(ndims) !! Strides for accessing data + integer(c_int) :: device_index_value !! device index used + logical(c_bool) :: requires_grad !! Whether gradients need to be computed for the created tensor if (.not. present(requires_grad_opt)) then requires_grad = logical(.false., c_bool) @@ -227,7 +229,7 @@ contains endif tensor%p = torch_from_blob_c(data, ndims, tensor_shape, strides, dtype, device_type, device_index_value, requires_grad) - end function torch_tensor_from_blob + end subroutine torch_tensor_from_blob !> Prints the contents of a tensor. subroutine torch_tensor_print(tensor) @@ -247,7 +249,7 @@ contains !> Determines the device index of a tensor. function torch_tensor_get_device_index(tensor) result(device_index) use, intrinsic :: iso_c_binding, only : c_int - type(torch_tensor), intent(in) :: tensor !! Input tensor + type(torch_tensor), value, intent(in) :: tensor !! Input tensor integer(c_int) :: device_index !! 
Device index of tensor interface @@ -409,10 +411,14 @@ contains #:for PREC in PRECISIONS #:for RANK in RANKS !> Return a Torch tensor pointing to data_in array of rank ${RANK}$ containing data of type `${PREC}$` - function torch_tensor_from_array_${PREC}$_${RANK}$d(data_in, layout, c_device_type, device_index, requires_grad_opt) result(tensor) + subroutine torch_tensor_from_array_${PREC}$_${RANK}$d(tensor, data_in, layout, & + c_device_type, device_index, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_float, c_int, c_int64_t, c_loc use, intrinsic :: iso_fortran_env, only : ${PREC}$ + ! output tensor + type(torch_tensor), intent(out) :: tensor !! Returned tensor + ! inputs ${f_type(PREC)}$(kind=${PREC}$), intent(in), target :: data_in${ranksuffix(RANK)}$ !! Input data that tensor will point at integer, intent(in) :: layout(${RANK}$) !! Control order of indices @@ -420,9 +426,6 @@ contains integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor - ! output tensory - type(torch_tensor) :: tensor !! Returned tensor - ! local data integer(c_int64_t) :: c_tensor_shape(${RANK}$) !! Shape of the tensor integer(c_int), parameter :: c_dtype = ${enum_from_prec(PREC)}$ !! 
Data type @@ -459,7 +462,7 @@ contains device_index_value, & logical(requires_grad, c_bool)) - end function torch_tensor_from_array_${PREC}$_${RANK}$d + end subroutine torch_tensor_from_array_${PREC}$_${RANK}$d #:endfor #:endfor From d9d7e6708ea8baebcea083e068df9c6382c39cb2 Mon Sep 17 00:00:00 2001 From: Joe Wallwork Date: Fri, 21 Jun 2024 09:49:37 +0100 Subject: [PATCH 02/10] Update existing examples --- examples/1_SimpleNet/simplenet_infer_fortran.f90 | 4 ++-- examples/2_ResNet18/resnet_infer_fortran.f90 | 4 ++-- examples/3_MultiGPU/simplenet_infer_fortran.f90 | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/1_SimpleNet/simplenet_infer_fortran.f90 b/examples/1_SimpleNet/simplenet_infer_fortran.f90 index 209f7296..d97a0e12 100644 --- a/examples/1_SimpleNet/simplenet_infer_fortran.f90 +++ b/examples/1_SimpleNet/simplenet_infer_fortran.f90 @@ -36,8 +36,8 @@ program inference in_data = [0.0, 1.0, 2.0, 3.0, 4.0] ! Create Torch input/output tensors from the above arrays - in_tensors(1) = torch_tensor_from_array(in_data, tensor_layout, torch_kCPU) - out_tensors(1) = torch_tensor_from_array(out_data, tensor_layout, torch_kCPU) + call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(1), out_data, tensor_layout, torch_kCPU) ! Load ML model model = torch_module_load(args(1)) diff --git a/examples/2_ResNet18/resnet_infer_fortran.f90 b/examples/2_ResNet18/resnet_infer_fortran.f90 index 8a50b6e2..80ee1a9e 100644 --- a/examples/2_ResNet18/resnet_infer_fortran.f90 +++ b/examples/2_ResNet18/resnet_infer_fortran.f90 @@ -76,9 +76,9 @@ subroutine main() call load_data(filename, tensor_length, in_data) ! 
Create input/output tensors from the above arrays - in_tensors(1) = torch_tensor_from_array(in_data, in_layout, torch_kCPU) + call torch_tensor_from_array(in_tensors(1), in_data, in_layout, torch_kCPU) - out_tensors(1) = torch_tensor_from_array(out_data, out_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(1), out_data, out_layout, torch_kCPU) ! Load ML model (edit this line to use different models) model = torch_module_load(args(1)) diff --git a/examples/3_MultiGPU/simplenet_infer_fortran.f90 b/examples/3_MultiGPU/simplenet_infer_fortran.f90 index 96583619..cb52f35d 100644 --- a/examples/3_MultiGPU/simplenet_infer_fortran.f90 +++ b/examples/3_MultiGPU/simplenet_infer_fortran.f90 @@ -48,13 +48,13 @@ program inference ! Create Torch input tensor from the above array and assign it to the first (and only) ! element in the array of input tensors. ! We use the torch_kCUDA device type with device index corresponding to the MPI rank. - in_tensors(1) = torch_tensor_from_array(in_data, tensor_layout, torch_kCUDA, & device_index=rank) + call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, & torch_kCUDA, device_index=rank) ! Create Torch output tensor from the above array. ! Here we use the torch_kCPU device type since the tensor is for output only ! i.e. to be subsequently used by Fortran on CPU. - out_tensor = torch_tensor_from_array(out_data, tensor_layout, torch_kCPU) + call torch_tensor_from_array(out_tensor, out_data, tensor_layout, torch_kCPU) ! Load ML model. Ensure that the same device type and device index are used ! as for the input data. From f5c3235316706ddff07032ad83d92ccb6960bc51 Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 08:39:00 +0100 Subject: [PATCH 03/10] Update README and docs to reflect new torch_tensor API. 
--- README.md | 4 ++-- pages/gpu.md | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 06b30f19..148e39a7 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,8 @@ type(torch_tensor), dimension(n_inputs) :: model_inputs_arr type(torch_tensor) :: model_output ... model = torch_module_load("/my/saved/TorchScript/model.pt") -model_inputs_arr(1) = torch_tensor_from_array(input_fortran, in_layout, torch_kCPU) -model_output = torch_tensor_from_array(output_fortran, out_layout, torch_kCPU) +call torch_tensor_from_array(model_inputs_arr(1), input_fortran, in_layout, torch_kCPU) +call torch_tensor_from_array(model_output, output_fortran, out_layout, torch_kCPU) call torch_module_forward(model, model_input_arr, n_inputs, model_output) ``` diff --git a/pages/gpu.md b/pages/gpu.md index 26bb81c4..e6355d7d 100644 --- a/pages/gpu.md +++ b/pages/gpu.md @@ -41,14 +41,15 @@ For example, the following code snippet sets up a Torch tensor with GPU device i ```fortran device_index = 2 -in_tensors(1) = torch_tensor_from_array(in_data, tensor_layout, torch_kCUDA, & - device_index=device_index) +call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, & + torch_kCUDA, device_index=device_index) ``` Whereas the following code snippet sets up a Torch tensor with (default) device index 0: ```fortran -in_tensors(1) = torch_tensor_from_array(in_data, tensor_layout, torch_kCUDA) +call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, & + torch_kCUDA) ``` See the From ac458fd4eef55f87ff42c55c1dc4f5c0814cb66c Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 21:12:33 +0100 Subject: [PATCH 04/10] Update ftorch to refer to loaded Torchscript as a 'model' rather than a 'module'. 
--- src/ftorch.f90 | 56 ++++++++++++++++++++++++------------------------- src/ftorch.fypp | 56 ++++++++++++++++++++++++------------------------- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/ftorch.f90 b/src/ftorch.f90 index 4c5478a3..68ec52e4 100644 --- a/src/ftorch.f90 +++ b/src/ftorch.f90 @@ -15,9 +15,9 @@ module ftorch implicit none !> Type for holding a torch neural net (nn.Module). - type torch_module - type(c_ptr) :: p = c_null_ptr !! pointer to the neural net module in memory - end type torch_module + type torch_model + type(c_ptr) :: p = c_null_ptr !! pointer to the neural net in memory + end type torch_model !> Type for holding a Torch tensor. type torch_tensor @@ -281,23 +281,23 @@ end subroutine torch_tensor_delete_c call torch_tensor_delete_c(tensor%p) end subroutine torch_tensor_delete - ! Torch Module API - !> Loads a TorchScript module (pre-trained PyTorch model saved with TorchScript) - function torch_module_load(filename, device_type, device_index, requires_grad_opt, is_training_opt) result(module) + ! Torch Model API + !> Loads a TorchScript nn.module (pre-trained PyTorch model saved with TorchScript) + function torch_model_load(filename, device_type, device_index, requires_grad_opt, is_training_opt) result(model) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_null_char - character(*), intent(in) :: filename !! Filename of TorchScript module + character(*), intent(in) :: filename !! Filename of saved TorchScript model integer(c_int), optional, intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor logical, optional, intent(in) :: is_training_opt !! Whether gradients need to be computed for the created tensor - type(torch_module) :: module !! 
Returned deserialized module + type(torch_model) :: model !! Returned deserialized model integer(c_int) :: device_type_value integer(c_int) :: device_index_value logical :: requires_grad !! Whether gradients need to be computed for the created tensor logical :: is_training !! Whether the model is being trained, rather than evaluated interface - function torch_jit_load_c(filename, device_type, device_index, requires_grad, is_training) result(module) & + function torch_jit_load_c(filename, device_type, device_index, requires_grad, is_training) result(model) & bind(c, name = 'torch_jit_load') use, intrinsic :: iso_c_binding, only : c_bool, c_char, c_int, c_ptr character(c_char), intent(in) :: filename(*) @@ -305,7 +305,7 @@ function torch_jit_load_c(filename, device_type, device_index, requires_grad, is integer(c_int), value, intent(in) :: device_index logical(c_bool), value, intent(in) :: requires_grad logical(c_bool), value, intent(in) :: is_training - type(c_ptr) :: module + type(c_ptr) :: model end function torch_jit_load_c end interface @@ -336,16 +336,16 @@ end function torch_jit_load_c end if ! Need to append c_null_char at end of filename - module%p = torch_jit_load_c(trim(adjustl(filename))//c_null_char, & + model%p = torch_jit_load_c(trim(adjustl(filename))//c_null_char, & device_type_value, device_index_value, & logical(requires_grad, c_bool), & logical(is_training, c_bool)) - end function torch_module_load + end function torch_model_load - !> Performs a forward pass of the module with the input tensors - subroutine torch_module_forward(module, input_tensors, output_tensors, requires_grad_opt) + !> Performs a forward pass of the model with the input tensors + subroutine torch_model_forward(model, input_tensors, output_tensors, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_ptr, c_int, c_loc - type(torch_module), intent(in) :: module !! Module + type(torch_model), intent(in) :: model !! 
Model type(torch_tensor), intent(in), dimension(:) :: input_tensors !! Array of Input tensors type(torch_tensor), intent(in), dimension(:) :: output_tensors !! Returned output tensors logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor @@ -358,17 +358,17 @@ subroutine torch_module_forward(module, input_tensors, output_tensors, requires_ type(c_ptr), dimension(size(output_tensors)), target :: output_ptrs interface - subroutine torch_jit_module_forward_c(module, input_tensors, n_inputs, & + subroutine torch_jit_model_forward_c(model, input_tensors, n_inputs, & output_tensors, n_outputs, requires_grad) & bind(c, name = 'torch_jit_module_forward') use, intrinsic :: iso_c_binding, only : c_bool, c_ptr, c_int - type(c_ptr), value, intent(in) :: module + type(c_ptr), value, intent(in) :: model type(c_ptr), value, intent(in) :: input_tensors integer(c_int), value, intent(in) :: n_inputs type(c_ptr), value, intent(in) :: output_tensors integer(c_int), value, intent(in) :: n_outputs logical(c_bool), value, intent(in) :: requires_grad - end subroutine torch_jit_module_forward_c + end subroutine torch_jit_model_forward_c end interface n_inputs = size(input_tensors) @@ -390,25 +390,25 @@ end subroutine torch_jit_module_forward_c output_ptrs(i) = output_tensors(i)%p end do - call torch_jit_module_forward_c(module%p, c_loc(input_ptrs), n_inputs, & + call torch_jit_model_forward_c(model%p, c_loc(input_ptrs), n_inputs, & c_loc(output_ptrs), n_outputs, & logical(requires_grad, c_bool)) - end subroutine torch_module_forward + end subroutine torch_model_forward - !> Deallocates a TorchScript module - subroutine torch_module_delete(module) - type(torch_module), intent(in) :: module !! Module to deallocate + !> Deallocates a TorchScript model + subroutine torch_model_delete(model) + type(torch_model), intent(in) :: model !! 
Torch Model to deallocate interface - subroutine torch_jit_module_delete_c(module) & + subroutine torch_jit_model_delete_c(model) & bind(c, name = 'torch_jit_module_delete') use, intrinsic :: iso_c_binding, only : c_ptr - type(c_ptr), value, intent(in) :: module - end subroutine torch_jit_module_delete_c + type(c_ptr), value, intent(in) :: model + end subroutine torch_jit_model_delete_c end interface - call torch_jit_module_delete_c(module%p) - end subroutine torch_module_delete + call torch_jit_model_delete_c(model%p) + end subroutine torch_model_delete !> Return a Torch tensor pointing to data_in array of rank 1 containing data of type `int8` subroutine torch_tensor_from_array_int8_1d(tensor, data_in, layout, & diff --git a/src/ftorch.fypp b/src/ftorch.fypp index 161a421d..0a720a15 100644 --- a/src/ftorch.fypp +++ b/src/ftorch.fypp @@ -32,9 +32,9 @@ module ftorch implicit none !> Type for holding a torch neural net (nn.Module). - type torch_module - type(c_ptr) :: p = c_null_ptr !! pointer to the neural net module in memory - end type torch_module + type torch_model + type(c_ptr) :: p = c_null_ptr !! pointer to the neural net in memory + end type torch_model !> Type for holding a Torch tensor. type torch_tensor @@ -279,23 +279,23 @@ contains call torch_tensor_delete_c(tensor%p) end subroutine torch_tensor_delete - ! Torch Module API - !> Loads a TorchScript module (pre-trained PyTorch model saved with TorchScript) - function torch_module_load(filename, device_type, device_index, requires_grad_opt, is_training_opt) result(module) + ! Torch Model API + !> Loads a TorchScript nn.module (pre-trained PyTorch model saved with TorchScript) + function torch_model_load(filename, device_type, device_index, requires_grad_opt, is_training_opt) result(model) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_null_char - character(*), intent(in) :: filename !! Filename of TorchScript module + character(*), intent(in) :: filename !! 
Filename of saved TorchScript model integer(c_int), optional, intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor logical, optional, intent(in) :: is_training_opt !! Whether gradients need to be computed for the created tensor - type(torch_module) :: module !! Returned deserialized module + type(torch_model) :: model !! Returned deserialized model integer(c_int) :: device_type_value integer(c_int) :: device_index_value logical :: requires_grad !! Whether gradients need to be computed for the created tensor logical :: is_training !! Whether the model is being trained, rather than evaluated interface - function torch_jit_load_c(filename, device_type, device_index, requires_grad, is_training) result(module) & + function torch_jit_load_c(filename, device_type, device_index, requires_grad, is_training) result(model) & bind(c, name = 'torch_jit_load') use, intrinsic :: iso_c_binding, only : c_bool, c_char, c_int, c_ptr character(c_char), intent(in) :: filename(*) @@ -303,7 +303,7 @@ contains integer(c_int), value, intent(in) :: device_index logical(c_bool), value, intent(in) :: requires_grad logical(c_bool), value, intent(in) :: is_training - type(c_ptr) :: module + type(c_ptr) :: model end function torch_jit_load_c end interface @@ -334,16 +334,16 @@ contains end if ! 
Need to append c_null_char at end of filename - module%p = torch_jit_load_c(trim(adjustl(filename))//c_null_char, & + model%p = torch_jit_load_c(trim(adjustl(filename))//c_null_char, & device_type_value, device_index_value, & logical(requires_grad, c_bool), & logical(is_training, c_bool)) - end function torch_module_load + end function torch_model_load - !> Performs a forward pass of the module with the input tensors - subroutine torch_module_forward(module, input_tensors, output_tensors, requires_grad_opt) + !> Performs a forward pass of the model with the input tensors + subroutine torch_model_forward(model, input_tensors, output_tensors, requires_grad_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_ptr, c_int, c_loc - type(torch_module), intent(in) :: module !! Module + type(torch_model), intent(in) :: model !! Model type(torch_tensor), intent(in), dimension(:) :: input_tensors !! Array of Input tensors type(torch_tensor), intent(in), dimension(:) :: output_tensors !! Returned output tensors logical, optional, intent(in) :: requires_grad_opt !! 
Whether gradients need to be computed for the created tensor @@ -356,17 +356,17 @@ contains type(c_ptr), dimension(size(output_tensors)), target :: output_ptrs interface - subroutine torch_jit_module_forward_c(module, input_tensors, n_inputs, & + subroutine torch_jit_model_forward_c(model, input_tensors, n_inputs, & output_tensors, n_outputs, requires_grad) & bind(c, name = 'torch_jit_module_forward') use, intrinsic :: iso_c_binding, only : c_bool, c_ptr, c_int - type(c_ptr), value, intent(in) :: module + type(c_ptr), value, intent(in) :: model type(c_ptr), value, intent(in) :: input_tensors integer(c_int), value, intent(in) :: n_inputs type(c_ptr), value, intent(in) :: output_tensors integer(c_int), value, intent(in) :: n_outputs logical(c_bool), value, intent(in) :: requires_grad - end subroutine torch_jit_module_forward_c + end subroutine torch_jit_model_forward_c end interface n_inputs = size(input_tensors) @@ -388,25 +388,25 @@ contains output_ptrs(i) = output_tensors(i)%p end do - call torch_jit_module_forward_c(module%p, c_loc(input_ptrs), n_inputs, & + call torch_jit_model_forward_c(model%p, c_loc(input_ptrs), n_inputs, & c_loc(output_ptrs), n_outputs, & logical(requires_grad, c_bool)) - end subroutine torch_module_forward + end subroutine torch_model_forward - !> Deallocates a TorchScript module - subroutine torch_module_delete(module) - type(torch_module), intent(in) :: module !! Module to deallocate + !> Deallocates a TorchScript model + subroutine torch_model_delete(model) + type(torch_model), intent(in) :: model !! 
Torch Model to deallocate interface - subroutine torch_jit_module_delete_c(module) & + subroutine torch_jit_model_delete_c(model) & bind(c, name = 'torch_jit_module_delete') use, intrinsic :: iso_c_binding, only : c_ptr - type(c_ptr), value, intent(in) :: module - end subroutine torch_jit_module_delete_c + type(c_ptr), value, intent(in) :: model + end subroutine torch_jit_model_delete_c end interface - call torch_jit_module_delete_c(module%p) - end subroutine torch_module_delete + call torch_jit_model_delete_c(model%p) + end subroutine torch_model_delete #:for PREC in PRECISIONS #:for RANK in RANKS From 09d50eb5154d9c851b5bdb6296606620bedd31e5 Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 21:17:21 +0100 Subject: [PATCH 05/10] Update model loading to be a subroutine rather than a function for consistency with the rest of the API. --- src/ftorch.f90 | 8 ++++---- src/ftorch.fypp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ftorch.f90 b/src/ftorch.f90 index 68ec52e4..f604d64e 100644 --- a/src/ftorch.f90 +++ b/src/ftorch.f90 @@ -283,14 +283,14 @@ end subroutine torch_tensor_delete ! Torch Model API !> Loads a TorchScript nn.module (pre-trained PyTorch model saved with TorchScript) - function torch_model_load(filename, device_type, device_index, requires_grad_opt, is_training_opt) result(model) + subroutine torch_model_load(model, filename, device_type, device_index, requires_grad_opt, is_training_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_null_char - character(*), intent(in) :: filename !! Filename of saved TorchScript model + type(torch_model), intent(out) :: model !! Returned deserialized model + character(*), intent(in) :: filename !! Filename of saved TorchScript model integer(c_int), optional, intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! 
device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! Whether gradients need to be computed for the created tensor logical, optional, intent(in) :: is_training_opt !! Whether gradients need to be computed for the created tensor - type(torch_model) :: model !! Returned deserialized model integer(c_int) :: device_type_value integer(c_int) :: device_index_value logical :: requires_grad !! Whether gradients need to be computed for the created tensor @@ -340,7 +340,7 @@ end function torch_jit_load_c device_type_value, device_index_value, & logical(requires_grad, c_bool), & logical(is_training, c_bool)) - end function torch_model_load + end subroutine torch_model_load !> Performs a forward pass of the model with the input tensors subroutine torch_model_forward(model, input_tensors, output_tensors, requires_grad_opt) diff --git a/src/ftorch.fypp b/src/ftorch.fypp index 0a720a15..3d982c51 100644 --- a/src/ftorch.fypp +++ b/src/ftorch.fypp @@ -281,14 +281,14 @@ contains ! Torch Model API !> Loads a TorchScript nn.module (pre-trained PyTorch model saved with TorchScript) - function torch_model_load(filename, device_type, device_index, requires_grad_opt, is_training_opt) result(model) + subroutine torch_model_load(model, filename, device_type, device_index, requires_grad_opt, is_training_opt) use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_null_char - character(*), intent(in) :: filename !! Filename of saved TorchScript model + type(torch_model), intent(out) :: model !! Returned deserialized model + character(*), intent(in) :: filename !! Filename of saved TorchScript model integer(c_int), optional, intent(in) :: device_type !! Device type the tensor will live on (`torch_kCPU` or `torch_kCUDA`) integer(c_int), optional, intent(in) :: device_index !! device index to use for `torch_kCUDA` case logical, optional, intent(in) :: requires_grad_opt !! 
Whether gradients need to be computed for the created tensor logical, optional, intent(in) :: is_training_opt !! Whether gradients need to be computed for the created tensor - type(torch_model) :: model !! Returned deserialized model integer(c_int) :: device_type_value integer(c_int) :: device_index_value logical :: requires_grad !! Whether gradients need to be computed for the created tensor @@ -338,7 +338,7 @@ contains device_type_value, device_index_value, & logical(requires_grad, c_bool), & logical(is_training, c_bool)) - end function torch_model_load + end subroutine torch_model_load !> Performs a forward pass of the model with the input tensors subroutine torch_model_forward(model, input_tensors, output_tensors, requires_grad_opt) From 7a35805a56139a2209301fd31c619dce9608913a Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 21:20:21 +0100 Subject: [PATCH 06/10] Remove superflous pass by value. --- src/ftorch.f90 | 2 +- src/ftorch.fypp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ftorch.f90 b/src/ftorch.f90 index f604d64e..d841f89a 100644 --- a/src/ftorch.f90 +++ b/src/ftorch.f90 @@ -251,7 +251,7 @@ end subroutine torch_tensor_print !> Determines the device index of a tensor. function torch_tensor_get_device_index(tensor) result(device_index) use, intrinsic :: iso_c_binding, only : c_int - type(torch_tensor), value, intent(in) :: tensor !! Input tensor + type(torch_tensor), intent(in) :: tensor !! Input tensor integer(c_int) :: device_index !! Device index of tensor interface diff --git a/src/ftorch.fypp b/src/ftorch.fypp index 3d982c51..a57c1cc9 100644 --- a/src/ftorch.fypp +++ b/src/ftorch.fypp @@ -249,7 +249,7 @@ contains !> Determines the device index of a tensor. function torch_tensor_get_device_index(tensor) result(device_index) use, intrinsic :: iso_c_binding, only : c_int - type(torch_tensor), value, intent(in) :: tensor !! Input tensor + type(torch_tensor), intent(in) :: tensor !! 
Input tensor integer(c_int) :: device_index !! Device index of tensor interface From 462c36f1b742912532433c984a506b41f5b5de21 Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 21:25:58 +0100 Subject: [PATCH 07/10] Update examples to use subroutine for loading a model rather than a function. --- examples/1_SimpleNet/simplenet_infer_fortran.f90 | 10 +++++----- examples/2_ResNet18/resnet_infer_fortran.f90 | 10 +++++----- examples/3_MultiGPU/simplenet_infer_fortran.f90 | 14 +++++++------- examples/4_MultiIO/multiionet_infer_fortran.f90 | 16 ++++++++-------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/examples/1_SimpleNet/simplenet_infer_fortran.f90 b/examples/1_SimpleNet/simplenet_infer_fortran.f90 index d97a0e12..fbbb6f86 100644 --- a/examples/1_SimpleNet/simplenet_infer_fortran.f90 +++ b/examples/1_SimpleNet/simplenet_infer_fortran.f90 @@ -21,7 +21,7 @@ program inference ! Set up Torch data structures ! The net, a vector of input tensors (in this case we only have one), and the output tensor - type(torch_module) :: model + type(torch_model) :: model type(torch_tensor), dimension(1) :: in_tensors type(torch_tensor), dimension(1) :: out_tensors @@ -37,17 +37,17 @@ program inference ! Create Torch input/output tensors from the above arrays call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, torch_kCPU) - call torch_tensor_from_array(out_tensor, out_data, tensor_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(1), out_data, tensor_layout, torch_kCPU) ! Load ML model - model = torch_module_load(args(1)) + call torch_model_load(model, args(1)) ! Infer - call torch_module_forward(model, in_tensors, out_tensors) + call torch_model_forward(model, in_tensors, out_tensors) write (*,*) out_data(:) ! 
Cleanup - call torch_module_delete(model) + call torch_model_delete(model) call torch_tensor_delete(in_tensors(1)) call torch_tensor_delete(out_tensors(1)) diff --git a/examples/2_ResNet18/resnet_infer_fortran.f90 b/examples/2_ResNet18/resnet_infer_fortran.f90 index 80ee1a9e..670b9097 100644 --- a/examples/2_ResNet18/resnet_infer_fortran.f90 +++ b/examples/2_ResNet18/resnet_infer_fortran.f90 @@ -20,7 +20,7 @@ subroutine main() character(len=128), dimension(:), allocatable :: args ! Set up types of input and output data - type(torch_module) :: model + type(torch_model) :: model type(torch_tensor), dimension(1) :: in_tensors type(torch_tensor), dimension(1) :: out_tensors @@ -78,13 +78,13 @@ subroutine main() ! Create input/output tensors from the above arrays call torch_tensor_from_array(in_tensors(1), in_data, in_layout, torch_kCPU) - call torch_tensor_from_array(out_tensor, out_data, out_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(1), out_data, out_layout, torch_kCPU) ! Load ML model (edit this line to use different models) - model = torch_module_load(args(1)) + call torch_model_load(model, args(1)) ! Infer - call torch_module_forward(model, in_tensors, out_tensors) + call torch_model_forward(model, in_tensors, out_tensors) ! Load categories call load_categories(filename_cats, N_cats, categories) @@ -102,7 +102,7 @@ subroutine main() write (*,*) trim(categories(idx(2))), " (id=", idx(2), "), : probability =", probability ! Cleanup - call torch_module_delete(model) + call torch_model_delete(model) call torch_tensor_delete(in_tensors(1)) call torch_tensor_delete(out_tensors(1)) deallocate(in_data) diff --git a/examples/3_MultiGPU/simplenet_infer_fortran.f90 b/examples/3_MultiGPU/simplenet_infer_fortran.f90 index cb52f35d..24ee3319 100644 --- a/examples/3_MultiGPU/simplenet_infer_fortran.f90 +++ b/examples/3_MultiGPU/simplenet_infer_fortran.f90 @@ -23,9 +23,9 @@ program inference integer :: tensor_layout(1) = [1] ! 
Set up Torch data structures - type(torch_module) :: model + type(torch_model) :: model type(torch_tensor), dimension(1) :: in_tensors - type(torch_tensor) :: out_tensor + type(torch_tensor), dimension(1) :: out_tensors ! MPI configuration integer :: rank, ierr, i @@ -54,24 +54,24 @@ program inference ! Create Torch output tensor from the above array. ! Here we use the torch_kCPU device type since the tensor is for output only ! i.e. to be subsequently used by Fortran on CPU. - call torch_tensor_from_array(out_tensor, out_data, tensor_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(1), out_data, tensor_layout, torch_kCPU) ! Load ML model. Ensure that the same device type and device index are used ! as for the input data. - model = torch_module_load(args(1), device_type=torch_kCUDA, & + call torch_model_load(model, args(1), device_type=torch_kCUDA, & device_index=rank) ! Infer - call torch_module_forward(model, in_tensors, out_tensor) + call torch_model_forward(model, in_tensors, out_tensors) ! Print the values computed on each MPI rank. write (6, 200) rank, out_data(:) 200 format("output on rank ", i1,": [", 4(f5.1,","), f5.1,"]") ! Cleanup - call torch_module_delete(model) + call torch_model_delete(model) call torch_tensor_delete(in_tensors(1)) - call torch_tensor_delete(out_tensor) + call torch_tensor_delete(out_tensors(1)) call mpi_finalize(ierr) end program inference diff --git a/examples/4_MultiIO/multiionet_infer_fortran.f90 b/examples/4_MultiIO/multiionet_infer_fortran.f90 index 4a6ed02a..2393ccb2 100644 --- a/examples/4_MultiIO/multiionet_infer_fortran.f90 +++ b/examples/4_MultiIO/multiionet_infer_fortran.f90 @@ -23,7 +23,7 @@ program inference ! Set up Torch data structures ! 
The net, a vector of input tensors (in this case we only have one), and the output tensor - type(torch_module) :: model + type(torch_model) :: model type(torch_tensor), dimension(2) :: in_tensors type(torch_tensor), dimension(2) :: out_tensors @@ -39,21 +39,21 @@ program inference in_data2(:) = [0.0, -1.0, -2.0, -3.0] ! Create Torch input/output tensors from the above arrays - in_tensors(1) = torch_tensor_from_array(in_data1, tensor_layout, torch_kCPU) - in_tensors(2) = torch_tensor_from_array(in_data2, tensor_layout, torch_kCPU) - out_tensors(1) = torch_tensor_from_array(out_data1, tensor_layout, torch_kCPU) - out_tensors(2) = torch_tensor_from_array(out_data2, tensor_layout, torch_kCPU) + call torch_tensor_from_array(in_tensors(1), in_data1, tensor_layout, torch_kCPU) + call torch_tensor_from_array(in_tensors(2), in_data2, tensor_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(1), out_data1, tensor_layout, torch_kCPU) + call torch_tensor_from_array(out_tensors(2), out_data2, tensor_layout, torch_kCPU) ! Load ML model - model = torch_module_load(args(1)) + call torch_model_load(model, args(1)) ! Infer - call torch_module_forward(model, in_tensors, out_tensors) + call torch_model_forward(model, in_tensors, out_tensors) write (*,*) out_data1 write (*,*) out_data2 ! Cleanup - call torch_module_delete(model) + call torch_model_delete(model) call torch_tensor_delete(in_tensors(1)) call torch_tensor_delete(in_tensors(2)) call torch_tensor_delete(out_tensors(1)) From 8c1c98b68990c68202fb8a5e7e4fe1dffb46d96e Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 21:54:23 +0100 Subject: [PATCH 08/10] Update README to be consistent with new API, and also provide missing changes from multi-output updates. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 148e39a7..fc9e5c63 100644 --- a/README.md +++ b/README.md @@ -38,15 +38,15 @@ examples for performing coupling. 
```fortran use ftorch ... -type(torch_module) :: model -type(torch_tensor), dimension(n_inputs) :: model_inputs_arr -type(torch_tensor) :: model_output +type(torch_model) :: model +type(torch_tensor), dimension(n_inputs) :: model_inputs_arr +type(torch_tensor), dimension(n_outputs) :: model_output_arr ... -model = torch_module_load("/my/saved/TorchScript/model.pt") +call torch_model_load(model, "/my/saved/TorchScript/model.pt") call torch_tensor_from_array(model_inputs_arr(1), input_fortran, in_layout, torch_kCPU) -call torch_tensor_from_array(model_output, output_fortran, out_layout, torch_kCPU) +call torch_tensor_from_array(model_output_arr(1), output_fortran, out_layout, torch_kCPU) -call torch_module_forward(model, model_input_arr, n_inputs, model_output) +call torch_model_forward(model, model_input_arr, model_output_arr) ``` The following presentations provide an introduction and overview of _FTorch_: From 63c496967f8e010e0a458198c7315809c4f1d500 Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Wed, 26 Jun 2024 22:06:53 +0100 Subject: [PATCH 09/10] Update online docs to reflect the API changes. --- pages/examples.md | 14 +++++++------- pages/gpu.md | 2 +- pages/troubleshooting.md | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pages/examples.md b/pages/examples.md index 8161c6ff..97e3746b 100644 --- a/pages/examples.md +++ b/pages/examples.md @@ -49,7 +49,7 @@ use ftorch implicit none ! Generate an object to hold the Torch model -type(torch_module) :: model +type(torch_model) :: model ! Set up array of n_inputs input tensors and array of n_outputs output tensors ! Note: In this example there is only one input tensor (n_inputs = 1) and one @@ -70,7 +70,7 @@ integer, parameter :: out_dims = 1 integer :: out_layout(out_dims) = [1] ! Initialise the Torch model to be used -model = torch_module_load("/path/to/saved/model.pt") +torch_model_load(model, "/path/to/saved/model.pt") ! 
Initialise the inputs as Fortran array of ones input = 1.0 @@ -78,18 +78,18 @@ input = 1.0 ! Wrap Fortran data as no-copy Torch Tensors ! There may well be some reshaping required depending on the ! structure of the model which is not covered here (see examples) -model_input_arr(1) = torch_tensor_from_array(input, in_layout, torch_kCPU) -model_output_arr(1) = torch_tensor_from_array(output, out_layout, torch_kCPU) +call torch_tensor_from_array(model_input_arr(1), input, in_layout, torch_kCPU) +call torch_tensor_from_array(model_output_arr(1), output, out_layout, torch_kCPU) -! Run model and Infer +! Run model forward method and Infer ! Again, there may be some reshaping required depending on model design -call torch_module_forward(model, model_input_arr, model_output_arr) +call torch_model_forward(model, model_input_arr, model_output_arr) ! Write out the result of running the model write(*,*) output ! Clean up -call torch_module_delete(model) +call torch_model_delete(model) call torch_tensor_delete(model_input_arr(1)) call torch_tensor_delete(model_output_arr(1)) ``` diff --git a/pages/gpu.md b/pages/gpu.md index e6355d7d..ae84bcfe 100644 --- a/pages/gpu.md +++ b/pages/gpu.md @@ -42,7 +42,7 @@ For example, the following code snippet sets up a Torch tensor with GPU device i ```fortran device_index = 2 call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, & - torch_kCUDA, device_index=device_index) + torch_kCUDA, device_index=device_index) ``` Whereas the following code snippet sets up a Torch tensor with (default) device index 0: diff --git a/pages/troubleshooting.md b/pages/troubleshooting.md index 7b68562b..9dd53f83 100644 --- a/pages/troubleshooting.md +++ b/pages/troubleshooting.md @@ -65,12 +65,12 @@ on locating Torch within a virtual environment (venv) for CMake. ### Why are inputs to torch models an array? 
-The reason input and output tensors to [[torch_module_forward(subroutine)]] are +The reason input and output tensors to [[torch_model_forward(subroutine)]] are contained in arrays is because it is possible to pass multiple input tensors to the `forward()` method of a torch net, and it is possible for the net to return multiple output arrays.
The nature of Fortran means that it is not possible to set an arbitrary number -of inputs to the `torch_module_forward` subroutine, so instead we use a single +of inputs to the `torch_model_forward` subroutine, so instead we use a single array of input tensors which _can_ have an arbitrary length. Similarly, a single array of output tensors is used. From bfbbce9e3976df19615315589e3f163aa04cc0e7 Mon Sep 17 00:00:00 2001 From: Jack Atkinson Date: Thu, 27 Jun 2024 12:17:31 +0100 Subject: [PATCH 10/10] Whitespace and indentation fixes as spotted by @jwallwork23's review. --- examples/3_MultiGPU/simplenet_infer_fortran.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/3_MultiGPU/simplenet_infer_fortran.f90 b/examples/3_MultiGPU/simplenet_infer_fortran.f90 index 24ee3319..f3be3342 100644 --- a/examples/3_MultiGPU/simplenet_infer_fortran.f90 +++ b/examples/3_MultiGPU/simplenet_infer_fortran.f90 @@ -48,8 +48,8 @@ program inference ! Create Torch input tensor from the above array and assign it to the first (and only) ! element in the array of input tensors. ! We use the torch_kCUDA device type with device index corresponding to the MPI rank. - call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, & - torch_kCUDA, device_index=rank) + call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, & + torch_kCUDA, device_index=rank) ! Create Torch output tensor from the above array. ! Here we use the torch_kCPU device type since the tensor is for output only