Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP- Do not merge!] Batched ODE solve. #1378

Closed
wants to merge 54 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
0291848
Adding initial copy of tftk_ODE code.
jennloe Feb 14, 2022
15dcd46
First attempt to add in unit test code with git.
jennloe Feb 14, 2022
65df4a7
Adding more ODE unit test files.
jennloe Feb 14, 2022
819048b
Merge remote-tracking branch 'upstream/develop' into ODESolve
jennloe Apr 1, 2022
7bfac28
Fix naming of unit test files.
jennloe Apr 1, 2022
be45977
Apply clang formatting.
jennloe Apr 1, 2022
925e82c
Fixed namespace issues that were breaking build.
jennloe Apr 1, 2022
fbb97e1
More clang formatting.
jennloe Apr 1, 2022
f5bd480
Changed filenames to KokkosBatched_ODE.
jennloe Apr 1, 2022
e2e39f5
Removed need for KokkosTypes file.
jennloe Apr 1, 2022
570e9ce
Removed need for Macros file.
jennloe Apr 1, 2022
13673c7
Remove extra comment.
jennloe Apr 1, 2022
d5e6103
Merge branch 'FixDeprecatedWarnings' into ODESolve
jennloe Apr 6, 2022
ae0a713
Fixed remaining tftk filenames.
jennloe Apr 6, 2022
30d075d
Renamed several files. Removed tftk defines.
jennloe Apr 6, 2022
cbcc15f
Fixed headers and copyright statement.
jennloe Apr 6, 2022
8e25ae2
Removed Experimental from math functions.
jennloe Apr 6, 2022
8c02abe
Fix unused variables in test problems file.
jennloe Apr 7, 2022
7ee6d5e
Fix more unused parameter warnings.
jennloe Apr 7, 2022
e57e2ee
Merge branch 'FixGEMMUnusedParams' into ODESolve
jennloe Apr 7, 2022
4a0b1a5
Renaming some variables and rearranging for clarity.
jennloe Apr 7, 2022
148b5ed
Fix type error from last commit.
jennloe Apr 7, 2022
de5a5b9
Fix unused type alias error.
jennloe Apr 7, 2022
942eb6d
Add explanatory comments.
jennloe Apr 8, 2022
5078f23
Fix test exec space issue.
jennloe Apr 8, 2022
348f2f7
Remove extra semicolons from compiler complaining.
jennloe Apr 8, 2022
4d4a5b4
First pass at moving RK solver details to impl.
jennloe Apr 9, 2022
895a0fa
Added namespace Experimental.
jennloe Apr 9, 2022
10fc9e5
Removed need for impl to know about AllocationState.
jennloe Apr 9, 2022
1d25b0c
Moved step function to internal interface. Commented unused args.
jennloe Apr 9, 2022
12966d3
Clean up Args.
jennloe Apr 9, 2022
b6bb97c
Make Kokkos views pass by reference.
jennloe Apr 9, 2022
70c6ae8
Added explanatory comments...
jennloe Apr 10, 2022
66778b8
Changed invoke to a static function that does not require a solver ob…
jennloe Apr 11, 2022
0a235a2
Moved performance tests to appropriate directory.
jennloe Apr 16, 2022
d1d9e7b
Fixing compiler warnings in impl and perf test. Shadowed and unused v…
jennloe Apr 18, 2022
fdb2c8a
Changed table from static var to internal function var.
jennloe Apr 18, 2022
ea55b16
Merge remote-tracking branch 'upstream/develop' into ODESolve
jennloe Apr 19, 2022
9587e60
Merge remote-tracking branch 'origin/AddVerboseGmres' into ODESolve
jennloe Apr 19, 2022
f5f4f15
Merge remote-tracking branch 'upstream/develop' into ODESolve
jennloe Apr 20, 2022
3e5f4e4
Reformat PR test reproducer instructions for clarity.
jennloe Apr 21, 2022
fe02bcf
Merge remote-tracking branch 'origin/UpdateBuildInstructions' into OD…
jennloe Apr 21, 2022
09614dc
Update formatting changes.
jennloe Apr 21, 2022
a28f0f5
Merge remote-tracking branch 'origin/UpdateBuildInstructions' into OD…
jennloe Apr 21, 2022
3f8c871
Add output operator for solver status.
jennloe Apr 22, 2022
ec813c9
Merge branch 'ODESolve' of github.com:jennloe/kokkos-kernels into ODE…
jennloe Apr 22, 2022
864c2e7
Renamed some files. Moved one to impl.
jennloe Apr 22, 2022
d06abe7
Add dimension checking to ode solve.
jennloe Apr 22, 2022
2f3e7d6
Merge remote-tracking branch 'upstream/develop' into ODESolve
jennloe Apr 22, 2022
c01c2ae
Put ODE perf tests in one file.
jennloe Apr 24, 2022
f35a758
Think this fixes it.
jennloe Apr 24, 2022
daac93a
Merge remote-tracking branch 'origin/UpdateBuildInstructions' into OD…
jennloe Apr 24, 2022
6e6b65b
Added explicit vs implicit solver tag to RKSolve template.
jennloe May 2, 2022
f75400c
Merge remote-tracking branch 'upstream/develop' into ODESolve
jennloe May 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/batched/dense)
LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/batched/dense/impl)
LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/batched/sparse)
LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/batched/sparse/impl)
# Add the batched ODE solver headers (batched/ode and batched/ode/impl) to the include path.
LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/batched/ode)
LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/batched/ode/impl)

#Include Common
LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/common)
Expand Down
158 changes: 158 additions & 0 deletions src/batched/ode/tftk_DeviceODESolverState.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*--------------------------------------------------------------------*/
/* Copyright 2002 - 2008, 2010, 2011 National Technology & */
/* Engineering Solutions of Sandia, LLC (NTESS). Under the terms */
/* of Contract DE-NA0003525 with NTESS, there is a */
/* non-exclusive license for use of this work by or on behalf */
/* of the U.S. Government. Export of this program may require */
/* a license from the United States Government. */
/*--------------------------------------------------------------------*/
#ifndef SIERRA_tftk_DeviceODESolverState_h
#define SIERRA_tftk_DeviceODESolverState_h

#include <type_traits>

#include "Kokkos_View.hpp"
//#include "tftk_util/tftk_KokkosTypes.h"
#include "tftk_KokkosTypes.h"

namespace tftk
{
namespace ode
{

// Tag types naming the three storage strategies for the Runge-Kutta work arrays.
// They are only declared here (never defined in this header) and are used purely
// as compile-time identifiers through each allocation struct's `type` alias.

// Tag exposed by RkStack (fixed-size Kokkos::Array storage).
struct StackAllocationTag;
// Tag exposed by RkSharedAllocation (views placed in Kokkos scratch memory).
struct ScratchAllocationTag;
// Tag exposed by RkDynamicAllocation (managed Kokkos::View storage).
struct DynamicAllocationTag;

// Empty placeholder type. RkSolverState uses it as its StackType, and as the
// first argument of the non-stack set_views overloads, whenever the allocation
// is not a stack allocation.
struct EmptyRkStack {};

// Fixed-size work storage for one ODE system, sized at compile time.
//
// Template parameters:
//   NDOFS   - number of doubles in each state-sized array.
//   NSTAGES - number of NDOFS-sized rows held in `k`.
//
// The arrays are plain Kokkos::Array members, so the struct owns its storage by
// value. RkSolverState::set_views wraps each array in an unmanaged view (V1/V2)
// over the array's data() pointer.
template <int NDOFS, int NSTAGES> struct RkStack
{
// Allocation tag identifying this as the stack strategy.
using type = StackAllocationTag;

// Unmanaged 1D view type with compile-time extent NDOFS.
using V1 = Kokkos::View<double[NDOFS], tftk::AnonSpace, Kokkos::MemoryUnmanaged>;
// Unmanaged 2D view type [NSTAGES][NDOFS] with LayoutRight, used to view the flat `k` array.
using V2 = Kokkos::
View<double[NSTAGES][NDOFS], Kokkos::LayoutRight, tftk::AnonSpace, Kokkos::MemoryUnmanaged>;
// Backing storage types: one state-sized array, and one flat NSTAGES * NDOFS array.
using Arr1 = Kokkos::Array<double, NDOFS>;
using Arr2 = Kokkos::Array<double, NSTAGES * NDOFS>;
// The roles below describe how RungeKuttaSolver reads/writes the matching views.
Arr1 y;     // solution state: initial condition on entry, updated by each step
Arr1 y0;    // state at the start of the current step
Arr1 dydt;  // derivative evaluated at the start of the current step
Arr1 ytemp; // intermediate state passed to the derivative evaluation of each stage
Arr2 k;     // per-stage derivatives, stored flat and viewed as NSTAGES x NDOFS
};

// Work storage for many ODE systems held in managed Kokkos views in MemorySpace.
//
// The 2D views are sized (n, ndofs) and `k` is sized (n, nstages, ndofs).
// RkSolverState::set_views selects one system by taking a subview at a fixed
// first index (`tid`).
template <typename MemorySpace> struct RkDynamicAllocation
{
// Allocation tag identifying this as the dynamic strategy.
using type = DynamicAllocationTag;
using View1 = Kokkos::View<double **, MemorySpace>;
using View2 = Kokkos::View<double ***, MemorySpace>;

// Wraps views supplied by the caller; nothing is allocated here and the extents
// of the arguments are not checked.
RkDynamicAllocation(View1 y_, View1 y0_, View1 dydt_, View1 ytemp_, View2 k_)
: y(y_), y0(y0_), dydt(dydt_), ytemp(ytemp_), k(k_)
{
}

// Allocates all five views without initializing their contents.
//   n       - extent of the first dimension (the one indexed by `tid` in set_views)
//   ndofs   - extent of the last dimension of every view
//   nstages - extent of the middle dimension of `k`
RkDynamicAllocation(const int n, const int ndofs, const int nstages)
: y(Kokkos::ViewAllocateWithoutInitializing("y"), n, ndofs),
y0(Kokkos::ViewAllocateWithoutInitializing("y0"), n, ndofs),
dydt(Kokkos::ViewAllocateWithoutInitializing("dydt"), n, ndofs),
ytemp(Kokkos::ViewAllocateWithoutInitializing("ytemp"), n, ndofs),
k(Kokkos::ViewAllocateWithoutInitializing("k"), n, nstages, ndofs)
{
}

View1 y;     // (n, ndofs)
View1 y0;    // (n, ndofs)
View1 dydt;  // (n, ndofs)
View1 ytemp; // (n, ndofs)
View2 k;     // (n, nstages, ndofs)
};

// Type-only description of the scratch-memory strategy: it holds no data.
// RkSolverState::set_views constructs V1/V2 views directly from a scratch handle
// supplied by the caller.
template <typename MemorySpace> struct RkSharedAllocation
{
// Allocation tag identifying this as the scratch strategy.
using type = ScratchAllocationTag;
// Scratch memory space of the execution space associated with MemorySpace.
using ScratchSpace = Kokkos::ScratchMemorySpace<typename MemorySpace::execution_space>;
// Unmanaged 1D and 2D view types living in that scratch space.
using V1 = Kokkos::View<double *, ScratchSpace, Kokkos::MemoryUnmanaged>;
using V2 = Kokkos::View<double **, ScratchSpace, Kokkos::MemoryUnmanaged>;
};

// Non-owning bundle of the five work arrays a Runge-Kutta solve operates on.
//
// The views are unmanaged; the memory they point at is provided by one of the
// allocation strategies (RkStack, RkDynamicAllocation, RkSharedAllocation) and
// bound through one of the set_views overloads below. `Allocation` selects the
// strategy through its `type` tag.
template <typename Allocation> struct RkSolverState
{
  using Type = typename Allocation::type;
  // Allocation itself for the stack strategy, EmptyRkStack otherwise.
  using StackType =
    std::conditional_t<std::is_same<Type, StackAllocationTag>::value, Allocation, EmptyRkStack>;
  // Stack storage is contiguous (LayoutRight). The other strategies assign
  // subviews / scratch views, so the members use LayoutStride for them.
  using Layout = std::conditional_t<std::is_same<Type, StackAllocationTag>::value,
                                    Kokkos::LayoutRight,
                                    Kokkos::LayoutStride>;
  using View1 = Kokkos::View<double *, Layout, tftk::AnonSpace, Kokkos::MemoryUnmanaged>;
  using View2 = Kokkos::View<double **, Layout, tftk::AnonSpace, Kokkos::MemoryUnmanaged>;

  // Number of degrees of freedom, taken from the extent of `y`.
  KOKKOS_FORCEINLINE_FUNCTION int ndofs() const { return static_cast<int>(y.extent(0)); }

  // Wrap stack storage.
  // It's unclear why the CUDA compiler decides to put the stack in local memory /
  // unified cache when we make the stack a member variable... so we're forced to
  // pass it in to give the compiler a chance of putting it in the thread's registers.
  //
  // Any trailing arguments are accepted and ignored.
  //
  // This overload is only enabled for stack allocations. Without the constraint
  // it would beat set_views(const Allocation &, int) in overload resolution
  // whenever a non-const dynamic allocation is passed (binding to `Allocation &`
  // ranks better than binding to `const Allocation &`), and then fail to compile
  // on `Allocation::V1`.
  template <typename... Ignored,
            typename A = Allocation,
            std::enable_if_t<std::is_same<typename A::type, StackAllocationTag>::value, int> = 0>
  KOKKOS_FORCEINLINE_FUNCTION void set_views(Allocation & stack, Ignored...)
  {
    using V1 = typename Allocation::V1;
    using V2 = typename Allocation::V2;
    y = V1(stack.y.data());
    y0 = V1(stack.y0.data());
    dydt = V1(stack.dydt.data());
    ytemp = V1(stack.ytemp.data());
    k = V2(stack.k.data());
  }

  // Wrap host / device dynamically allocated memory: bind to row `tid` of each
  // view in `dynamic`. The EmptyRkStack argument is a placeholder and is unused.
  KOKKOS_FORCEINLINE_FUNCTION void
  set_views(EmptyRkStack & /*stack*/, const Allocation & dynamic, int tid)
  {
    y = Kokkos::subview(dynamic.y, tid, Kokkos::ALL);
    y0 = Kokkos::subview(dynamic.y0, tid, Kokkos::ALL);
    dydt = Kokkos::subview(dynamic.dydt, tid, Kokkos::ALL);
    ytemp = Kokkos::subview(dynamic.ytemp, tid, Kokkos::ALL);
    k = Kokkos::subview(dynamic.k, tid, Kokkos::ALL, Kokkos::ALL);
  }

  // Convenience form of the dynamic overload that supplies the placeholder itself.
  KOKKOS_FORCEINLINE_FUNCTION void set_views(const Allocation & dynamic, int tid)
  {
    EmptyRkStack stack{};
    set_views(stack, dynamic, tid);
  }

  // Wrap scratch pad memory: construct each view from `handle` with the given
  // extents, in the order y, y0, dydt, ytemp, k. The EmptyRkStack argument is a
  // placeholder and is unused.
  // NOTE(review): presumably `handle` is a team scratch handle and each view
  // construction takes the next chunk of scratch memory - confirm against caller.
  template <typename ScratchHandle>
  KOKKOS_FORCEINLINE_FUNCTION void
  set_views(EmptyRkStack & /*stack*/, ScratchHandle & handle, const int ndofs, const int nstages)
  {
    using V1 = typename Allocation::V1;
    using V2 = typename Allocation::V2;
    y = V1(handle, ndofs);
    y0 = V1(handle, ndofs);
    dydt = V1(handle, ndofs);
    ytemp = V1(handle, ndofs);
    k = V2(handle, nstages, ndofs);
  }

  // Convenience form of the scratch overload that supplies the placeholder itself.
  template <typename ScratchHandle>
  KOKKOS_FORCEINLINE_FUNCTION void
  set_views(ScratchHandle & handle, const int ndofs, const int nstages)
  {
    EmptyRkStack stack{};
    set_views(stack, handle, ndofs, nstages);
  }

  View1 y;     // solution state
  View1 y0;    // state at the start of the current step
  View1 dydt;  // derivative at the start of the current step
  View1 ytemp; // intermediate state used while evaluating stages
  View2 k;     // NSTAGES x NDOFS
};

} // namespace ode
} // namespace tftk

#endif
28 changes: 28 additions & 0 deletions src/batched/ode/tftk_DeviceODESolverStatus.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*--------------------------------------------------------------------*/
/* Copyright 2002 - 2008, 2010, 2011 National Technology & */
/* Engineering Solutions of Sandia, LLC (NTESS). Under the terms */
/* of Contract DE-NA0003525 with NTESS, there is a */
/* non-exclusive license for use of this work by or on behalf */
/* of the U.S. Government. Export of this program may require */
/* a license from the United States Government. */
/*--------------------------------------------------------------------*/
#ifndef SIERRA_tftk_DeviceODESolverStatus_h
#define SIERRA_tftk_DeviceODESolverStatus_h

#include <ostream>
namespace tftk
{
namespace ode
{
// Result code reported by the ODE solvers (see RungeKuttaSolver::solve).
enum class ODESolverStatus
{
  // The integration reached the end time with a finite state.
  SUCCESS = 0,
  // The substep limit was exhausted before reaching the end time.
  FAILED_TO_CONVERGE = 1,
  // Adaptive step reduction pushed the step size below the configured minimum.
  MINIMUM_TIMESTEP_REACHED = 2,
  // The state contained a non-finite value on entry or at the end time.
  NONFINITE_STATE = 3
};

// Stream insertion for ODESolverStatus. Only declared in this header; the
// definition (and therefore the exact text written) lives elsewhere.
std::ostream & operator<<(std::ostream & os, ODESolverStatus status);
} // namespace ode
} // namespace tftk
#endif
192 changes: 192 additions & 0 deletions src/batched/ode/tftk_DeviceODESolvers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/*--------------------------------------------------------------------*/
/* Copyright 2002 - 2008, 2010, 2011 National Technology & */
/* Engineering Solutions of Sandia, LLC (NTESS). Under the terms */
/* of Contract DE-NA0003525 with NTESS, there is a */
/* non-exclusive license for use of this work by or on behalf */
/* of the U.S. Government. Export of this program may require */
/* a license from the United States Government. */
/*--------------------------------------------------------------------*/
#ifndef SIERRA_tftk_DeviceODESolvers_h
jennloe marked this conversation as resolved.
Show resolved Hide resolved
#define SIERRA_tftk_DeviceODESolvers_h

#include <Kokkos_Macros.hpp>
#include <Kokkos_ArithTraits.hpp>
#include "Kokkos_Layout.hpp"
#include "Kokkos_MemoryTraits.hpp"

//#include "tftk_util/tftk_KokkosTypes.h"
#include "tftk_KokkosTypes.h"
#include <tftk_ODEArgs.h>
#include <tftk_RungeKuttaTables.h>
#include <tftk_DeviceODESolverStatus.h>
#include <tftk_DeviceODESolverState.h>

//#include "stk_util/util/ReportHandler.hpp"

#include <type_traits>

namespace tftk
{
namespace ode
{

// Mixed absolute/relative tolerance for one component:
//   absTol + relTol * max(|y|, |y0|)
// where y and y0 are the component's values at the end and start of a step.
KOKKOS_FORCEINLINE_FUNCTION double
tol(const double y, const double y0, const double absTol, const double relTol)
{
  namespace KE = Kokkos::Experimental;

  // Scale the relative part by the larger magnitude of the two states.
  const double magnitude = KE::fmax(KE::fabs(y), KE::fabs(y0));
  return absTol + relTol * magnitude;
}

// Returns true when the first `ndofs` entries of `y` are all finite, false as
// soon as a non-finite entry is found. An `ndofs` of zero yields true.
template <typename View> KOKKOS_FUNCTION bool isfinite(View & y, const unsigned ndofs)
{
  for (unsigned idx = 0; idx < ndofs; ++idx)
  {
    if (!Kokkos::Experimental::isfinite(y[idx]))
    {
      return false;
    }
  }
  return true;
}
// Runge-Kutta integrator driven by a coefficient table, with optional adaptive
// step-size control.
//
// TableType supplies the stage count `n`, the `order`, the packed lower-triangular
// stage coefficients `a`, the nodes `c`, the solution weights `b` and the error
// weights `e`. Solver settings are read from `controls`, which is built from the
// ODEArgs given to the constructor.
template <typename TableType> struct RungeKuttaSolver
{
  static constexpr int nstages = TableType::n;

  RungeKuttaSolver(const ODEArgs & args) : controls(args) {}

  // Integrates `ode` from `tstart` to `tend`, using the work arrays in `s`.
  //
  // On entry s.y holds the initial condition; each accepted step overwrites s.y
  // with the new solution.
  //
  // Returns:
  //   NONFINITE_STATE          - the initial state, or the state at tend, is not finite
  //   MINIMUM_TIMESTEP_REACHED - adaptive reduction made dt < controls.minStepSize
  //   FAILED_TO_CONVERGE       - controls.maxSubSteps steps taken without reaching tend
  //   SUCCESS                  - tend reached with a finite state
  template <typename ODEType, typename StateType>
  KOKKOS_FUNCTION ODESolverStatus solve(
    const ODEType & ode, double tstart, double tend, StateType & s) const
  {
    using Kokkos::Experimental::fmax;
    using Kokkos::Experimental::fmin;
    using Kokkos::Experimental::pow;

    const int ndofs = s.ndofs();

    // TODO: decide whether a mismatch between ode.num_equations() and ndofs
    // should be caught here with an assert / error.

    double tcur = tstart;

    // Seed the step-start state with the initial condition.
    for (int d = 0; d < ndofs; ++d)
    {
      s.y0[d] = s.y[d];
    }

    if (!isfinite(s.y0, ndofs))
    {
      return ODESolverStatus::NONFINITE_STATE;
    }

    // Exponent applied to the error estimate when rescaling the step size.
    const double errExponent = -1.0 / table.order;

    double dt = (tend - tcur) / controls.num_substeps;

    for (int iter = 0; iter < controls.maxSubSteps; ++iter)
    {
      // NOTE(review): s.dydt is written here but not read by step() in this
      // struct - confirm whether callers rely on it.
      ode.derivatives(tcur, s.y0, s.dydt);

      // Never step past the end time.
      if (tcur + dt > tend)
      {
        dt = tend - tcur;
      }

      // Attempt the step; in adaptive mode shrink dt and retry until the scaled
      // error is acceptable (err <= 1).
      double err = 0.0;
      for (;;)
      {
        err = 0.0;
        step(ode, tcur, dt, s, err);

        const bool rejected = err > 1 && controls.is_adaptive;
        if (!rejected)
        {
          break;
        }

        // Shrink by the error-based factor, but by no more than a factor of 5.
        dt *= fmax(0.2, 0.8 * pow(err, errExponent));

        if (dt < controls.minStepSize)
        {
          return ODESolverStatus::MINIMUM_TIMESTEP_REACHED;
        }
      }

      // Accept the step.
      tcur += dt;

      for (int d = 0; d < ndofs; ++d)
      {
        s.y0[d] = s.y[d];
      }

      if (tcur >= tend)
      {
        if (isfinite(s.y, ndofs))
        {
          return ODESolverStatus::SUCCESS;
        }
        return ODESolverStatus::NONFINITE_STATE;
      }

      // Grow dt when the step was comfortably accurate; the factor is kept
      // within [2, 10].
      if (err < 0.5 && controls.is_adaptive)
      {
        dt *= fmin(10.0, fmax(2.0, 0.9 * pow(err, errExponent)));
      }
    }

    return ODESolverStatus::FAILED_TO_CONVERGE;
  }

  // Performs one Runge-Kutta step of size `dt` from time `t0`.
  //
  // Reads s.y0, fills s.k (one row per stage) and s.ytemp, and writes the new
  // solution to s.y. `err` is raised to the largest scaled error estimate over
  // all components (it is only ever increased, so callers reset it beforehand).
  template <typename ODEType, typename StateType>
  KOKKOS_FUNCTION void
  step(const ODEType & ode, const double t0, const double dt, StateType & s, double & err) const
  {
    const int ndofs = s.ndofs();

    // Stage evaluations. Row `stage` of the packed lower-triangular table `a`
    // starts at stage * (stage + 1) / 2; only earlier stages contribute.
    for (int stage = 0; stage < nstages; ++stage)
    {
      const int rowStart = (stage + 1) * stage / 2;
      for (int d = 0; d < ndofs; ++d)
      {
        double acc = 0.0;
        for (int prev = 0; prev < stage; ++prev)
        {
          acc += table.a[prev + rowStart] * s.k(prev, d);
        }

        s.ytemp[d] = s.y0[d] + dt * acc;
      }
      auto stageSlope = Kokkos::subview(s.k, stage, Kokkos::ALL);
      ode.derivatives(t0 + table.c[stage] * dt, s.ytemp, stageSlope);
    }

    // Combine the stages into the new solution and the error estimate.
    for (int d = 0; d < ndofs; ++d)
    {
      double update = 0.0;
      double errEst = 0.0;
      for (int stage = 0; stage < nstages; ++stage)
      {
        update += table.b[stage] * s.k(stage, d);
        errEst += table.e[stage] * s.k(stage, d);
      }
      s.y[d] = s.y0[d] + dt * update;
      errEst *= dt;

      // Scale the estimate by the component's tolerance and keep the maximum.
      const double scaledErr = Kokkos::Experimental::fabs(errEst) /
                               tol(s.y[d], s.y0[d], controls.absTol, controls.relTol);
      err = Kokkos::Experimental::fmax(err, scaledErr);
    }
  }

  const TableType table;
  const SolverControls controls;
};
} // namespace ode
} // namespace tftk

#endif
Loading