From ee5360454d39933419fbf76262cefdb83b19674f Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 13 Jul 2022 18:44:55 -0700 Subject: [PATCH 1/3] Reformat example/fenl files changed in #1382 --- example/fenl/TestFixture.hpp | 120 +-- example/fenl/fenl_functors.hpp | 1406 ++++++++++++++++---------------- 2 files changed, 751 insertions(+), 775 deletions(-) diff --git a/example/fenl/TestFixture.hpp b/example/fenl/TestFixture.hpp index 165265b881..7c09752433 100644 --- a/example/fenl/TestFixture.hpp +++ b/example/fenl/TestFixture.hpp @@ -56,102 +56,102 @@ namespace Kokkos { namespace Example { -template< class Device > -struct FixtureVerifyElemNodeCoord -{ - typedef Device execution_space ; +template +struct FixtureVerifyElemNodeCoord { + typedef Device execution_space; - typedef struct { size_t success , error ; } value_type ; + typedef struct { + size_t success, error; + } value_type; - typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ; + typedef Kokkos::Example::BoxElemFixture< + Device, Kokkos::Example::BoxElemPart::ElemLinear> + FixtureType; - FixtureType m_fixture ; + FixtureType m_fixture; KOKKOS_INLINE_FUNCTION - void init( value_type & update ) const { update.success = update.error = 0 ; } + void init(value_type& update) const { update.success = update.error = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & update , - volatile const value_type & input ) const - { - update.success += input.success ; - update.error += input.error ; - } - + void join(volatile value_type& update, + volatile const value_type& input) const { + update.success += input.success; + update.error += input.error; + } KOKKOS_INLINE_FUNCTION - void operator()( size_t ielem , value_type & update ) const - { - unsigned node_coord[ FixtureType::ElemNode ][3] ; - - for ( unsigned i = 0 ; i < FixtureType::ElemNode ; ++i ) { - const unsigned node_id = m_fixture.elem_node(ielem,i); - node_coord[i][0] = m_fixture.node_grid(node_id,0); - node_coord[i][1] = m_fixture.node_grid(node_id,1); - node_coord[i][2] = m_fixture.node_grid(node_id,2); + void operator()(size_t ielem, value_type& update) const { + unsigned node_coord[FixtureType::ElemNode][3]; + + for (unsigned i = 0; i < FixtureType::ElemNode; ++i) { + const unsigned node_id = m_fixture.elem_node(ielem, i); + node_coord[i][0] = m_fixture.node_grid(node_id, 0); + node_coord[i][1] = m_fixture.node_grid(node_id, 1); + node_coord[i][2] = m_fixture.node_grid(node_id, 2); } - int error = 0 ; - for ( unsigned i = 1 ; i < FixtureType::ElemNode ; ++i ) { - if ( node_coord[0][0] + m_fixture.elem_node_local(i,0) != node_coord[i][0] || - node_coord[0][1] + m_fixture.elem_node_local(i,1) != node_coord[i][1] || - node_coord[0][2] + m_fixture.elem_node_local(i,2) != node_coord[i][2] ) { - error = 1 ; + int error = 0; + for (unsigned i = 1; i < FixtureType::ElemNode; ++i) { + if (node_coord[0][0] + m_fixture.elem_node_local(i, 0) != + node_coord[i][0] || + node_coord[0][1] + m_fixture.elem_node_local(i, 1) != + node_coord[i][1] || + node_coord[0][2] + m_fixture.elem_node_local(i, 2) != + node_coord[i][2]) { + error = 1; } } - if ( error ) { - ++update.error ; - } - else { - ++update.success ; + if (error) { + ++update.error; + } else { + ++update.success; } } - FixtureVerifyElemNodeCoord( const FixtureType & f ) : m_fixture(f) {} + FixtureVerifyElemNodeCoord(const FixtureType& f) : m_fixture(f) {} }; +template +void test_fixture() { + typedef Kokkos::Example::BoxElemFixture< + Device, Kokkos::Example::BoxElemPart::ElemLinear> + FixtureType; -template< class Device > -void test_fixture() -{ - typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ; - - const Kokkos::Example::BoxElemPart::Decompose - decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ; - - const unsigned global_size = 256 ; - const unsigned global_nx = 400 ; - const unsigned global_ny = 400 ; - const unsigned global_nz = 400 ; + const Kokkos::Example::BoxElemPart::Decompose decompose = + Kokkos::Example::BoxElemPart::DecomposeElem; // DecomposeElem | + // DecomposeNode ; - for ( unsigned my_rank = 0 ; my_rank < global_size ; ++my_rank ) { + const unsigned global_size = 256; + const unsigned global_nx = 400; + const unsigned global_ny = 400; + const unsigned global_nz = 400; - const FixtureType fixture( decompose , global_size , my_rank , global_nx , global_ny , global_nz ); + for (unsigned my_rank = 0; my_rank < global_size; ++my_rank) { + const FixtureType fixture(decompose, global_size, my_rank, global_nx, + global_ny, global_nz); // Verify grid coordinates of element's nodes - - typename FixtureVerifyElemNodeCoord::value_type result = { 0 , 0 }; - Kokkos::parallel_reduce( fixture.elem_node().extent(0) , FixtureVerifyElemNodeCoord( fixture ) , result ); + typename FixtureVerifyElemNodeCoord::value_type result = {0, 0}; - if ( result.error ) { + Kokkos::parallel_reduce(fixture.elem_node().extent(0), + FixtureVerifyElemNodeCoord(fixture), + result); + + if (result.error) { std::cout << "P[" << my_rank << ":" << global_size << "] Fixture elem_node_coord" << " success(" << result.success << ")" - << " error(" << result.error << ")" - << std::endl ; + << " error(" << result.error << ")" << std::endl; } // Check send/recv alignment - - } } - } /* namespace Example */ } /* namespace Kokkos */ #endif /* #ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP */ - diff --git a/example/fenl/fenl_functors.hpp b/example/fenl/fenl_functors.hpp index 01a4e989da..5706497db2 100644 --- a/example/fenl/fenl_functors.hpp +++ b/example/fenl/fenl_functors.hpp @@ -69,44 +69,42 @@ namespace Kokkos { namespace Example { namespace FENL { -template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode > +template class NodeNodeGraph { -public: + public: + typedef typename ElemNodeIdView::execution_space execution_space; + typedef pair key_type; - typedef typename ElemNodeIdView::execution_space execution_space ; - typedef pair key_type ; - - typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ; - typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ; - typedef Kokkos::View< unsigned , execution_space > UnsignedValue ; + typedef Kokkos::UnorderedMap SetType; + typedef typename CrsGraphType::row_map_type::non_const_type RowMapType; + typedef Kokkos::View UnsignedValue; // Static dimensions of 0 generate compiler warnings or errors. - typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space > - ElemGraphType ; - -private: - - enum PhaseType { FILL_NODE_SET , - SCAN_NODE_COUNT , - FILL_GRAPH_ENTRIES , - SORT_GRAPH_ENTRIES , - FILL_ELEMENT_GRAPH }; - - const unsigned node_count ; - const ElemNodeIdView elem_node_id ; - UnsignedValue row_total ; - RowMapType row_count ; - RowMapType row_map ; - SetType node_node_set ; - PhaseType phase ; + typedef Kokkos::View + ElemGraphType; + + private: + enum PhaseType { + FILL_NODE_SET, + SCAN_NODE_COUNT, + FILL_GRAPH_ENTRIES, + SORT_GRAPH_ENTRIES, + FILL_ELEMENT_GRAPH + }; -public: + const unsigned node_count; + const ElemNodeIdView elem_node_id; + UnsignedValue row_total; + RowMapType row_count; + RowMapType row_map; + SetType node_node_set; + PhaseType phase; - CrsGraphType graph ; - ElemGraphType elem_graph ; + public: + CrsGraphType graph; + ElemGraphType elem_graph; - struct Times - { + struct Times { double ratio; double fill_node_set; double scan_node_count; @@ -115,139 +113,146 @@ class NodeNodeGraph { double fill_element_graph; }; - NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id , - const unsigned arg_node_count, - Times & results - ) - : node_count(arg_node_count) - , elem_node_id( arg_elem_node_id ) - , row_total( "row_total" ) - , row_count(Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_count") , node_count ) // will deep_copy to 0 inside loop - , row_map( "graph_row_map" , node_count + 1 ) - , node_node_set() - , phase( FILL_NODE_SET ) - , graph() - , elem_graph() - { - //-------------------------------- - // Guess at span required for the map: - - Kokkos::Timer wall_clock ; - - wall_clock.reset(); - phase = FILL_NODE_SET ; - - // upper bound on the span - size_t set_span = (28ull * node_count) / 2; - - { - // Zero the row count to restart the fill - Kokkos::deep_copy( row_count , 0u ); - - node_node_set = SetType( set_span ); - - // May be larger that requested: - set_span = node_node_set.span(); - - Kokkos::parallel_for( "kokkos-kernels/example/fenl: NodeNodeGraph" , elem_node_id.extent(0) , *this ); - } + NodeNodeGraph(const ElemNodeIdView& arg_elem_node_id, + const unsigned arg_node_count, Times& results) + : node_count(arg_node_count), + elem_node_id(arg_elem_node_id), + row_total("row_total"), + row_count(Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_count"), + node_count) // will deep_copy to 0 inside loop + , + row_map("graph_row_map", node_count + 1), + node_node_set(), + phase(FILL_NODE_SET), + graph(), + elem_graph() { + //-------------------------------- + // Guess at span required for the map: + + Kokkos::Timer wall_clock; + + wall_clock.reset(); + phase = FILL_NODE_SET; + + // upper bound on the span + size_t set_span = (28ull * node_count) / 2; - execution_space().fence(); - results.ratio = (double)node_node_set.size() / (double)node_node_set.span(); - results.fill_node_set = wall_clock.seconds(); - //-------------------------------- + { + // Zero the row count to restart the fill + Kokkos::deep_copy(row_count, 0u); - wall_clock.reset(); - phase = SCAN_NODE_COUNT ; + node_node_set = SetType(set_span); - // Exclusive scan of row_count into row_map - // including the final total in the 'node_count + 1' position. - // Zero the 'row_count' values. - Kokkos::parallel_scan( node_count , *this ); + // May be larger that requested: + set_span = node_node_set.span(); - // Zero the row count for the fill: - Kokkos::deep_copy( row_count , 0u ); + Kokkos::parallel_for("kokkos-kernels/example/fenl: NodeNodeGraph", + elem_node_id.extent(0), *this); + } - unsigned graph_entry_count = 0 ; + execution_space().fence(); + results.ratio = (double)node_node_set.size() / (double)node_node_set.span(); + results.fill_node_set = wall_clock.seconds(); + //-------------------------------- - Kokkos::deep_copy( graph_entry_count , row_total ); + wall_clock.reset(); + phase = SCAN_NODE_COUNT; - // Assign graph's row_map and allocate graph's entries - graph.row_map = row_map ; - graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count ); + // Exclusive scan of row_count into row_map + // including the final total in the 'node_count + 1' position. + // Zero the 'row_count' values. + Kokkos::parallel_scan(node_count, *this); - //-------------------------------- - // Fill graph's entries from the (node,node) set. + // Zero the row count for the fill: + Kokkos::deep_copy(row_count, 0u); - execution_space().fence(); - results.scan_node_count = wall_clock.seconds(); + unsigned graph_entry_count = 0; - wall_clock.reset(); - phase = FILL_GRAPH_ENTRIES ; - Kokkos::parallel_for( node_node_set.span() , *this ); + Kokkos::deep_copy(graph_entry_count, row_total); - execution_space().fence(); - results.fill_graph_entries = wall_clock.seconds(); + // Assign graph's row_map and allocate graph's entries + graph.row_map = row_map; + graph.entries = + typename CrsGraphType::entries_type("graph_entries", graph_entry_count); - //-------------------------------- - // Done with the temporary sets and arrays - wall_clock.reset(); - phase = SORT_GRAPH_ENTRIES ; + //-------------------------------- + // Fill graph's entries from the (node,node) set. - row_total = UnsignedValue(); - row_count = RowMapType(); - row_map = RowMapType(); - node_node_set.clear(); + execution_space().fence(); + results.scan_node_count = wall_clock.seconds(); - //-------------------------------- + wall_clock.reset(); + phase = FILL_GRAPH_ENTRIES; + Kokkos::parallel_for(node_node_set.span(), *this); - Kokkos::parallel_for( node_count , *this ); + execution_space().fence(); + results.fill_graph_entries = wall_clock.seconds(); - execution_space().fence(); - results.sort_graph_entries = wall_clock.seconds(); + //-------------------------------- + // Done with the temporary sets and arrays + wall_clock.reset(); + phase = SORT_GRAPH_ENTRIES; - //-------------------------------- - // Element-to-graph mapping: - wall_clock.reset(); - phase = FILL_ELEMENT_GRAPH ; - elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0) ); - Kokkos::parallel_for( elem_node_id.extent(0) , *this ); + row_total = UnsignedValue(); + row_count = RowMapType(); + row_map = RowMapType(); + node_node_set.clear(); - execution_space().fence(); - results.fill_element_graph = wall_clock.seconds(); - } + //-------------------------------- + + Kokkos::parallel_for(node_count, *this); + + execution_space().fence(); + results.sort_graph_entries = wall_clock.seconds(); + + //-------------------------------- + // Element-to-graph mapping: + wall_clock.reset(); + phase = FILL_ELEMENT_GRAPH; + elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0)); + Kokkos::parallel_for(elem_node_id.extent(0), *this); + + execution_space().fence(); + results.fill_element_graph = wall_clock.seconds(); + } //------------------------------------ // parallel_for: create map and count row length KOKKOS_INLINE_FUNCTION - void fill_set( const unsigned ielem ) const - { + void fill_set(const unsigned ielem) const { // Loop over element's (row_local_node,col_local_node) pairs: - for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) { - - const unsigned row_node = elem_node_id( ielem , row_local_node ); + for (unsigned row_local_node = 0; row_local_node < elem_node_id.extent(1); + ++row_local_node) { + const unsigned row_node = elem_node_id(ielem, row_local_node); - for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) { + for (unsigned col_local_node = row_local_node; + col_local_node < elem_node_id.extent(1); ++col_local_node) { + const unsigned col_node = elem_node_id(ielem, col_local_node); - const unsigned col_node = elem_node_id( ielem , col_local_node ); + // If either node is locally owned then insert the pair into the + // unordered map: - // If either node is locally owned then insert the pair into the unordered map: + if (row_node < row_count.extent(0) || col_node < row_count.extent(0)) { + const key_type key = (row_node < col_node) + ? make_pair(row_node, col_node) + : make_pair(col_node, row_node); - if ( row_node < row_count.extent(0) || col_node < row_count.extent(0) ) { - - const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ; - - const typename SetType::insert_result result = node_node_set.insert( key ); + const typename SetType::insert_result result = + node_node_set.insert(key); // A successfull insert: the first time this pair was added - if ( result.success() ) { - + if (result.success()) { // If row node is owned then increment count - if ( row_node < row_count.extent(0) ) { atomic_fetch_add( & row_count( row_node ) , 1 ); } + if (row_node < row_count.extent(0)) { + atomic_fetch_add(&row_count(row_node), 1); + } - // If column node is owned and not equal to row node then increment count - if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , 1 ); } + // If column node is owned and not equal to row node then increment + // count + if (col_node < row_count.extent(0) && col_node != row_node) { + atomic_fetch_add(&row_count(col_node), 1); + } } } } @@ -255,114 +260,113 @@ class NodeNodeGraph { } KOKKOS_INLINE_FUNCTION - void fill_graph_entries( const unsigned iset ) const - { - if ( node_node_set.valid_at(iset) ) { + void fill_graph_entries(const unsigned iset) const { + if (node_node_set.valid_at(iset)) { // Add each entry to the graph entries. - const key_type key = node_node_set.key_at(iset) ; - const unsigned row_node = key.first ; - const unsigned col_node = key.second ; + const key_type key = node_node_set.key_at(iset); + const unsigned row_node = key.first; + const unsigned col_node = key.second; - if ( row_node < row_count.extent(0) ) { - const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 ); - graph.entries( offset ) = col_node ; + if (row_node < row_count.extent(0)) { + const unsigned offset = + graph.row_map(row_node) + atomic_fetch_add(&row_count(row_node), 1); + graph.entries(offset) = col_node; } - if ( col_node < row_count.extent(0) && col_node != row_node ) { - const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , 1 ); - graph.entries( offset ) = row_node ; + if (col_node < row_count.extent(0) && col_node != row_node) { + const unsigned offset = + graph.row_map(col_node) + atomic_fetch_add(&row_count(col_node), 1); + graph.entries(offset) = row_node; } } } KOKKOS_INLINE_FUNCTION - void sort_graph_entries( const unsigned irow ) const - { - const unsigned row_beg = graph.row_map( irow ); - const unsigned row_end = graph.row_map( irow + 1 ); - for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) { + void sort_graph_entries(const unsigned irow) const { + const unsigned row_beg = graph.row_map(irow); + const unsigned row_end = graph.row_map(irow + 1); + for (unsigned i = row_beg + 1; i < row_end; ++i) { const unsigned col = graph.entries(i); - unsigned j = i ; - for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) { - graph.entries(j) = graph.entries(j-1); + unsigned j = i; + for (; row_beg < j && col < graph.entries(j - 1); --j) { + graph.entries(j) = graph.entries(j - 1); } - graph.entries(j) = col ; + graph.entries(j) = col; } } KOKKOS_INLINE_FUNCTION - void fill_elem_graph_map( const unsigned ielem ) const - { - for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) { - - const unsigned row_node = elem_node_id( ielem , row_local_node ); + void fill_elem_graph_map(const unsigned ielem) const { + for (unsigned row_local_node = 0; row_local_node < elem_node_id.extent(1); + ++row_local_node) { + const unsigned row_node = elem_node_id(ielem, row_local_node); - for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) { + for (unsigned col_local_node = 0; col_local_node < elem_node_id.extent(1); + ++col_local_node) { + const unsigned col_node = elem_node_id(ielem, col_local_node); - const unsigned col_node = elem_node_id( ielem , col_local_node ); + unsigned entry = ~0u; - unsigned entry = ~0u ; + if (row_node + 1 < graph.row_map.extent(0)) { + const unsigned entry_end = graph.row_map(row_node + 1); - if ( row_node + 1 < graph.row_map.extent(0) ) { + entry = graph.row_map(row_node); - const unsigned entry_end = graph.row_map( row_node + 1 ); + for (; entry < entry_end && graph.entries(entry) != col_node; ++entry) + ; - entry = graph.row_map( row_node ); - - for ( ; entry < entry_end && graph.entries(entry) != col_node ; ++entry ); - - if ( entry == entry_end ) entry = ~0u ; + if (entry == entry_end) entry = ~0u; } - elem_graph( ielem , row_local_node , col_local_node ) = entry ; + elem_graph(ielem, row_local_node, col_local_node) = entry; } } } KOKKOS_INLINE_FUNCTION - void operator()( const unsigned iwork ) const - { - if ( phase == FILL_NODE_SET ) { - fill_set( iwork ); - } - else if ( phase == FILL_GRAPH_ENTRIES ) { - fill_graph_entries( iwork ); - } - else if ( phase == SORT_GRAPH_ENTRIES ) { - sort_graph_entries( iwork ); - } - else if ( phase == FILL_ELEMENT_GRAPH ) { - fill_elem_graph_map( iwork ); + void operator()(const unsigned iwork) const { + if (phase == FILL_NODE_SET) { + fill_set(iwork); + } else if (phase == FILL_GRAPH_ENTRIES) { + fill_graph_entries(iwork); + } else if (phase == SORT_GRAPH_ENTRIES) { + sort_graph_entries(iwork); + } else if (phase == FILL_ELEMENT_GRAPH) { + fill_elem_graph_map(iwork); } } //------------------------------------ // parallel_scan: row offsets - typedef unsigned value_type ; + typedef unsigned value_type; KOKKOS_INLINE_FUNCTION - void operator()( const unsigned irow , unsigned & update , const bool final ) const - { + void operator()(const unsigned irow, unsigned& update, + const bool final) const { // exclusive scan - if ( final ) { row_map( irow ) = update ; } + if (final) { + row_map(irow) = update; + } - update += row_count( irow ); + update += row_count(irow); - if ( final ) { - if ( irow + 1 == row_count.extent(0) ) { - row_map( irow + 1 ) = update ; - row_total() = update ; + if (final) { + if (irow + 1 == row_count.extent(0)) { + row_map(irow + 1) = update; + row_total() = update; } } } KOKKOS_INLINE_FUNCTION - void init( unsigned & update ) const { update = 0 ; } + void init(unsigned& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile unsigned & update , const volatile unsigned & input ) const { update += input ; } + void join(volatile unsigned& update, const volatile unsigned& input) const { + update += input; + } //------------------------------------ }; @@ -377,222 +381,210 @@ namespace Kokkos { namespace Example { namespace FENL { -template< class ElemCompType > +template class NodeElemGatherFill { -public: - - typedef typename ElemCompType::execution_space execution_space ; - typedef typename ElemCompType::vector_type vector_type ; - typedef typename ElemCompType::sparse_matrix_type sparse_matrix_type ; - typedef typename ElemCompType::elem_node_type elem_node_type ; - typedef typename ElemCompType::elem_vectors_type elem_vectors_type ; - typedef typename ElemCompType::elem_matrices_type elem_matrices_type ; - typedef typename ElemCompType::elem_graph_type elem_graph_type ; + public: + typedef typename ElemCompType::execution_space execution_space; + typedef typename ElemCompType::vector_type vector_type; + typedef typename ElemCompType::sparse_matrix_type sparse_matrix_type; + typedef typename ElemCompType::elem_node_type elem_node_type; + typedef typename ElemCompType::elem_vectors_type elem_vectors_type; + typedef typename ElemCompType::elem_matrices_type elem_matrices_type; + typedef typename ElemCompType::elem_graph_type elem_graph_type; - static const unsigned ElemNodeCount = ElemCompType::ElemNodeCount ; + static const unsigned ElemNodeCount = ElemCompType::ElemNodeCount; //------------------------------------ -private: - - typedef Kokkos::StaticCrsGraph< unsigned[2] , execution_space > CrsGraphType ; - typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ; - typedef Kokkos::View< unsigned , execution_space > UnsignedValue ; - - enum PhaseType { FILL_NODE_COUNT , - SCAN_NODE_COUNT , - FILL_GRAPH_ENTRIES , - SORT_GRAPH_ENTRIES , - GATHER_FILL }; - - const elem_node_type elem_node_id ; - const elem_graph_type elem_graph ; - UnsignedValue row_total ; - RowMapType row_count ; - RowMapType row_map ; - CrsGraphType graph ; - vector_type residual ; - sparse_matrix_type jacobian ; - elem_vectors_type elem_residual ; - elem_matrices_type elem_jacobian ; - PhaseType phase ; - -public: + private: + typedef Kokkos::StaticCrsGraph CrsGraphType; + typedef typename CrsGraphType::row_map_type::non_const_type RowMapType; + typedef Kokkos::View UnsignedValue; + + enum PhaseType { + FILL_NODE_COUNT, + SCAN_NODE_COUNT, + FILL_GRAPH_ENTRIES, + SORT_GRAPH_ENTRIES, + GATHER_FILL + }; + const elem_node_type elem_node_id; + const elem_graph_type elem_graph; + UnsignedValue row_total; + RowMapType row_count; + RowMapType row_map; + CrsGraphType graph; + vector_type residual; + sparse_matrix_type jacobian; + elem_vectors_type elem_residual; + elem_matrices_type elem_jacobian; + PhaseType phase; + + public: NodeElemGatherFill() - : elem_node_id() - , elem_graph() - , row_total() - , row_count() - , row_map() - , graph() - , residual() - , jacobian() - , elem_residual() - , elem_jacobian() - , phase( FILL_NODE_COUNT ) - {} - - NodeElemGatherFill( const NodeElemGatherFill & rhs ) - : elem_node_id( rhs.elem_node_id ) - , elem_graph( rhs.elem_graph ) - , row_total( rhs.row_total ) - , row_count( rhs.row_count ) - , row_map( rhs.row_map ) - , graph( rhs.graph ) - , residual( rhs.residual ) - , jacobian( rhs.jacobian ) - , elem_residual( rhs.elem_residual ) - , elem_jacobian( rhs.elem_jacobian ) - , phase( rhs.phase ) - {} - - NodeElemGatherFill( const elem_node_type & arg_elem_node_id , - const elem_graph_type & arg_elem_graph , - const vector_type & arg_residual , - const sparse_matrix_type & arg_jacobian , - const elem_vectors_type & arg_elem_residual , - const elem_matrices_type & arg_elem_jacobian ) - : elem_node_id( arg_elem_node_id ) - , elem_graph( arg_elem_graph ) - , row_total( "row_total" ) - , row_count( "row_count" , arg_residual.extent(0) ) - , row_map( "graph_row_map" , arg_residual.extent(0) + 1 ) - , graph() - , residual( arg_residual ) - , jacobian( arg_jacobian ) - , elem_residual( arg_elem_residual ) - , elem_jacobian( arg_elem_jacobian ) - , phase( FILL_NODE_COUNT ) - { - //-------------------------------- - // Count node->element relations - - phase = FILL_NODE_COUNT ; - - Kokkos::parallel_for( elem_node_id.extent(0) , *this ); - - //-------------------------------- - - phase = SCAN_NODE_COUNT ; - - // Exclusive scan of row_count into row_map - // including the final total in the 'node_count + 1' position. - // Zero the 'row_count' values. - Kokkos::parallel_scan( residual.extent(0) , *this ); - - // Zero the row count for the fill: - Kokkos::deep_copy( row_count , typename RowMapType::value_type(0) ); - - unsigned graph_entry_count = 0 ; - - Kokkos::deep_copy( graph_entry_count , row_total ); - - // Assign graph's row_map and allocate graph's entries - graph.row_map = row_map ; - - typedef typename CrsGraphType::entries_type graph_entries_type ; - - graph.entries = graph_entries_type( "graph_entries" , graph_entry_count ); - - //-------------------------------- - // Fill graph's entries from the (node,node) set. - - phase = FILL_GRAPH_ENTRIES ; - - Kokkos::deep_copy( row_count , 0u ); - Kokkos::parallel_for( elem_node_id.extent(0) , *this ); - - execution_space().fence(); - - //-------------------------------- - // Done with the temporary sets and arrays - - row_total = UnsignedValue(); - row_count = RowMapType(); - row_map = RowMapType(); - - //-------------------------------- - - phase = SORT_GRAPH_ENTRIES ; - Kokkos::parallel_for( residual.extent(0) , *this ); - - execution_space().fence(); - - phase = GATHER_FILL ; - } - - void apply() const - { - Kokkos::parallel_for( residual.extent(0) , *this ); + : elem_node_id(), + elem_graph(), + row_total(), + row_count(), + row_map(), + graph(), + residual(), + jacobian(), + elem_residual(), + elem_jacobian(), + phase(FILL_NODE_COUNT) {} + + NodeElemGatherFill(const NodeElemGatherFill& rhs) + : elem_node_id(rhs.elem_node_id), + elem_graph(rhs.elem_graph), + row_total(rhs.row_total), + row_count(rhs.row_count), + row_map(rhs.row_map), + graph(rhs.graph), + residual(rhs.residual), + jacobian(rhs.jacobian), + elem_residual(rhs.elem_residual), + elem_jacobian(rhs.elem_jacobian), + phase(rhs.phase) {} + + NodeElemGatherFill(const elem_node_type& arg_elem_node_id, + const elem_graph_type& arg_elem_graph, + const vector_type& arg_residual, + const sparse_matrix_type& arg_jacobian, + const elem_vectors_type& arg_elem_residual, + const elem_matrices_type& arg_elem_jacobian) + : elem_node_id(arg_elem_node_id), + elem_graph(arg_elem_graph), + row_total("row_total"), + row_count("row_count", arg_residual.extent(0)), + row_map("graph_row_map", arg_residual.extent(0) + 1), + graph(), + residual(arg_residual), + jacobian(arg_jacobian), + elem_residual(arg_elem_residual), + elem_jacobian(arg_elem_jacobian), + phase(FILL_NODE_COUNT) { + //-------------------------------- + // Count node->element relations + + phase = FILL_NODE_COUNT; + + Kokkos::parallel_for(elem_node_id.extent(0), *this); + + //-------------------------------- + + phase = SCAN_NODE_COUNT; + + // Exclusive scan of row_count into row_map + // including the final total in the 'node_count + 1' position. + // Zero the 'row_count' values. + Kokkos::parallel_scan(residual.extent(0), *this); + + // Zero the row count for the fill: + Kokkos::deep_copy(row_count, typename RowMapType::value_type(0)); + + unsigned graph_entry_count = 0; + + Kokkos::deep_copy(graph_entry_count, row_total); + + // Assign graph's row_map and allocate graph's entries + graph.row_map = row_map; + + typedef typename CrsGraphType::entries_type graph_entries_type; + + graph.entries = graph_entries_type("graph_entries", graph_entry_count); + + //-------------------------------- + // Fill graph's entries from the (node,node) set. + + phase = FILL_GRAPH_ENTRIES; + + Kokkos::deep_copy(row_count, 0u); + Kokkos::parallel_for(elem_node_id.extent(0), *this); + + execution_space().fence(); + + //-------------------------------- + // Done with the temporary sets and arrays + + row_total = UnsignedValue(); + row_count = RowMapType(); + row_map = RowMapType(); + + //-------------------------------- + + phase = SORT_GRAPH_ENTRIES; + Kokkos::parallel_for(residual.extent(0), *this); + + execution_space().fence(); + + phase = GATHER_FILL; } + void apply() const { Kokkos::parallel_for(residual.extent(0), *this); } + //------------------------------------ //------------------------------------ // parallel_for: Count node->element pairs KOKKOS_INLINE_FUNCTION - void fill_node_count( const unsigned ielem ) const - { - for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) { + void fill_node_count(const unsigned ielem) const { + for (unsigned row_local_node = 0; row_local_node < elem_node_id.extent(1); + ++row_local_node) { + const unsigned row_node = elem_node_id(ielem, row_local_node); - const unsigned row_node = elem_node_id( ielem , row_local_node ); - - if ( row_node < row_count.extent(0) ) { - atomic_fetch_add( & row_count( row_node ) , 1 ); + if (row_node < row_count.extent(0)) { + atomic_fetch_add(&row_count(row_node), 1); } } } KOKKOS_INLINE_FUNCTION - void fill_graph_entries( const unsigned ielem ) const - { - for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) { - - const unsigned row_node = elem_node_id( ielem , row_local_node ); - - if ( row_node < row_count.extent(0) ) { + void fill_graph_entries(const unsigned ielem) const { + for (unsigned row_local_node = 0; row_local_node < elem_node_id.extent(1); + ++row_local_node) { + const unsigned row_node = elem_node_id(ielem, row_local_node); - const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 ); + if (row_node < row_count.extent(0)) { + const unsigned offset = + graph.row_map(row_node) + atomic_fetch_add(&row_count(row_node), 1); - graph.entries( offset , 0 ) = ielem ; - graph.entries( offset , 1 ) = row_local_node ; + graph.entries(offset, 0) = ielem; + graph.entries(offset, 1) = row_local_node; } } } KOKKOS_INLINE_FUNCTION - void sort_graph_entries( const unsigned irow ) const - { - const unsigned row_beg = graph.row_map( irow ); - const unsigned row_end = graph.row_map( irow + 1 ); - for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) { - const unsigned elem = graph.entries(i,0); - const unsigned local = graph.entries(i,1); - unsigned j = i ; - for ( ; row_beg < j && elem < graph.entries(j-1,0) ; --j ) { - graph.entries(j,0) = graph.entries(j-1,0); - graph.entries(j,1) = graph.entries(j-1,1); + void sort_graph_entries(const unsigned irow) const { + const unsigned row_beg = graph.row_map(irow); + const unsigned row_end = graph.row_map(irow + 1); + for (unsigned i = row_beg + 1; i < row_end; ++i) { + const unsigned elem = graph.entries(i, 0); + const unsigned local = graph.entries(i, 1); + unsigned j = i; + for (; row_beg < j && elem < graph.entries(j - 1, 0); --j) { + graph.entries(j, 0) = graph.entries(j - 1, 0); + graph.entries(j, 1) = graph.entries(j - 1, 1); } - graph.entries(j,0) = elem ; - graph.entries(j,1) = local ; + graph.entries(j, 0) = elem; + graph.entries(j, 1) = local; } } //------------------------------------ KOKKOS_INLINE_FUNCTION - void gather_fill( const unsigned irow ) const - { + void gather_fill(const unsigned irow) const { const unsigned node_elem_begin = graph.row_map(irow); - const unsigned node_elem_end = graph.row_map(irow+1); + const unsigned node_elem_end = graph.row_map(irow + 1); // for each element that a node belongs to - for ( unsigned i = node_elem_begin ; i < node_elem_end ; i++ ) { - - const unsigned elem_id = graph.entries( i, 0); - const unsigned row_index = graph.entries( i, 1); + for (unsigned i = node_elem_begin; i < node_elem_end; i++) { + const unsigned elem_id = graph.entries(i, 0); + const unsigned row_index = graph.entries(i, 1); residual(irow) += elem_residual(elem_id, row_index); @@ -600,10 +592,10 @@ class NodeElemGatherFill { // gather the contents of the element stiffness // matrix that belong in irow - for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) { - const unsigned A_index = elem_graph( elem_id , row_index , j ); + for (unsigned j = 0; j < ElemNodeCount; ++j) { + const unsigned A_index = elem_graph(elem_id, row_index, j); - jacobian.values( A_index ) += elem_jacobian( elem_id, row_index, j ); + jacobian.values(A_index) += elem_jacobian(elem_id, row_index, j); } } } @@ -611,48 +603,48 @@ class NodeElemGatherFill { //------------------------------------ KOKKOS_INLINE_FUNCTION - void operator()( const unsigned iwork ) const - { - if ( phase == FILL_NODE_COUNT ) { - fill_node_count( iwork ); - } - else if ( phase == FILL_GRAPH_ENTRIES ) { - fill_graph_entries( iwork ); - } - else if ( phase == SORT_GRAPH_ENTRIES ) { - sort_graph_entries( iwork ); - } - else if ( phase == GATHER_FILL ) { - gather_fill( iwork ); + void operator()(const unsigned iwork) const { + if (phase == FILL_NODE_COUNT) { + fill_node_count(iwork); + } else if (phase == FILL_GRAPH_ENTRIES) { + fill_graph_entries(iwork); + } else if (phase == SORT_GRAPH_ENTRIES) { + sort_graph_entries(iwork); + } else if (phase == GATHER_FILL) { + gather_fill(iwork); } } //------------------------------------ // parallel_scan: row offsets - typedef unsigned value_type ; + typedef unsigned value_type; KOKKOS_INLINE_FUNCTION - void operator()( const unsigned irow , unsigned & update , const bool final ) const - { + void operator()(const unsigned irow, unsigned& update, + const bool final) const { // exclusive scan - if ( final ) { row_map( irow ) = update ; } + if (final) { + row_map(irow) = update; + } - update += row_count( irow ); + update += row_count(irow); - if ( final ) { - if ( irow + 1 == row_count.extent(0) ) { - row_map( irow + 1 ) = update ; - row_total() = update ; + if (final) { + if (irow + 1 == row_count.extent(0)) { + row_map(irow + 1) = update; + row_total() = update; } } } KOKKOS_INLINE_FUNCTION - void init( unsigned & update ) const { update = 0 ; } + void init(unsigned& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile unsigned & update , const volatile unsigned & input ) const { update += input ; } + void join(volatile unsigned& update, const volatile unsigned& input) const { + update += input; + } }; } /* namespace FENL */ @@ -665,188 +657,191 @@ namespace Kokkos { namespace Example { namespace FENL { -template< class FiniteElementMeshType , class SparseMatrixType > -class ElementComputation ; +template +class ElementComputation; - -template< class DeviceType , BoxElemPart::ElemOrder Order , class CoordinateMap , - typename ScalarType , typename OrdinalType , class MemoryTraits , typename SizeType > +template class ElementComputation< - Kokkos::Example::BoxElemFixture< DeviceType , Order , CoordinateMap > , - KokkosSparse::CrsMatrix< ScalarType , OrdinalType , DeviceType , MemoryTraits , SizeType > > -{ -public: - - typedef Kokkos::Example::BoxElemFixture< DeviceType, Order, CoordinateMap > mesh_type ; - typedef Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type ; - - typedef KokkosSparse::CrsMatrix< ScalarType , OrdinalType , DeviceType , MemoryTraits , SizeType > sparse_matrix_type ; - typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ; - - typedef DeviceType execution_space ; - typedef ScalarType scalar_type ; - - static const unsigned SpatialDim = element_data_type::spatial_dimension ; - static const unsigned TensorDim = SpatialDim * SpatialDim ; - static const unsigned ElemNodeCount = element_data_type::element_node_count ; - static const unsigned FunctionCount = element_data_type::function_count ; - static const unsigned IntegrationCount = element_data_type::integration_count ; + Kokkos::Example::BoxElemFixture, + KokkosSparse::CrsMatrix > { + public: + typedef Kokkos::Example::BoxElemFixture + mesh_type; + typedef Kokkos::Example::HexElement_Data + element_data_type; + + typedef KokkosSparse::CrsMatrix + sparse_matrix_type; + typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type; + + typedef DeviceType execution_space; + typedef ScalarType scalar_type; + + static const unsigned SpatialDim = element_data_type::spatial_dimension; + static const unsigned TensorDim = SpatialDim * SpatialDim; + static const unsigned ElemNodeCount = element_data_type::element_node_count; + static const unsigned FunctionCount = element_data_type::function_count; + static const unsigned IntegrationCount = element_data_type::integration_count; //------------------------------------ - typedef typename mesh_type::node_coord_type node_coord_type ; - typedef typename mesh_type::elem_node_type elem_node_type ; - typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ; - typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ; - typedef Kokkos::View< scalar_type* , execution_space > vector_type ; + typedef typename mesh_type::node_coord_type node_coord_type; + typedef typename mesh_type::elem_node_type elem_node_type; + typedef Kokkos::View + elem_matrices_type; + typedef Kokkos::View + elem_vectors_type; + typedef Kokkos::View vector_type; - typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType elem_graph_type ; + typedef typename NodeNodeGraph::ElemGraphType elem_graph_type; //------------------------------------ - //------------------------------------ // Computational data: - const element_data_type elem_data ; - const elem_node_type elem_node_ids ; - const node_coord_type node_coords ; - const elem_graph_type elem_graph ; - const elem_matrices_type elem_jacobians ; - const elem_vectors_type elem_residuals ; - const vector_type solution ; - const vector_type residual ; - const sparse_matrix_type jacobian ; - const scalar_type coeff_K ; - - ElementComputation( const ElementComputation & rhs ) - : elem_data() - , elem_node_ids( rhs.elem_node_ids ) - , node_coords( rhs.node_coords ) - , elem_graph( rhs.elem_graph ) - , elem_jacobians( rhs.elem_jacobians ) - , elem_residuals( rhs.elem_residuals ) - , solution( rhs.solution ) - , residual( rhs.residual ) - , jacobian( rhs.jacobian ) - , coeff_K( rhs.coeff_K ) - {} + const element_data_type elem_data; + const elem_node_type elem_node_ids; + const node_coord_type node_coords; + const elem_graph_type elem_graph; + const elem_matrices_type elem_jacobians; + const elem_vectors_type elem_residuals; + const vector_type solution; + const vector_type residual; + const sparse_matrix_type jacobian; + const scalar_type coeff_K; + + ElementComputation(const ElementComputation& rhs) + : elem_data(), + elem_node_ids(rhs.elem_node_ids), + node_coords(rhs.node_coords), + elem_graph(rhs.elem_graph), + elem_jacobians(rhs.elem_jacobians), + elem_residuals(rhs.elem_residuals), + solution(rhs.solution), + residual(rhs.residual), + jacobian(rhs.jacobian), + coeff_K(rhs.coeff_K) {} // If the element->sparse_matrix graph is provided then perform atomic updates - // Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian. - ElementComputation( const mesh_type & arg_mesh , - const scalar_type arg_coeff_K , - const vector_type & arg_solution , - const elem_graph_type & arg_elem_graph , - const sparse_matrix_type & arg_jacobian , - const vector_type & arg_residual ) - : elem_data() - , elem_node_ids( arg_mesh.elem_node() ) - , node_coords( arg_mesh.node_coord() ) - , elem_graph( arg_elem_graph ) - , elem_jacobians() - , elem_residuals() - , solution( arg_solution ) - , residual( arg_residual ) - , jacobian( arg_jacobian ) - , coeff_K( arg_coeff_K ) - {} - - ElementComputation( const mesh_type & arg_mesh , - const scalar_type arg_coeff_K , - const vector_type & arg_solution ) - : elem_data() - , elem_node_ids( arg_mesh.elem_node() ) - , node_coords( arg_mesh.node_coord() ) - , elem_graph() - , elem_jacobians( "elem_jacobians" , arg_mesh.elem_count() ) - , elem_residuals( "elem_residuals" , arg_mesh.elem_count() ) - , solution( arg_solution ) - , residual() - , jacobian() - , coeff_K( arg_coeff_K ) - {} + // Otherwise fill per-element contributions for subequent gather-add into a + // residual and jacobian. + ElementComputation(const mesh_type& arg_mesh, const scalar_type arg_coeff_K, + const vector_type& arg_solution, + const elem_graph_type& arg_elem_graph, + const sparse_matrix_type& arg_jacobian, + const vector_type& arg_residual) + : elem_data(), + elem_node_ids(arg_mesh.elem_node()), + node_coords(arg_mesh.node_coord()), + elem_graph(arg_elem_graph), + elem_jacobians(), + elem_residuals(), + solution(arg_solution), + residual(arg_residual), + jacobian(arg_jacobian), + coeff_K(arg_coeff_K) {} + + ElementComputation(const mesh_type& arg_mesh, const scalar_type arg_coeff_K, + const vector_type& arg_solution) + : elem_data(), + elem_node_ids(arg_mesh.elem_node()), + node_coords(arg_mesh.node_coord()), + elem_graph(), + elem_jacobians("elem_jacobians", arg_mesh.elem_count()), + elem_residuals("elem_residuals", arg_mesh.elem_count()), + solution(arg_solution), + residual(), + jacobian(), + coeff_K(arg_coeff_K) {} //------------------------------------ - void apply() const - { - parallel_for( elem_node_ids.extent(0) , *this ); - } + void apply() const { parallel_for(elem_node_ids.extent(0), *this); } //------------------------------------ static const unsigned FLOPS_transform_gradients = - /* Jacobian */ FunctionCount * TensorDim * 2 + - /* Inverse jacobian */ TensorDim * 6 + 6 + - /* Gradient transform */ FunctionCount * 15 ; + /* Jacobian */ FunctionCount * TensorDim * 2 + + /* Inverse jacobian */ TensorDim * 6 + 6 + + /* Gradient transform */ FunctionCount * 15; KOKKOS_INLINE_FUNCTION float transform_gradients( - const float grad[][ FunctionCount ] , // Gradient of bases master element - const double x[] , - const double y[] , - const double z[] , - float dpsidx[] , - float dpsidy[] , - float dpsidz[] ) const - { - enum { j11 = 0 , j12 = 1 , j13 = 2 , - j21 = 3 , j22 = 4 , j23 = 5 , - j31 = 6 , j32 = 7 , j33 = 8 }; + const float grad[][FunctionCount], // Gradient of bases master element + const double x[], const double y[], const double z[], float dpsidx[], + float dpsidy[], float dpsidz[]) const { + enum { + j11 = 0, + j12 = 1, + j13 = 2, + j21 = 3, + j22 = 4, + j23 = 5, + j31 = 6, + j32 = 7, + j33 = 8 + }; // Jacobian accumulation: - double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + double J[TensorDim] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; - for( unsigned i = 0; i < FunctionCount ; ++i ) { - const double x1 = x[i] ; - const double x2 = y[i] ; - const double x3 = z[i] ; + for (unsigned i = 0; i < FunctionCount; ++i) { + const double x1 = x[i]; + const double x2 = y[i]; + const double x3 = z[i]; - const float g1 = grad[0][i] ; - const float g2 = grad[1][i] ; - const float g3 = grad[2][i] ; + const float g1 = grad[0][i]; + const float g2 = grad[1][i]; + const float g3 = grad[2][i]; - J[j11] += g1 * x1 ; - J[j12] += g1 * x2 ; - J[j13] += g1 * x3 ; + J[j11] += g1 * x1; + J[j12] += g1 * x2; + J[j13] += g1 * x3; - J[j21] += g2 * x1 ; - J[j22] += g2 * x2 ; - J[j23] += g2 * x3 ; + J[j21] += g2 * x1; + J[j22] += g2 * x2; + J[j23] += g2 * x3; - J[j31] += g3 * x1 ; - J[j32] += g3 * x2 ; - J[j33] += g3 * x3 ; + J[j31] += g3 * x1; + J[j32] += g3 * x2; + J[j33] += g3 * x3; } // Inverse jacobian: - float invJ[ TensorDim ] = { - static_cast( J[j22] * J[j33] - J[j23] * J[j32] ) , - static_cast( J[j13] * J[j32] - J[j12] * J[j33] ) , - static_cast( J[j12] * J[j23] - J[j13] * J[j22] ) , + float invJ[TensorDim] = { + static_cast(J[j22] * J[j33] - J[j23] * J[j32]), + static_cast(J[j13] * J[j32] - J[j12] * J[j33]), + static_cast(J[j12] * J[j23] - J[j13] * J[j22]), - static_cast( J[j23] * J[j31] - J[j21] * J[j33] ) , - static_cast( J[j11] * J[j33] - J[j13] * J[j31] ) , - static_cast( J[j13] * J[j21] - J[j11] * J[j23] ) , + static_cast(J[j23] * J[j31] - J[j21] * J[j33]), + static_cast(J[j11] * J[j33] - J[j13] * J[j31]), + static_cast(J[j13] * J[j21] - J[j11] * J[j23]), - static_cast( J[j21] * J[j32] - J[j22] * J[j31] ) , - static_cast( J[j12] * J[j31] - J[j11] * J[j32] ) , - static_cast( J[j11] * J[j22] - J[j12] * J[j21] ) }; + static_cast(J[j21] * J[j32] - J[j22] * J[j31]), + static_cast(J[j12] * J[j31] - J[j11] * J[j32]), + static_cast(J[j11] * J[j22] - J[j12] * J[j21])}; - const float detJ = J[j11] * invJ[j11] + - J[j21] * invJ[j12] + - J[j31] * invJ[j13] ; + const float detJ = + J[j11] * invJ[j11] + J[j21] * invJ[j12] + J[j31] * invJ[j13]; - const float detJinv = 1.0 / detJ ; + const float detJinv = 1.0 / detJ; - for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; } + for (unsigned i = 0; i < TensorDim; ++i) { + invJ[i] *= detJinv; + } // Transform gradients: - for( unsigned i = 0; i < FunctionCount ; ++i ) { + for (unsigned i = 0; i < FunctionCount; ++i) { const float g0 = grad[0][i]; const float g1 = grad[1][i]; const float g2 = grad[2][i]; @@ -856,113 +851,101 @@ class ElementComputation< dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33]; } - return detJ ; + return detJ; } KOKKOS_INLINE_FUNCTION - void contributeResidualJacobian( - const float coeff_k , - const double dof_values[] , - const float dpsidx[] , - const float dpsidy[] , - const float dpsidz[] , - const float detJ , - const float integ_weight , - const float bases_vals[] , - double elem_res[] , - double elem_mat[][ FunctionCount ] ) const - { - double value_at_pt = 0 ; - double gradx_at_pt = 0 ; - double grady_at_pt = 0 ; - double gradz_at_pt = 0 ; - - for ( unsigned m = 0 ; m < FunctionCount ; m++ ) { - value_at_pt += dof_values[m] * bases_vals[m] ; - gradx_at_pt += dof_values[m] * dpsidx[m] ; - grady_at_pt += dof_values[m] * dpsidy[m] ; - gradz_at_pt += dof_values[m] * dpsidz[m] ; + void contributeResidualJacobian(const float coeff_k, + const double dof_values[], + const float dpsidx[], const float dpsidy[], + const float dpsidz[], const float detJ, + const float integ_weight, + const float bases_vals[], double elem_res[], + double elem_mat[][FunctionCount]) const { + double value_at_pt = 0; + double gradx_at_pt = 0; + double grady_at_pt = 0; + double gradz_at_pt = 0; + + for (unsigned m = 0; m < FunctionCount; m++) { + value_at_pt += dof_values[m] * bases_vals[m]; + gradx_at_pt += dof_values[m] * dpsidx[m]; + grady_at_pt += dof_values[m] * dpsidy[m]; + gradz_at_pt += dof_values[m] * dpsidz[m]; } - const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ; - const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ; - const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ; + const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight; + const double res_val = value_at_pt * value_at_pt * detJ * integ_weight; + const double mat_val = 2.0 * value_at_pt * detJ * integ_weight; - // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$ - // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$ + // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d + // \Omega $$ + // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla + // \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$ - for ( unsigned m = 0; m < FunctionCount; ++m) { - double * const mat = elem_mat[m] ; + for (unsigned m = 0; m < FunctionCount; ++m) { + double* const mat = elem_mat[m]; const float bases_val_m = bases_vals[m]; - const float dpsidx_m = dpsidx[m] ; - const float dpsidy_m = dpsidy[m] ; - const float dpsidz_m = dpsidz[m] ; - - elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt + - dpsidy_m * grady_at_pt + - dpsidz_m * gradz_at_pt ) + - res_val * bases_val_m ; - - for( unsigned n = 0; n < FunctionCount; n++) { - - mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] + - dpsidy_m * dpsidy[n] + - dpsidz_m * dpsidz[n] ) + + const float dpsidx_m = dpsidx[m]; + const float dpsidy_m = dpsidy[m]; + const float dpsidz_m = dpsidz[m]; + + elem_res[m] += + k_detJ_weight * (dpsidx_m * gradx_at_pt + dpsidy_m * grady_at_pt + + dpsidz_m * gradz_at_pt) + + res_val * bases_val_m; + + for (unsigned n = 0; n < FunctionCount; n++) { + mat[n] += k_detJ_weight * (dpsidx_m * dpsidx[n] + dpsidy_m * dpsidy[n] + + dpsidz_m * dpsidz[n]) + mat_val * bases_val_m * bases_vals[n]; } } } KOKKOS_INLINE_FUNCTION - void operator()( const unsigned ielem ) const - { + void operator()(const unsigned ielem) const { // Gather nodal coordinates and solution vector: - double x[ FunctionCount ] ; - double y[ FunctionCount ] ; - double z[ FunctionCount ] ; - double val[ FunctionCount ] ; - unsigned node_index[ ElemNodeCount ]; + double x[FunctionCount]; + double y[FunctionCount]; + double z[FunctionCount]; + double val[FunctionCount]; + unsigned node_index[ElemNodeCount]; - for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) { - const unsigned ni = elem_node_ids( ielem , i ); + for (unsigned i = 0; i < ElemNodeCount; ++i) { + const unsigned ni = elem_node_ids(ielem, i); - node_index[i] = ni ; + node_index[i] = ni; - x[i] = node_coords( ni , 0 ); - y[i] = node_coords( ni , 1 ); - z[i] = node_coords( ni , 2 ); + x[i] = node_coords(ni, 0); + y[i] = node_coords(ni, 1); + z[i] = node_coords(ni, 2); - val[i] = solution( ni ); + val[i] = solution(ni); } + double elem_vec[FunctionCount]; + double elem_mat[FunctionCount][FunctionCount]; - double elem_vec[ FunctionCount ] ; - double elem_mat[ FunctionCount ][ FunctionCount ] ; - - for( unsigned i = 0; i < FunctionCount ; i++ ) { - elem_vec[i] = 0 ; - for( unsigned j = 0; j < FunctionCount ; j++){ - elem_mat[i][j] = 0 ; + for (unsigned i = 0; i < FunctionCount; i++) { + elem_vec[i] = 0; + for (unsigned j = 0; j < FunctionCount; j++) { + elem_mat[i][j] = 0; } } + for (unsigned i = 0; i < IntegrationCount; ++i) { + float dpsidx[FunctionCount]; + float dpsidy[FunctionCount]; + float dpsidz[FunctionCount]; - for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) { - float dpsidx[ FunctionCount ] ; - float dpsidy[ FunctionCount ] ; - float dpsidz[ FunctionCount ] ; + const float detJ = transform_gradients(elem_data.gradients[i], x, y, z, + dpsidx, dpsidy, dpsidz); - const float detJ = - transform_gradients( elem_data.gradients[i] , x , y , z , - dpsidx , dpsidy , dpsidz ); - - contributeResidualJacobian( coeff_K , - val , dpsidx , dpsidy , dpsidz , - detJ , - elem_data.weights[i] , - elem_data.values[i] , - elem_vec , elem_mat ); + contributeResidualJacobian(coeff_K, val, dpsidx, dpsidy, dpsidz, detJ, + elem_data.weights[i], elem_data.values[i], + elem_vec, elem_mat); } #if 0 @@ -984,24 +967,23 @@ if ( 1 == ielem ) { #endif - if ( ! residual.extent(0) ) { - for( unsigned i = 0; i < FunctionCount ; i++){ - elem_residuals(ielem, i) = elem_vec[i] ; - for( unsigned j = 0; j < FunctionCount ; j++){ - elem_jacobians(ielem, i, j) = elem_mat[i][j] ; + if (!residual.extent(0)) { + for (unsigned i = 0; i < FunctionCount; i++) { + elem_residuals(ielem, i) = elem_vec[i]; + for (unsigned j = 0; j < FunctionCount; j++) { + elem_jacobians(ielem, i, j) = elem_mat[i][j]; } } - } - else { - for( unsigned i = 0 ; i < FunctionCount ; i++ ) { - const unsigned row = node_index[i] ; - if ( row < residual.extent(0) ) { - atomic_fetch_add( & residual( row ) , elem_vec[i] ); - - for( unsigned j = 0 ; j < FunctionCount ; j++ ) { - const unsigned entry = elem_graph( ielem , i , j ); - if ( entry != ~0u ) { - atomic_fetch_add( & jacobian.values( entry ) , elem_mat[i][j] ); + } else { + for (unsigned i = 0; i < FunctionCount; i++) { + const unsigned row = node_index[i]; + if (row < residual.extent(0)) { + atomic_fetch_add(&residual(row), elem_vec[i]); + + for (unsigned j = 0; j < FunctionCount; j++) { + const unsigned entry = elem_graph(ielem, i, j); + if (entry != ~0u) { + atomic_fetch_add(&jacobian.values(entry), elem_mat[i][j]); } } } @@ -1012,119 +994,114 @@ if ( 1 == ielem ) { //---------------------------------------------------------------------------- -template< class FixtureType , class SparseMatrixType > -class DirichletComputation ; +template +class DirichletComputation; -template< class DeviceType , BoxElemPart::ElemOrder Order , class CoordinateMap , - typename ScalarType , typename OrdinalType , class MemoryTraits , typename SizeType > +template class DirichletComputation< - Kokkos::Example::BoxElemFixture< DeviceType , Order , CoordinateMap > , - KokkosSparse::CrsMatrix< ScalarType , OrdinalType , DeviceType , MemoryTraits , SizeType > > -{ -public: - - typedef Kokkos::Example::BoxElemFixture< DeviceType, Order, CoordinateMap > mesh_type ; - typedef typename mesh_type::node_coord_type node_coord_type ; - typedef typename node_coord_type::value_type scalar_coord_type ; - - typedef KokkosSparse::CrsMatrix< ScalarType , OrdinalType , DeviceType , MemoryTraits , SizeType > sparse_matrix_type ; - typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ; - - typedef DeviceType execution_space ; - typedef ScalarType scalar_type ; + Kokkos::Example::BoxElemFixture, + KokkosSparse::CrsMatrix > { + public: + typedef Kokkos::Example::BoxElemFixture + mesh_type; + typedef typename mesh_type::node_coord_type node_coord_type; + typedef typename node_coord_type::value_type scalar_coord_type; + + typedef KokkosSparse::CrsMatrix + sparse_matrix_type; + typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type; + + typedef DeviceType execution_space; + typedef ScalarType scalar_type; //------------------------------------ - typedef Kokkos::View< scalar_type* , execution_space > vector_type ; + typedef Kokkos::View vector_type; //------------------------------------ // Computational data: - const node_coord_type node_coords ; - const vector_type solution ; - const sparse_matrix_type jacobian ; - const vector_type residual ; - const scalar_type bc_lower_value ; - const scalar_type bc_upper_value ; - const scalar_coord_type bc_lower_limit ; - const scalar_coord_type bc_upper_limit ; - const unsigned bc_plane ; - const unsigned node_count ; - bool init ; - - - DirichletComputation( const mesh_type & arg_mesh , - const vector_type & arg_solution , - const sparse_matrix_type & arg_jacobian , - const vector_type & arg_residual , - const unsigned arg_bc_plane , - const scalar_type arg_bc_lower_value , - const scalar_type arg_bc_upper_value ) - : node_coords( arg_mesh.node_coord() ) - , solution( arg_solution ) - , jacobian( arg_jacobian ) - , residual( arg_residual ) - , bc_lower_value( arg_bc_lower_value ) - , bc_upper_value( arg_bc_upper_value ) - , bc_lower_limit( std::numeric_limits::epsilon() ) - , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits::epsilon() ) - , bc_plane( arg_bc_plane ) - , node_count( arg_mesh.node_count_owned() ) - , init( false ) - { - parallel_for( node_count , *this ); - init = true ; - } - - void apply() const - { - parallel_for( node_count , *this ); + const node_coord_type node_coords; + const vector_type solution; + const sparse_matrix_type jacobian; + const vector_type residual; + const scalar_type bc_lower_value; + const scalar_type bc_upper_value; + const scalar_coord_type bc_lower_limit; + const scalar_coord_type bc_upper_limit; + const unsigned bc_plane; + const unsigned node_count; + bool init; + + DirichletComputation(const mesh_type& arg_mesh, + const vector_type& arg_solution, + const sparse_matrix_type& arg_jacobian, + const vector_type& arg_residual, + const unsigned arg_bc_plane, + const scalar_type arg_bc_lower_value, + const scalar_type arg_bc_upper_value) + : node_coords(arg_mesh.node_coord()), + solution(arg_solution), + jacobian(arg_jacobian), + residual(arg_residual), + bc_lower_value(arg_bc_lower_value), + bc_upper_value(arg_bc_upper_value), + bc_lower_limit(std::numeric_limits::epsilon()), + bc_upper_limit(scalar_coord_type(1) - + std::numeric_limits::epsilon()), + bc_plane(arg_bc_plane), + node_count(arg_mesh.node_count_owned()), + init(false) { + parallel_for(node_count, *this); + init = true; } + void apply() const { parallel_for(node_count, *this); } + //------------------------------------ KOKKOS_INLINE_FUNCTION - void operator()( const unsigned inode ) const - { + void operator()(const unsigned inode) const { // Apply dirichlet boundary condition on the Solution and Residual vectors. // To maintain the symmetry of the original global stiffness matrix, // zero out the columns that correspond to boundary conditions, and // update the residual vector accordingly const unsigned iBeg = jacobian.graph.row_map[inode]; - const unsigned iEnd = jacobian.graph.row_map[inode+1]; + const unsigned iEnd = jacobian.graph.row_map[inode + 1]; - const scalar_coord_type c = node_coords(inode,bc_plane); - const bool bc_lower = c <= bc_lower_limit ; - const bool bc_upper = bc_upper_limit <= c ; + const scalar_coord_type c = node_coords(inode, bc_plane); + const bool bc_lower = c <= bc_lower_limit; + const bool bc_upper = bc_upper_limit <= c; - if ( ! init ) { - solution(inode) = bc_lower ? bc_lower_value : ( - bc_upper ? bc_upper_value : 0 ); - } - else { - if ( bc_lower || bc_upper ) { - - residual(inode) = 0 ; + if (!init) { + solution(inode) = + bc_lower ? bc_lower_value : (bc_upper ? bc_upper_value : 0); + } else { + if (bc_lower || bc_upper) { + residual(inode) = 0; // zero each value on the row, and leave a one // on the diagonal - for( unsigned i = iBeg ; i < iEnd ; ++i ) { - jacobian.values(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ; + for (unsigned i = iBeg; i < iEnd; ++i) { + jacobian.values(i) = + int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0; } - } - else { - + } else { // Find any columns that are boundary conditions. // Clear them and adjust the residual vector - for( unsigned i = iBeg ; i < iEnd ; ++i ) { - const unsigned cnode = jacobian.graph.entries(i) ; - const scalar_coord_type cc = node_coords(cnode,bc_plane); + for (unsigned i = iBeg; i < iEnd; ++i) { + const unsigned cnode = jacobian.graph.entries(i); + const scalar_coord_type cc = node_coords(cnode, bc_plane); - if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) { - jacobian.values(i) = 0 ; + if ((cc <= bc_lower_limit) || (bc_upper_limit <= cc)) { + jacobian.values(i) = 0; } } } @@ -1139,11 +1116,10 @@ class DirichletComputation< //---------------------------------------------------------------------------- /* A Cuda-specific specialization for the element computation functor. */ -#if defined( __CUDACC__ ) +#if defined(__CUDACC__) // #include #endif //---------------------------------------------------------------------------- #endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */ - From 45e919ca8dc6b057a2e51d6eb58495649893f7ce Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 8 Apr 2022 18:46:34 -0600 Subject: [PATCH 2/3] Remove join(volatile) overloads where join() taking non-volatile parameters exists --- src/blas/impl/KokkosBlas1_dot_impl.hpp | 5 ----- src/blas/impl/KokkosBlas1_iamax_impl.hpp | 7 ------- src/blas/impl/KokkosBlas1_nrm2_impl.hpp | 5 ----- src/blas/impl/KokkosBlas1_nrm2w_impl.hpp | 5 ----- 4 files changed, 22 deletions(-) diff --git a/src/blas/impl/KokkosBlas1_dot_impl.hpp b/src/blas/impl/KokkosBlas1_dot_impl.hpp index cb8db757f8..b153b3ed72 100644 --- a/src/blas/impl/KokkosBlas1_dot_impl.hpp +++ b/src/blas/impl/KokkosBlas1_dot_impl.hpp @@ -91,11 +91,6 @@ struct DotFunctor { const value_type& source) const { update += source; } - - KOKKOS_INLINE_FUNCTION void join(volatile value_type& update, - const volatile value_type& source) const { - update += source; - } }; } // namespace Impl diff --git a/src/blas/impl/KokkosBlas1_iamax_impl.hpp b/src/blas/impl/KokkosBlas1_iamax_impl.hpp index dc30edf7da..8b27b3e5a3 100644 --- a/src/blas/impl/KokkosBlas1_iamax_impl.hpp +++ b/src/blas/impl/KokkosBlas1_iamax_impl.hpp @@ -96,13 +96,6 @@ struct V_Iamax_Functor { update = Kokkos::reduction_identity::max() + 1; } - KOKKOS_INLINE_FUNCTION void join(volatile value_type& update, - const volatile value_type& source) const { - mag_type source_val = IPT::norm(m_x(source - 1)); - mag_type update_val = IPT::norm(m_x(update - 1)); - if (update_val < source_val) update = source; - } - KOKKOS_INLINE_FUNCTION void join(value_type& update, const value_type& source) const { mag_type source_val = IPT::norm(m_x(source - 1)); diff --git a/src/blas/impl/KokkosBlas1_nrm2_impl.hpp b/src/blas/impl/KokkosBlas1_nrm2_impl.hpp index f2b0e826bc..e56a884655 100644 --- a/src/blas/impl/KokkosBlas1_nrm2_impl.hpp +++ b/src/blas/impl/KokkosBlas1_nrm2_impl.hpp @@ -105,11 +105,6 @@ struct V_Nrm2_Functor { update += source; } - KOKKOS_INLINE_FUNCTION void join(volatile value_type& update, - const volatile value_type& source) const { - update += source; - } - KOKKOS_INLINE_FUNCTION void final(value_type& update) const { if (m_take_sqrt) update = diff --git a/src/blas/impl/KokkosBlas1_nrm2w_impl.hpp b/src/blas/impl/KokkosBlas1_nrm2w_impl.hpp index 3f202ca430..e2c858f0b3 100644 --- a/src/blas/impl/KokkosBlas1_nrm2w_impl.hpp +++ b/src/blas/impl/KokkosBlas1_nrm2w_impl.hpp @@ -108,11 +108,6 @@ struct V_Nrm2w_Functor { update += source; } - KOKKOS_INLINE_FUNCTION void join(volatile value_type& update, - const volatile value_type& source) const { - update += source; - } - KOKKOS_INLINE_FUNCTION void final(value_type& update) const { if (m_take_sqrt) update = From 2f60e260571b10f664adc4587aad66317db8fddc Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 17 May 2022 14:27:53 -0700 Subject: [PATCH 3/3] Drop remaining uses of volatile in reducer join method signatures --- example/fenl/TestFixture.hpp | 3 +-- example/fenl/fenl_functors.hpp | 8 ++------ perf_test/graph/KokkosGraph_run_triangle.hpp | 4 +--- src/batched/KokkosBatched_Util.hpp | 4 +--- src/blas/impl/KokkosBlas1_dot_impl.hpp | 2 +- src/blas/impl/KokkosBlas2_gemv_impl.hpp | 3 +-- src/common/KokkosKernels_SimpleUtils.hpp | 2 +- src/common/KokkosKernels_Utils.hpp | 15 ++++----------- src/graph/KokkosGraph_Distance1ColorHandle.hpp | 4 ++-- src/sparse/impl/KokkosSparse_spgemm_imp_outer.hpp | 2 +- .../impl/KokkosSparse_spgemm_impl_symbolic.hpp | 6 +++--- unit_test/common/Test_Common_ArithTraits.hpp | 4 ++-- 12 files changed, 20 insertions(+), 37 deletions(-) diff --git a/example/fenl/TestFixture.hpp b/example/fenl/TestFixture.hpp index 7c09752433..54b841c4b6 100644 --- a/example/fenl/TestFixture.hpp +++ b/example/fenl/TestFixture.hpp @@ -74,8 +74,7 @@ struct FixtureVerifyElemNodeCoord { void init(value_type& update) const { update.success = update.error = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { + void join(value_type& update, const value_type& input) const { update.success += input.success; update.error += input.error; } diff --git a/example/fenl/fenl_functors.hpp b/example/fenl/fenl_functors.hpp index 5706497db2..0a489fa1c0 100644 --- a/example/fenl/fenl_functors.hpp +++ b/example/fenl/fenl_functors.hpp @@ -364,9 +364,7 @@ class NodeNodeGraph { void init(unsigned& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile unsigned& update, const volatile unsigned& input) const { - update += input; - } + void join(unsigned& update, const unsigned& input) const { update += input; } //------------------------------------ }; @@ -642,9 +640,7 @@ class NodeElemGatherFill { void init(unsigned& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile unsigned& update, const volatile unsigned& input) const { - update += input; - } + void join(unsigned& update, const unsigned& input) const { update += input; } }; } /* namespace FENL */ diff --git a/perf_test/graph/KokkosGraph_run_triangle.hpp b/perf_test/graph/KokkosGraph_run_triangle.hpp index 2fee139a64..0a189cd3e1 100644 --- a/perf_test/graph/KokkosGraph_run_triangle.hpp +++ b/perf_test/graph/KokkosGraph_run_triangle.hpp @@ -117,9 +117,7 @@ struct Flush { void init(value_type &update) { update = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type &update, const volatile value_type &input) { - update += input; - } + void join(value_type &update, const value_type &input) { update += input; } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &update) const { update += _buf[i]; } diff --git a/src/batched/KokkosBatched_Util.hpp b/src/batched/KokkosBatched_Util.hpp index 338c3fe8f8..46b97ee039 100644 --- a/src/batched/KokkosBatched_Util.hpp +++ b/src/batched/KokkosBatched_Util.hpp @@ -123,9 +123,7 @@ struct Flush { void init(value_type &update) { update = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type &update, const volatile value_type &input) { - update += input; - } + void join(value_type &update, const value_type &input) { update += input; } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &update) const { update += _buf[i]; } diff --git a/src/blas/impl/KokkosBlas1_dot_impl.hpp b/src/blas/impl/KokkosBlas1_dot_impl.hpp index b153b3ed72..5430e0177b 100644 --- a/src/blas/impl/KokkosBlas1_dot_impl.hpp +++ b/src/blas/impl/KokkosBlas1_dot_impl.hpp @@ -83,7 +83,7 @@ struct DotFunctor { Kokkos::Details::updateDot(sum, m_x(i), m_y(i)); // sum += m_x(i) * m_y(i) } - KOKKOS_INLINE_FUNCTION void init(volatile value_type& update) const { + KOKKOS_INLINE_FUNCTION void init(value_type& update) const { update = Kokkos::Details::ArithTraits::zero(); } diff --git a/src/blas/impl/KokkosBlas2_gemv_impl.hpp b/src/blas/impl/KokkosBlas2_gemv_impl.hpp index a16a9eaf9a..a6c8111684 100644 --- a/src/blas/impl/KokkosBlas2_gemv_impl.hpp +++ b/src/blas/impl/KokkosBlas2_gemv_impl.hpp @@ -190,8 +190,7 @@ struct SingleLevelTransposeGEMV { } } - KOKKOS_INLINE_FUNCTION void join(volatile value_type dst, - const volatile value_type src) const { + KOKKOS_INLINE_FUNCTION void join(value_type dst, const value_type src) const { for (IndexType j = 0; j < value_count; ++j) { dst[j] += src[j]; } diff --git a/src/common/KokkosKernels_SimpleUtils.hpp b/src/common/KokkosKernels_SimpleUtils.hpp index c1f68ebd3b..bb2a6d43b9 100644 --- a/src/common/KokkosKernels_SimpleUtils.hpp +++ b/src/common/KokkosKernels_SimpleUtils.hpp @@ -346,7 +346,7 @@ struct ReduceMaxFunctor { } } KOKKOS_INLINE_FUNCTION - void join(volatile value_type &dst, const volatile value_type &src) const { + void join(value_type &dst, const value_type &src) const { if (dst < src) { dst = src; } diff --git a/src/common/KokkosKernels_Utils.hpp b/src/common/KokkosKernels_Utils.hpp index bf881edc6f..eae4080879 100644 --- a/src/common/KokkosKernels_Utils.hpp +++ b/src/common/KokkosKernels_Utils.hpp @@ -515,7 +515,7 @@ struct PropogataMaxValstoZeros { } KOKKOS_INLINE_FUNCTION - void join(volatile idx &update, volatile const idx &input) const { + void join(idx &update, const idx &input) const { if (input > update) update = input; } }; @@ -1260,7 +1260,7 @@ struct ReduceRowSizeFunctor { } } KOKKOS_INLINE_FUNCTION - void join(volatile size_type &dst, const volatile size_type &src) const { + void join(size_type &dst, const size_type &src) const { if (dst < src) { dst = src; } @@ -1305,7 +1305,7 @@ struct ReduceMaxRowFunctor { } } KOKKOS_INLINE_FUNCTION - void join(volatile value_type &dst, const volatile value_type &src) const { + void join(value_type &dst, const value_type &src) const { if (dst < src) { dst = src; } @@ -1350,9 +1350,7 @@ struct IsEqualFunctor { } KOKKOS_INLINE_FUNCTION - void join(volatile int &dst, const volatile int &src) const { - dst = dst & src; - } + void join(int &dst, const int &src) const { dst = dst & src; } KOKKOS_INLINE_FUNCTION void init(int &dst) const { dst = 1; } }; @@ -1466,11 +1464,6 @@ struct array_sum_reduce { for (int i = 0; i < N; i++) data[i] += src.data[i]; return *this; } - KOKKOS_INLINE_FUNCTION // volatile add operator - void - operator+=(const volatile ValueType &src) volatile { - for (int i = 0; i < N; i++) data[i] += src.data[i]; - } }; template diff --git a/src/graph/KokkosGraph_Distance1ColorHandle.hpp b/src/graph/KokkosGraph_Distance1ColorHandle.hpp index 7f04bfa94f..0f5d60591f 100644 --- a/src/graph/KokkosGraph_Distance1ColorHandle.hpp +++ b/src/graph/KokkosGraph_Distance1ColorHandle.hpp @@ -560,9 +560,9 @@ class GraphColoringHandle { if (color_max < colors(i)) color_max = colors(i); } + // max-plus semiring equivalent of "plus" KOKKOS_INLINE_FUNCTION - void join(volatile color_t &dst, const volatile color_t &src) - const { // max -plus semiring equivalent of "plus" + void join(color_t &dst, const color_t &src) const { if (dst < src) { dst = src; } diff --git a/src/sparse/impl/KokkosSparse_spgemm_imp_outer.hpp b/src/sparse/impl/KokkosSparse_spgemm_imp_outer.hpp index e566e8bf06..c6a24e2163 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_imp_outer.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_imp_outer.hpp @@ -509,7 +509,7 @@ struct KokkosSPGEMM