Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interleave dev #472

Merged
merged 36 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
a455d3f
Serialize GEMM kernel runs
cgmillette May 7, 2024
5337ba8
First working interleaved 128x128 macro kernel
cgmillette Jun 27, 2024
ce0f116
Initial layout classes refactor
cgmillette Sep 9, 2024
cdd13bc
Refactor layout and traits organization
cgmillette Sep 12, 2024
e15f5b0
Remove unused file
cgmillette Sep 12, 2024
c27e4e1
Refactor layout traits
cgmillette Sep 26, 2024
f27ed38
Fixes build after layout folder refactor
cgmillette Oct 15, 2024
f3622e3
Update interleaving function
cgmillette Oct 24, 2024
2c550c3
Update is_layout_same and is_layout_orthogonal and matrix layouts logic
cgmillette Nov 9, 2024
64da221
Introduce register formats and refactor is_layout_same and is_layout_…
cgmillette Nov 11, 2024
b222868
Fixup interleaved layouts logic bugs. Add layout formats to fit all w…
cgmillette Nov 15, 2024
a5c23e4
Fix compiler unroll issue with function arg
cgmillette Nov 15, 2024
678a2d3
Deploy new mma workflow
cgmillette Nov 16, 2024
622a256
Fix include issues and io_shape test
cgmillette Nov 19, 2024
2c08b36
Add interleaved layout IOLayoutInt
cgmillette Nov 19, 2024
d89b536
Fixes for interleaved layout compatibility
cgmillette Nov 22, 2024
f6ff3e4
Add initial non-interleaved layout traits test
cgmillette Nov 25, 2024
9cc7ebc
Add DataT to Mma layout interface. Add checks for data size comparison
cgmillette Nov 25, 2024
368c6dc
Fixes f64 tests. Adds all block sizes tests.
cgmillette Nov 27, 2024
d88e353
Start implementing interleaved layout traits tests
cgmillette Nov 27, 2024
31e2f5a
Add interleaved and emulation tests
cgmillette Nov 28, 2024
ef05816
Fix build of layout unit tests
cgmillette Nov 28, 2024
b875471
Fix gfx11 implementation
cgmillette Dec 2, 2024
b035289
Restore perf_hgemm
cgmillette Dec 2, 2024
43c96fe
Skip tests on invalid layout condition for BlockK
cgmillette Dec 3, 2024
1aeb382
Add a softer warning for unsupported transform attempts
cgmillette Dec 3, 2024
abb085f
Adjust MaxVWSelector to fit more layout constraints
cgmillette Dec 4, 2024
a420f9b
Update / correct non-interleaved layout tests
cgmillette Dec 4, 2024
e494ec1
Prevent sgemm kernel from building on unsupported targets
cgmillette Dec 4, 2024
8c75e88
Fixes: remove default Format argument to avoid usage mistakes; fix te…
cgmillette Dec 5, 2024
73fb53c
Fixup interleaved tests on gfx11
cgmillette Dec 5, 2024
4d6fab8
Allow acc post mma xform to convert gfx11 mma acc quirk into configur…
cgmillette Dec 11, 2024
e3c61a3
Fixup MmaDim calculator
cgmillette Dec 11, 2024
61274d0
Removed WMMA_ACC_INT* formats
cgmillette Dec 17, 2024
45cf20e
removes std min reference for hipRTC
cgmillette Dec 17, 2024
19af315
Update perf_hgemm.cpp
cgmillette Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions library/include/rocwmma/internal/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* MIT License
*
* Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -146,6 +146,11 @@ static_assert(0, "Unsupported architecture");
#define ROCWMMA_ARCH_GFX94X 1
#endif

#if ROCWMMA_ARCH_HOST
#define ROCWMMA_BLOCK_DIM_16_SUPPORTED 1
#define ROCWMMA_BLOCK_DIM_32_SUPPORTED 1
#endif

#if !defined(ROCWMMA_ARCH_GFX9)
#define ROCWMMA_ARCH_GFX9 0
#endif
Expand Down Expand Up @@ -201,10 +206,10 @@ static_assert((bool)(ROCWMMA_BLOCK_DIM_16_SUPPORTED) && !(bool)(ROCWMMA_BLOCK_DI
#endif

#if ROCWMMA_ARCH_GFX12
static_assert((bool)(ROCWMMA_WAVE32_MODE) && !(bool)(ROCWMMA_WAVE64_MODE),
"rocWMMA supports only wave32 for gfx12 arch");
static_assert((bool)(ROCWMMA_BLOCK_DIM_16_SUPPORTED) && !(bool)(ROCWMMA_BLOCK_DIM_32_SUPPORTED),
"rocWMMA supports only block size of 16 for gfx12 arch");
static_assert((bool)(ROCWMMA_WAVE32_MODE) && !(bool)(ROCWMMA_WAVE64_MODE),
"rocWMMA supports only wave32 for gfx12 arch");
static_assert((bool)(ROCWMMA_BLOCK_DIM_16_SUPPORTED) && !(bool)(ROCWMMA_BLOCK_DIM_32_SUPPORTED),
"rocWMMA supports only block size of 16 for gfx12 arch");
#endif

///
Expand Down
12 changes: 11 additions & 1 deletion library/include/rocwmma/internal/coop_io_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* MIT License
*
* Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -26,6 +26,7 @@
#ifndef ROCWMMA_COOP_IO_CONFIG_HPP
#define ROCWMMA_COOP_IO_CONFIG_HPP

#include "./layout/register_layout_transforms.hpp"
#include "coop_load.hpp"
#include "coop_store.hpp"
#include "io_layout.hpp"
Expand Down Expand Up @@ -85,6 +86,15 @@ namespace rocwmma
typename IOLayout::MatrixLayout,
IOLayout::VW>;

using PostLoadXForm = register_layout_transform<typename IOLayout::StorageLayout,
typename IOLayout::FragmentLayout>;

using PreMmaXForm = register_layout_transform<typename IOLayout::FragmentLayout,
typename IOLayout::MmaLayout>;

using PreStoreXForm = register_layout_transform<typename IOLayout::FragmentLayout,
typename IOLayout::StorageLayout>;

using Storer = CooperativeStore<IOShape::BlockDim,
IOShape::KDim,
DataT,
Expand Down
1 change: 0 additions & 1 deletion library/include/rocwmma/internal/coop_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#define ROCWMMA_COOP_LOAD_HPP

#include "io_traits.hpp"
#include "layout.hpp"
#include "opaque_load.hpp"
#include "types.hpp"
#include "utils.hpp"
Expand Down
1 change: 0 additions & 1 deletion library/include/rocwmma/internal/coop_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#define ROCWMMA_COOP_STORE_HPP

#include "io_traits.hpp"
#include "layout.hpp"
#include "opaque_store.hpp"
#include "types.hpp"
#include "utils.hpp"
Expand Down
32 changes: 27 additions & 5 deletions library/include/rocwmma/internal/io_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* MIT License
*
* Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -26,6 +26,7 @@
#ifndef ROCWMMA_IO_CONFIG_HPP
#define ROCWMMA_IO_CONFIG_HPP

#include "./layout/register_layout_transforms.hpp"
#include "broadcast.hpp"
#include "coop_load.hpp"
#include "coop_store.hpp"
Expand All @@ -37,7 +38,6 @@

namespace rocwmma
{

/**
* \defgroup Rocwmma_ioconf ROCWMMA IOConfig
* @brief ROCWMMA fragment input and output configurations
Expand Down Expand Up @@ -88,6 +88,21 @@ namespace rocwmma
typename IOLayout::MatrixLayout,
IOLayout::VW>;

using PostLoadXForm = register_layout_transform<typename IOLayout::StorageLayout,
typename IOLayout::FragmentLayout>;

using PreMmaXForm = register_layout_transform<typename IOLayout::FragmentLayout,
typename IOLayout::MmaLayout>;

// Currently, a post-mma transform only makes sense for accumulator layouts; all other matrix types get a no-op transform
using PostMmaXForm = conditional_t<is_same_v<MatrixT, accumulator>,
register_layout_transform<typename IOLayout::MmaLayout,
typename IOLayout::FragmentLayout>,
register_layout_transform_nop>;

using PreStoreXForm = register_layout_transform<typename IOLayout::FragmentLayout,
typename IOLayout::StorageLayout>;

using Storer = OpaqueStore<IOShape::BlockDim,
IOShape::KDim,
DataT,
Expand All @@ -106,10 +121,17 @@ namespace rocwmma
// Partial specialization of IOConfig for accumulator fragments with a void data layout.
// It exposes the shape, layout, traits, packing and broadcast helpers, plus the register
// layout transforms applied around the mma operation (fragment layout <-> mma layout).
template <uint32_t BlockM, uint32_t BlockN, uint32_t BlockK, typename DataT>
struct IOConfig<accumulator, BlockM, BlockN, BlockK, DataT, void>
{
// NOTE(review): the next three aliases look like the pre-change side of this diff hunk;
// they are repeated (re-aligned) immediately below alongside the new IOLayout alias.
using IOShape = IOShape<accumulator, BlockM, BlockN, BlockK>;
using IOTraits = IOTraits<IOShape::BlockDim, IOShape::KDim, DataT>;
using PackUtil = PackUtil<DataT>;
using IOShape = IOShape<accumulator, BlockM, BlockN, BlockK>;
// Layout description for the void-layout accumulator. The trailing 1u is presumably
// the vector width argument — TODO confirm against IOLayout's parameter list.
using IOLayout = IOLayout<accumulator, IOShape::BlockDim, IOShape::KDim, DataT, void, 1u>;
using IOTraits = IOTraits<IOShape::BlockDim, IOShape::KDim, DataT>;
using PackUtil = PackUtil<DataT>;
using Broadcaster = Broadcast<DataT, IOTraits::UnpackedSize>;

// Transform from the fragment register layout to the mma register layout.
using PreMmaXForm = register_layout_transform<typename IOLayout::FragmentLayout,
typename IOLayout::MmaLayout>;

// Inverse direction: from the mma register layout back to the fragment register layout.
using PostMmaXForm = register_layout_transform<typename IOLayout::MmaLayout,
typename IOLayout::FragmentLayout>;
};
/** @}*/

Expand Down
Loading