Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parametrizable BD chain syntax and new syntax for BD configuration in sequence #1656

Merged
merged 35 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
cd309e6
initial implementation; inlining works
andrej Jul 18, 2024
0d647c4
add lowering for BD IDs
andrej Jul 18, 2024
838e0c5
allow more than one argument to bd chains
andrej Jul 18, 2024
29eca7b
WireBundle is always gonna be DMA for BDs
andrej Jul 18, 2024
8468ac9
add lowering to NPU
andrej Jul 20, 2024
778a714
move some syntax around
andrej Jul 20, 2024
ea60932
add syntax for waiting on dma tasks; rework inlining pass
andrej Jul 22, 2024
e86dcd9
rename BDChainOp -> DMATaskOp
andrej Jul 24, 2024
ae4e185
add strides/wraps and next_bd lowering to bd chains; refactor (WIP)
andrej Jul 30, 2024
ae126d4
fix lowering
andrej Jul 31, 2024
758f9e1
rename ops
andrej Jul 31, 2024
382ca8e
rename ops more
andrej Jul 31, 2024
11e3fb4
add documentation
andrej Jul 31, 2024
1f7b2bf
fix memcpy_nd verification
andrej Jul 31, 2024
2bf2759
integrate with Python compiler frontend; several fixes to strides/wraps
andrej Jul 31, 2024
c3648ec
fix wrong buffer length
andrej Aug 1, 2024
2afc5e3
more renaming
andrej Aug 1, 2024
c88d6f4
better error messages, various small fixes
andrej Aug 1, 2024
2b59b52
add all tests
andrej Aug 1, 2024
2873ac7
format
andrej Aug 1, 2024
fce2575
fix stride check
andrej Aug 1, 2024
6cfce9b
remove check that is causing trouble
andrej Aug 1, 2024
3a581fa
address Jeff's comments
andrej Aug 2, 2024
50db8c5
fix buffer descriptor ID allocator; improve stride/wrap verification
andrej Aug 5, 2024
8df8184
fix python compiler wrapper
andrej Aug 5, 2024
d36a463
rewrite materialize-bd-chains as pattern
andrej Aug 5, 2024
31df52d
make unusual memcpyNd behavior explicit in code
andrej Aug 5, 2024
7586dc7
emit correct error messages to pass tests
andrej Aug 5, 2024
948b89b
fix tests
andrej Aug 6, 2024
461df10
Merge branch 'main' into parametrizable_bd_chain
andrej Aug 6, 2024
a88b82f
fix tests
andrej Aug 6, 2024
6031431
turn 'remove empty blocks' into a canonicalization pattern; WIP, curr…
andrej Aug 6, 2024
2197946
finish reworking patterns
andrej Aug 6, 2024
5896fd9
remove control flow simplification from materialize BD pass
andrej Aug 6, 2024
97bfd8b
Merge branch 'main' into parametrizable_bd_chain
andrej Aug 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ def AIE_DMABDPACKETOp: AIE_Op<"dma_bd_packet", []> {
}

def AIE_DMABDOp: AIE_Op<"dma_bd", [
ParentOneOf<["MemOp", "MemTileDMAOp", "ShimDMAOp", "DMAOp"]>,
//ParentOneOf<["MemOp", "MemTileDMAOp", "ShimDMAOp", "DMAOp", "BDChainOp"]>,
andrej marked this conversation as resolved.
Show resolved Hide resolved
]> {
let summary = "Declare a dma buffer descriptor op";
let description = [{
Expand Down Expand Up @@ -1102,7 +1102,8 @@ def AIE_MemTileDMAOp: AIE_Op<"memtile_dma", [
}

def AIE_NextBDOp: AIE_Op<"next_bd", [
Terminator, ParentOneOf<["MemOp", "MemTileDMAOp", "mlir::func::FuncOp", "ShimDMAOp"]>
Terminator,
//ParentOneOf<["MemOp", "MemTileDMAOp", "mlir::func::FuncOp", "ShimDMAOp"]>
andrej marked this conversation as resolved.
Show resolved Hide resolved
]> {
let summary = "The next buffer descriptor";
let description = [{
Expand Down Expand Up @@ -1967,4 +1968,29 @@ def AIE_ObjectFifoRegisterProcessOp: AIE_Op<"objectfifo.register_process", []> {
}];
}

// `aie.bd_chain`: a named (Symbol) definition of a buffer descriptor chain
// whose inputs are parameters rather than concrete values. The op carries a
// single region holding the chain body; it has a C++ verifier and a
// hand-written parser/printer (both defined outside this file).
def AIE_BDChainOp: AIE_Op<"bd_chain", [Symbol]> {
let summary = "Definition of a Parametrizable Chain of Buffer Descriptors";
let description = [{
This operation allows you to define buffer descriptor chains with parametrizable inputs.
This is useful for common patterns such as double buffering (ping-pong) that may look identical but use different input/output buffers and locks.
Currently, only buffers and locks are parametrizable.

Once defined, an abstract BD chain can be used elsewhere using AIEX ops in the runtime sequence.
In the future, abstract BD chains will also be usable elsewhere, inside the static configuration.
At its usage sites, the abstract BD chain will be concretized with the given input arguments.
}];

// sym_name: the symbol by which usage sites refer to this chain.
// entry_arg_types_attr: a tuple type attribute; presumably the types of the
// chain's parameters (entry block arguments) -- TODO confirm against the
// custom parser/verifier.
let arguments = (
ins SymbolNameAttr:$sym_name,
TypeAttrOf<AnyTuple>:$entry_arg_types_attr
);
// The body region is unconstrained here (AnyRegion); any structural checks
// are left to the verifier.
let regions = (region AnyRegion:$body);
let hasVerifier = 1;

// Intentionally empty: no extra C++ members are declared on the op class.
let extraClassDeclaration = [{
}];
// Assembly is parsed/printed by custom C++ code instead of a declarative
// assemblyFormat.
let hasCustomAssemblyFormat = 1;

}

#endif // AIE_OPS
22 changes: 22 additions & 0 deletions include/aie/Dialect/AIE/IR/AIETargetModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ class AIETargetModel {
/// tile.
virtual uint32_t getNumBDs(int col, int row) const = 0;

/// Return true iff buffer descriptor `bd_id` on tile (`col`, `row`) can be
/// submitted on channel `channel`.
///
/// Pure virtual: each concrete target model supplies its own rule.
///
/// \param col     Column of the tile that owns the buffer descriptor.
/// \param row     Row of the tile that owns the buffer descriptor.
/// \param bd_id   Buffer descriptor ID to test.
/// \param channel DMA channel index the buffer descriptor would be submitted
///                on.
virtual bool bdCanAccessChannel(int col, int row, uint32_t bd_id,
int channel) const = 0;

andrej marked this conversation as resolved.
Show resolved Hide resolved
virtual uint32_t getNumMemTileRows() const = 0;
/// Return the size (in bytes) of a MemTile.
virtual uint32_t getMemTileSize() const = 0;
Expand Down Expand Up @@ -257,6 +262,10 @@ class AIE1TargetModel : public AIETargetModel {
uint32_t getAccumulatorCascadeSize() const override { return 384; }
uint32_t getNumLocks(int col, int row) const override { return 16; }
uint32_t getNumBDs(int col, int row) const override { return 16; }
/// This target model reports every buffer descriptor as usable on every
/// channel: the result is `true` regardless of tile, BD ID, or channel.
bool bdCanAccessChannel(int col, int row, uint32_t bd_id,
                        int channel) const override {
  // No BD/channel pairing rule is modelled here, so no argument is consulted.
  constexpr bool alwaysAccessible = true;
  return alwaysAccessible;
}
uint32_t getNumMemTileRows() const override { return 0; }
uint32_t getMemTileSize() const override { return 0; }

Expand Down Expand Up @@ -318,6 +327,19 @@ class AIE2TargetModel : public AIETargetModel {
return isMemTile(col, row) ? 48 : 16;
}

/// Return true iff buffer descriptor `bd_id` on tile (`col`, `row`) can be
/// submitted on channel `channel`.
///
/// Tiles that are not MemTiles have no restriction. On a MemTile the buffer
/// descriptors are split into two banks selected by the parity of the
/// channel number: even channels may only use BD IDs below 24, odd channels
/// only BD IDs 24 and above.
///
/// \param col     Column of the tile.
/// \param row     Row of the tile.
/// \param bd_id   Buffer descriptor ID to test.
/// \param channel DMA channel index the BD would be submitted on.
bool bdCanAccessChannel(int col, int row, uint32_t bd_id,
                        int channel) const override {
  if (!isMemTile(col, row))
    return true;

  // The bank is decided by the BD ID, not by the channel number: comparing
  // `channel` against 24 (as was done previously) made every even channel
  // pass and every odd channel fail irrespective of `bd_id`.
  const bool isEvenChannel = (channel & 1) == 0;
  const bool isLowerBank = bd_id < 24;
  return isEvenChannel == isLowerBank;
}

uint32_t getMemTileSize() const override { return 0x00080000; }

uint32_t getNumDestSwitchboxConnections(int col, int row,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//===- AIEAssignBufferDescriptorIDs.h ---------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices Inc.
//
//===----------------------------------------------------------------------===//

#ifndef AIE_ASSIGN_BUFFER_DESCRIPTOR_IDS_H
#define AIE_ASSIGN_BUFFER_DESCRIPTOR_IDS_H

#include <cstdint>
#include <optional>
#include <set>

#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "aie/Dialect/AIE/Transforms/AIEPasses.h"

#include "mlir/Pass/Pass.h"

#define DEBUG_TYPE "aie-assign-bd-ids"

// NOTE(review): using-directives and DEBUG_TYPE in a header leak into every
// translation unit that includes it. They are kept because existing includers
// may rely on them; consider moving them into the .cpp files.
using namespace mlir;
using namespace xilinx;
using namespace xilinx::AIE;

/// Bookkeeping for buffer descriptor (BD) IDs on a single tile.
///
/// Holds the tile coordinates, the target model, and the set of BD IDs that
/// have been marked as assigned. Member functions are only declared here;
/// their definitions live outside this header.
struct BdIdGenerator {
  /// Column of the tile whose BD IDs are tracked.
  const int col;
  /// Row of the tile whose BD IDs are tracked.
  const int row;
  /// Target model of the device; held by reference, so it must outlive this
  /// object.
  const AIETargetModel &targetModel;
  /// BD IDs currently recorded as assigned.
  std::set<uint32_t> alreadyAssigned;

  BdIdGenerator(int col, int row, const AIETargetModel &targetModel);

  /// Presumably returns a not-yet-assigned BD ID usable on `channelIndex`, or
  /// std::nullopt when none is available -- TODO confirm against the
  /// definition.
  std::optional<uint32_t> nextBdId(int channelIndex);

  /// Presumably records `bdId` as assigned -- TODO confirm.
  void assignBdId(uint32_t bdId);

  /// Presumably reports whether `bdId` is recorded as assigned -- TODO
  /// confirm.
  bool bdIdAlreadyAssigned(uint32_t bdId);

  /// Presumably releases `bdId` so it can be handed out again -- TODO confirm.
  void freeBdId(uint32_t bdId);
};

#endif
176 changes: 154 additions & 22 deletions include/aie/Dialect/AIEX/IR/AIEX.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ include "mlir/IR/OpBase.td"
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/CallInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/CommonAttrConstraints.td"

Expand All @@ -30,8 +29,14 @@ def AIEX_Dialect : Dialect {
This is a dialect for experimental work related to AIEngine processors.
The expectation is that new ideas can be developed here before migration
to the more mature AIE dialect.
}];

let extraClassDeclaration = [{
static uint64_t getBufferDescriptorAddressRegisterAddress(const AIE::AIETargetModel &tm, unsigned bd_id, unsigned col);
static std::pair<llvm::SmallVector<int64_t, 4>, llvm::SmallVector<int64_t, 4>> getHardwareStridesWraps(const AIE::AIETargetModel &targetModel, mlir::MemRefType referencedBufType, llvm::SmallVector<int64_t, 4> inputSizes, llvm::SmallVector<int64_t, 4> inputStrides);
static mlir::LogicalResult verifyStridesWraps(mlir::Operation *forOp, mlir::MemRefType referencedBufType, int tileCol, int tileRow, llvm::SmallVector<int64_t, 4> inputSizes, llvm::SmallVector<int64_t, 4> inputStrides, llvm::SmallVector<int64_t, 4> hardwareSizes, llvm::SmallVector<int64_t, 4> hardwareStrides);
}];

fifield marked this conversation as resolved.
Show resolved Hide resolved
}


Expand Down Expand Up @@ -577,27 +582,6 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
static unsigned getOffsetSizeAndStrideStartOperandIndex();
static std::array<unsigned, 3> getArrayAttrMaxRanks();

/* Returns the provided multi-dimensional data transfer strides in units of
address granularity. In the IR, we express strides in units of element
data type, but the hardware requires it in units of address granularity.
Address granularity currently is 4 bytes for all hardware.

The returned stride[0] is the second-lowest dimension stride, i.e.
stride 1. The lowest stride is currently implicitly one, but this is not
a hardware requirement and could be changed in the future. */
llvm::SmallVector<int64_t, 4> getStridesInAddressGranularity();

/* Returns the multi-dimensional data transfer sizes in units of address
granularity. These sizes are expressed in units of element data type in
the IR, but the hardware requires them to be in units of address
granularity. Address granularity currently is 4 bytes for all hardware.

The returned size[0] is the lowest dimension size. In the IR, the sizes
are given in reverse order. For example, specifying sizes in IR as
[1, 2, 3, 4] would result in this function returning [4, 3, 2, 1].
*/
llvm::SmallVector<int64_t, 4> getSizesInAddressGranularity();

/* Returns the data transfer offset in bytes, i.e. the first N bytes of the
target buffer will be skipped. In the IR, offsets are expressed in units
of memref element data type size. */
Expand Down Expand Up @@ -813,4 +797,152 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
}];
}

// `aiex.dma_configure_task`: describes one DMA task (a chain of buffer
// descriptors held in the body region) for a tile, direction and channel.
// Only valid directly inside a RuntimeSequenceOp. The Index result is the
// handle consumed by dma_start_task / dma_await_task / dma_free_task.
def AIE_DMAConfigureTaskOp : AIEX_Op<"dma_configure_task", [HasParent<"RuntimeSequenceOp">]>, Results<(outs Index:$result)> {
  let summary = "Concrete Instantiation of a Buffer Descriptor Chain as a Task on a Channel and Direction on a Tile";
  let description = [{
    Encapsulates the DMA configuration of one task, that is the (chain of) buffer descriptors to be executed on a given channel and direction on a tile.

    Such configurations are generated by materializing abstract `aie.bd_chain`s using `aiex.dma_start_bd_chain`, or can be created manually using this op.

    Once configured, a task can be submitted for execution using `aiex.dma_start_task`, after which its execution completion can be awaited using `aiex.dma_await_task`.
  }];

  // issue_token and repeat_count are optional with defaults (false / 0); they
  // are not part of the positional syntax and appear in the attribute
  // dictionary.
  let arguments = (
    ins Index:$tile,
    DMAChannelDir:$direction,
    I32Attr:$channel,
    DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token,
    DefaultValuedOptionalAttr<I32Attr, "0">:$repeat_count
  );

  let regions = (
    region AnyRegion:$body
  );

  let assemblyFormat = [{
    `(` $tile `,` $direction `,` $channel `)` regions attr-dict
  }];

  // getFirstBdId() is declared here and defined in the dialect's C++ sources.
  let extraClassDeclaration = [{
    AIE::TileOp getTileOp();
    std::optional<uint32_t> getFirstBdId();
  }];

  // getTileOp() uses cast<>, i.e. it asserts that $tile is produced by an
  // aie.tile op.
  let extraClassDefinition = [{
    AIE::TileOp DMAConfigureTaskOp::getTileOp() { return cast<AIE::TileOp>(getTile().getDefiningOp()); }
  }];
}

// `aiex.dma_free_task`: marks the buffer descriptor IDs of a previously
// configured task as reusable. Only valid directly inside a
// RuntimeSequenceOp.
def AIE_DMAFreeTaskOp : AIEX_Op<"dma_free_task", [HasParent<"RuntimeSequenceOp">]> {
  let summary = "Inform Compiler Buffer Descriptor IDs can be reused";
  let description = [{
    This operation informs the compiler that the buffer descriptor IDs it has allocated to the BDs inside the referenced task can be reused thereafter.
  }];
  let arguments = (
    ins Index:$task
  );

  let assemblyFormat = [{
    `(` $task `)` attr-dict
  }];

  let extraClassDeclaration = [{
    DMAConfigureTaskOp getTaskOp();
  }];

  // getTaskOp() returns a null op when $task is not the result of a
  // DMAConfigureTaskOp. Value::getDefiningOp<OpTy>() is used because it also
  // tolerates values without a defining op (block arguments), on which a
  // plain dyn_cast of getDefiningOp() would assert.
  let extraClassDefinition = [{
    DMAConfigureTaskOp DMAFreeTaskOp::getTaskOp() { return getTask().getDefiningOp<DMAConfigureTaskOp>(); }
  }];
}

// `aiex.dma_start_task`: submits a task previously described by
// `aiex.dma_configure_task`. Only valid directly inside a RuntimeSequenceOp.
def AIE_DMAStartTaskOp : AIEX_Op<"dma_start_task", [HasParent<"RuntimeSequenceOp">]> {
  let summary = "Submit a Preconfigured Task to the Task Queue";
  let description = [{
    Submits the referenced task for execution on the tile, channel and direction it has been configured to run on.
    Once submitted, if the task is configured to issue a token, you can await completion of the task using `aiex.dma_await_task`.
  }];

  let arguments = (
    ins Index:$task
  );

  let assemblyFormat = [{
    `(` $task `)` attr-dict
  }];

  let extraClassDeclaration = [{
    DMAConfigureTaskOp getTaskOp();
  }];

  // getTaskOp() returns a null op when $task is not the result of a
  // DMAConfigureTaskOp. Value::getDefiningOp<OpTy>() is used because it also
  // tolerates values without a defining op (block arguments), on which a
  // plain dyn_cast of getDefiningOp() would assert.
  let extraClassDefinition = [{
    DMAConfigureTaskOp DMAStartTaskOp::getTaskOp() { return getTask().getDefiningOp<DMAConfigureTaskOp>(); }
  }];
}

// `aiex.dma_await_task`: blocks the runtime sequence until the referenced
// task has completed. Only valid directly inside a RuntimeSequenceOp.
def AIE_DMAAwaitTaskOp : AIEX_Op<"dma_await_task", [HasParent<"RuntimeSequenceOp">]> {
  let summary = "Await Completion of a Previously Submitted DMA Task";
  let description = [{
    This operation will block execution of the runtime sequence until the referenced previously started DMA task has completed.

    DMA tasks can be started using `aiex.dma_start_bd_chain` using abstract BD chains declared using `aie.bd_chain`, or using `aiex.dma_start_task` using a manually configured task.

    To be able to wait on a task, it must issue a task completion token (TCT). Tasks only emit these tokens if the attribute `issue_token` is set to `true`.
  }];

  let arguments = (
    ins Index:$task
  );

  let assemblyFormat = [{
    `(` $task `)` attr-dict
  }];

  let extraClassDeclaration = [{
    DMAConfigureTaskOp getTaskOp();
  }];

  // getTaskOp() returns a null op when $task is not the result of a
  // DMAConfigureTaskOp. Value::getDefiningOp<OpTy>() is used because it also
  // tolerates values without a defining op (block arguments), on which a
  // plain dyn_cast of getDefiningOp() would assert.
  let extraClassDefinition = [{
    DMAConfigureTaskOp DMAAwaitTaskOp::getTaskOp() { return getTask().getDefiningOp<DMAConfigureTaskOp>(); }
  }];
}


// `aiex.dma_start_bd_chain`: instantiates the `aie.bd_chain` named by $symbol
// with $concrete_args on the given tile/direction/channel and starts it.
// Only valid directly inside a RuntimeSequenceOp. The Index result is the
// task handle.
def AIE_DMAStartBdChainOp: AIEX_Op<"dma_start_bd_chain", [HasParent<"RuntimeSequenceOp">]>,
    Results<(outs Index:$result)>
{

  let summary = "Materialize an Abstract BD Chain as a DMA Task on the Given Tile, Channel and Direction and Immediately Start It";
  let description = [{
    This operation will configure a new DMA task on the given tile, channel and direction by concretizing an abstract BD chain, previously defined using `aie.bd_chain`, with the given input arguments.

    Completion of the DMA task, i.e. the data transfer, can be awaited using `aiex.dma_await_task` if the attribute `issue_token` is set to `true`.
  }];

  // $concrete_args accepts any type at the ODS level; matching against the
  // referenced chain is left to the C++ verifier (hasVerifier below).
  // issue_token and repeat_count are optional with defaults (false / 0) and
  // appear in the attribute dictionary.
  let arguments = (
    ins FlatSymbolRefAttr:$symbol,
    Variadic<AnyType>:$concrete_args,
    Index:$tile,
    DMAChannelDir:$direction,
    I32Attr:$channel,
    DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token,
    DefaultValuedOptionalAttr<I32Attr, "0">:$repeat_count
  );

  let assemblyFormat = [{
    $symbol `(` $concrete_args `)` `:` `(` type($concrete_args) `)` ` ` `on` ` ` `(` $tile `,` $direction `,` $channel `)` attr-dict
  }];

  let hasVerifier = 1;

  // getBDChainOp() is declared here and defined in the dialect's C++ sources.
  let extraClassDeclaration = [{
    AIE::TileOp getTileOp();
    AIE::BDChainOp getBDChainOp();
  }];

  // getTileOp() uses cast<>, i.e. it asserts that $tile is produced by an
  // aie.tile op.
  let extraClassDefinition = [{
    AIE::TileOp DMAStartBdChainOp::getTileOp() { return cast<AIE::TileOp>(getTile().getDefiningOp()); }
  }];

}

#endif // AIEX_OPS
2 changes: 2 additions & 0 deletions include/aie/Dialect/AIEX/IR/AIEXDialect.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef MLIR_AIEX_DIALECT_H
#define MLIR_AIEX_DIALECT_H

#include <optional>

#include "aie/Dialect/AIE/IR/AIEDialect.h"
andrej marked this conversation as resolved.
Show resolved Hide resolved

// Include dialect declarations such as parseAttributes, parseType
Expand Down
6 changes: 6 additions & 0 deletions include/aie/Dialect/AIEX/Transforms/AIEXPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ std::unique_ptr<mlir::OperationPass<AIE::DeviceOp>>
createAIEBroadcastPacketPass();
std::unique_ptr<mlir::OperationPass<AIE::DeviceOp>> createAIEDmaToNpuPass();
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createAIEXToStandardPass();
/// Factory for the BD-chain materialization pass; the pass is anchored on
/// AIE::DeviceOp. NOTE(review): by its name this presumably expands uses of
/// abstract BD chains into concrete task configurations -- confirm against
/// the pass definition.
std::unique_ptr<mlir::OperationPass<AIE::DeviceOp>>
createAIEMaterializeBDChainsPass();
/// Factory for the pass that assigns buffer descriptor IDs for the runtime
/// sequence (per its name -- confirm against the pass definition); anchored
/// on AIE::DeviceOp.
std::unique_ptr<mlir::OperationPass<AIE::DeviceOp>>
createAIEAssignRuntimeSequenceBDIDsPass();
/// Factory for the pass that lowers DMA task ops to NPU ops (per its name --
/// confirm against the pass definition); anchored on AIE::DeviceOp.
std::unique_ptr<mlir::OperationPass<AIE::DeviceOp>>
createAIEDMATasksToNPUPass();

/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
Expand Down
Loading
Loading