Skip to content

Commit

Permalink
[SYCL][CUDA] Adds PI CUDA support for reqd_work_group_size attribute (#…
Browse files Browse the repository at this point in the history
…3735)

This commit adds support for reqd_work_group_size in the PI CUDA backend
by extracting the attribute as program metadata. The program metadata
accompanies the binary when it is passed to the backend, and it is up to
the backend whether it extracts any useful metadata. This adds two
additional parameters to piProgramCreateWithBinary for passing the program
metadata.

Program metadata is transported as properties created by
sycl-post-link, so this commit also changes the behaviour of the NVPTX
path for linkage actions leading to the offload wrapper. These changes
use file tables for the NVPTX path as well, to allow generation and
preservation of properties. This assumes that the file table only ever
contains a single row when taking the NVPTX path and will fail otherwise.

Signed-off-by: Steffen Larsen <steffen.larsen@codeplay.com>
  • Loading branch information
steffenlarsen authored Jul 16, 2021
1 parent f7aa2bf commit a8fe4a5
Show file tree
Hide file tree
Showing 32 changed files with 2,031 additions and 619 deletions.
21 changes: 20 additions & 1 deletion clang/include/clang/Driver/Action.h
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,14 @@ class FileTableTformJobAction : public JobAction {

public:
struct Tform {
enum Kind { EXTRACT, EXTRACT_DROP_TITLE, REPLACE, RENAME };
enum Kind {
EXTRACT,
EXTRACT_DROP_TITLE,
REPLACE,
REPLACE_CELL,
RENAME,
COPY_SINGLE_FILE
};

Tform() = default;
Tform(Kind K, std::initializer_list<StringRef> Args) : TheKind(K) {
Expand All @@ -794,10 +801,19 @@ class FileTableTformJobAction : public JobAction {
// <To> from another file table passed as input to this action.
void addReplaceColumnTform(StringRef From, StringRef To);

// Replaces a cell in this table with column title <ColumnName> and row <Row>
// with the file name passed as input to this action.
void addReplaceCellTform(StringRef ColumnName, int Row);

// Renames a column with title <From> in this table with a column with title
// <To> passed as input to this action.
void addRenameColumnTform(StringRef From, StringRef To);

// Specifies that, instead of generating a new table, the transformation
// should copy the file at column <ColumnName> and row <Row> into the
// output file.
void addCopySingleFileTform(StringRef ColumnName, int Row);

static bool classof(const Action *A) {
return A->getKind() == FileTableTformJobClass;
}
Expand All @@ -806,6 +822,9 @@ class FileTableTformJobAction : public JobAction {

private:
SmallVector<Tform, 2> Tforms; // transformation actions requested

// column to copy single file from if requested
std::string CopySingleFileColumnName;
};

class AppendFooterJobAction : public JobAction {
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Driver/Action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,11 +507,23 @@ void FileTableTformJobAction::addReplaceColumnTform(StringRef From,
Tforms.emplace_back(Tform(Tform::REPLACE, {From, To}));
}

// Queues a REPLACE_CELL transformation. Its two arguments are the column
// title and the row index rendered as a decimal string.
void FileTableTformJobAction::addReplaceCellTform(StringRef ColumnName,
                                                  int Row) {
  // Keep the textual row index in a named local so it stays alive while the
  // Tform is being constructed from the argument list.
  const std::string RowText = std::to_string(Row);
  Tforms.push_back(Tform(Tform::REPLACE_CELL, {ColumnName, RowText}));
}

// Queues a RENAME transformation whose arguments are the current column
// title <From> and the new title <To>.
void FileTableTformJobAction::addRenameColumnTform(StringRef From,
                                                   StringRef To) {
  Tforms.push_back(Tform(Tform::RENAME, {From, To}));
}

// Queues a COPY_SINGLE_FILE transformation. Its two arguments are the column
// title and the row index rendered as a decimal string.
void FileTableTformJobAction::addCopySingleFileTform(StringRef ColumnName,
                                                     int Row) {
  // Keep the textual row index in a named local so it stays alive while the
  // Tform is being constructed from the argument list.
  const std::string RowText = std::to_string(Row);
  Tforms.push_back(Tform(Tform::COPY_SINGLE_FILE, {ColumnName, RowText}));
}

void AppendFooterJobAction::anchor() {}

AppendFooterJobAction::AppendFooterJobAction(Action *Input, types::ID Type)
Expand Down
77 changes: 47 additions & 30 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4378,33 +4378,33 @@ class OffloadingActionBuilder final {
// .--------------------------------------.
// | PostLink |
// .--------------------------------------.
// [.n] [+*] [+*]
// [+n] [+*] [+]
// | | |
// | .-----------------. |
// | | FileTableTform | |
// | | (extract "Code")| |
// | .-----------------. |
// | [-] |
// .----------------. .-----------------. |
// | FileTableTform | | FileTableTform | |
// | (copy "Code") | | (extract "Code")| |
// .----------------. .-----------------. |
// [.] [-] |
// | | |
// | [-*] |
// .-------------. .-------------------. |
// |finalizeNVPTX| | SPIRVTranslator | |
// .-------------. .-------------------. |
// | [-as] [-!a] |
// [.] [-*] |
// .---------------. .-------------------. |
// | finalizeNVPTX | | SPIRVTranslator | |
// .---------------. .-------------------. |
// [.] [-as] [-!a] |
// | | | |
// | [-s] | |
// | .----------------. | |
// | | BackendCompile | | |
// | .----------------. | |
// | [-s] | |
// | | | |
// | [-a] [-!a] [+]
// | .--------------------.
// | | FileTableTform |
// | | (replace "Code") |
// | .--------------------.
// | |
// [.n] [+*]
// [.] [-a] [-!a] [+]
// .------------------------------------.
// | FileTableTform |
// | (replace "Code") |
// .------------------------------------.
// |
// [+]
// .--------------------------------------.
// | OffloadWrapper |
// .--------------------------------------.
Expand Down Expand Up @@ -4451,24 +4451,40 @@ class OffloadingActionBuilder final {
ActionList WrapperInputs;
// post link is not optional - even if not splitting, always need to
// process specialization constants
types::ID PostLinkOutType =
isNVPTX || isAMDGCN ? types::TY_LLVM_BC : types::TY_Tempfiletable;
auto *PostLinkAction = C.MakeAction<SYCLPostLinkJobAction>(
FullDeviceLinkAction, PostLinkOutType);
FullDeviceLinkAction, types::TY_Tempfiletable);
PostLinkAction->setRTSetsSpecConstants(!isAOT);

if (isNVPTX) {
Action *FinAction =
finalizeNVPTXDependences(PostLinkAction, (*TC)->getTriple());
WrapperInputs.push_back(FinAction);
} else if (isAMDGCN) {
Action *FinAction =
finalizeAMDGCNDependences(PostLinkAction, (*TC)->getTriple());
WrapperInputs.push_back(FinAction);
constexpr char COL_CODE[] = "Code";

if (isNVPTX || isAMDGCN) {
// Make extraction copy the only remaining code file instead of
// creating a new table with a single entry.
// TODO: Process all PTX code files in file table to enable code
// splitting for PTX target.
auto *ExtractIRFilesAction = C.MakeAction<FileTableTformJobAction>(
PostLinkAction, types::TY_LLVM_BC);
ExtractIRFilesAction->addCopySingleFileTform(COL_CODE, 0);

Action *FinAction;
if (isNVPTX) {
FinAction = finalizeNVPTXDependences(ExtractIRFilesAction,
(*TC)->getTriple());
} else /* isAMDGCN */ {
FinAction = finalizeAMDGCNDependences(ExtractIRFilesAction,
(*TC)->getTriple());
}
ActionList TformInputs{PostLinkAction, FinAction};

// Replace the only code entry in the table, as confirmed by the
// previous transformation.
auto *ReplaceFilesAction = C.MakeAction<FileTableTformJobAction>(
TformInputs, types::TY_Tempfiletable);
ReplaceFilesAction->addReplaceCellTform(COL_CODE, 0);
WrapperInputs.push_back(ReplaceFilesAction);
} else {
// For SPIRV-based targets - translate to SPIRV then optionally
// compile ahead-of-time to native architecture
constexpr char COL_CODE[] = "Code";
auto *ExtractIRFilesAction = C.MakeAction<FileTableTformJobAction>(
PostLinkAction, types::TY_Tempfilelist);
// single column w/o title fits TY_Tempfilelist format
Expand Down Expand Up @@ -4513,6 +4529,7 @@ class OffloadingActionBuilder final {
ReplaceFilesAction->addReplaceColumnTform(COL_CODE, COL_CODE);
WrapperInputs.push_back(ReplaceFilesAction);
}

// After the Link, wrap the files before the final host link
auto *DeviceWrappingAction = C.MakeAction<OffloadWrapperJobAction>(
WrapperInputs, types::TY_Object);
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8860,6 +8860,9 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false))
addArgs(CmdArgs, TCArgs, {"-emit-param-info"});
// Enable PI program metadata
if (getToolChain().getTriple().isNVPTX())
addArgs(CmdArgs, TCArgs, {"-emit-program-metadata"});
if (JA.getType() == types::TY_LLVM_BC) {
// single file output requested - this means only perform necessary IR
// transformations (like specialization constant intrinsic lowering) and
Expand Down Expand Up @@ -8946,6 +8949,15 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA,
addArgs(CmdArgs, TCArgs, {Arg});
break;
}
case FileTableTformJobAction::Tform::REPLACE_CELL: {
assert(Tf.TheArgs.size() == 2 && "column name and row id expected");
SmallString<128> Arg("-replace_cell=");
Arg += Tf.TheArgs[0];
Arg += ",";
Arg += Tf.TheArgs[1];
addArgs(CmdArgs, TCArgs, {Arg});
break;
}
case FileTableTformJobAction::Tform::RENAME: {
assert(Tf.TheArgs.size() == 2 && "from/to names expected");
SmallString<128> Arg("-rename=");
Expand All @@ -8955,8 +8967,18 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA,
addArgs(CmdArgs, TCArgs, {Arg});
break;
}
case FileTableTformJobAction::Tform::COPY_SINGLE_FILE: {
assert(Tf.TheArgs.size() == 2 && "column name and row id expected");
SmallString<128> Arg("-copy_single_file=");
Arg += Tf.TheArgs[0];
Arg += ",";
Arg += Tf.TheArgs[1];
addArgs(CmdArgs, TCArgs, {Arg});
break;
}
}
}

// 2) add output option
assert(Output.isFilename() && "table tform output must be a file");
addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()});
Expand Down
16 changes: 9 additions & 7 deletions clang/test/Driver/sycl-offload-amdgcn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
// CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl)
// CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl)
// CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl)
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl)
// CHK-PHASES-NO-CC: 13: backend, {12}, assembler, (device-sycl)
// CHK-PHASES-NO-CC: 14: assembler, {13}, object, (device-sycl)
// CHK-PHASES-NO-CC: 15: linker, {14}, image, (device-sycl)
// CHK-PHASES-NO-CC: 16: linker, {15}, hip-fatbin, (device-sycl)
// CHK-PHASES-NO-CC: 17: clang-offload-wrapper, {16}, object, (device-sycl)
// CHK-PHASES-NO-CC: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {17}, image
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl)
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl)
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl)
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl)
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl)
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl)
// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl)
// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl)
// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {19}, image
20 changes: 12 additions & 8 deletions clang/test/Driver/sycl-offload-nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
// CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl)
// CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl)
// CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 13: backend, {12}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 14: clang-offload-wrapper, {13}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 15: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {14}, image
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {16}, image

/// Check phases specifying a compute capability.
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
Expand All @@ -50,7 +52,9 @@
// CHK-PHASES: 9: assembler, {8}, object, (host-sycl)
// CHK-PHASES: 10: linker, {9}, image, (host-sycl)
// CHK-PHASES: 11: linker, {5}, ir, (device-sycl, sm_35)
// CHK-PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_35)
// CHK-PHASES: 13: backend, {12}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 14: clang-offload-wrapper, {13}, object, (device-sycl, sm_35)
// CHK-PHASES: 15: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {14}, image
// CHK-PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_35)
// CHK-PHASES: 14: backend, {13}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_35)
// CHK-PHASES: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {16}, image
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ class PropertySetRegistry {
"SYCL/specialization constants default values";
static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask";
static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt";
static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata";
static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties";
static constexpr char SYCL_ASSERT_USED[] = "SYCL/assert used";

Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/SimpleTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ class SimpleTable {
Error replaceColumn(StringRef Name, const SimpleTable &Src,
StringRef SrcName = "");

// Replaces the value in a cell at a given column and row with the new value.
Error updateCellValue(StringRef ColName, int Row, StringRef NewValue);

// Renames a column.
Error renameColumn(StringRef OldName, StringRef NewName);

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/PropertySetIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[];
constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[];
constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[];
constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[];
constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[];
constexpr char PropertySetRegistry::SYCL_MISC_PROP[];
constexpr char PropertySetRegistry::SYCL_ASSERT_USED[];

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Support/SimpleTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ Error SimpleTable::replaceColumn(StringRef Name, const SimpleTable &Src,
return Error::success();
}

// Replaces the value of the cell at column <ColName> and row <Row> with
// <NewValue>.
//
// Returns an error (leaving the table untouched) when the table has no
// columns, when <Row> is not a valid row index, or when no column is titled
// <ColName>; returns success otherwise.
Error SimpleTable::updateCellValue(StringRef ColName, int Row,
                                   StringRef NewValue) {
  if (getNumColumns() == 0)
    return makeError("empty table");
  // Valid row indices are [0, getNumRows()); an index equal to the row count
  // is already one past the last row and must be rejected.
  if (Row < 0 || Row >= getNumRows())
    return makeError("row index out of bounds");
  const int ColId = getColumnId(ColName);
  // NOTE(review): assumes getColumnId reports an unknown column title with a
  // negative id - confirm against its definition.
  if (ColId < 0)
    return makeError("column not found");
  Rows[Row][ColId] = NewValue.str();
  return Error::success();
}

Error SimpleTable::renameColumn(StringRef OldName, StringRef NewName) {
int I = getColumnId(OldName);

Expand Down
2 changes: 2 additions & 0 deletions llvm/test/tools/file-table-tform/Inputs/s.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[A|B|C|D]
aaa|bbb|100|XXX
8 changes: 8 additions & 0 deletions llvm/test/tools/file-table-tform/file-table-tform-single.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Insert %S/Inputs/gold.txt into column A at row index 0. Outputs go to
-- lit's per-test %t temporaries so that nothing is written into the current
-- working directory and parallel test runs cannot clobber each other.
RUN: file-table-tform --replace_cell=A,0 %S/Inputs/s.txt %S/Inputs/gold.txt -o %t.table

-- Copy file in column A from the only row
RUN: file-table-tform --copy_single_file=A,0 %t.table -o %t.copy

-- Verify result
RUN: diff %t.copy %S/Inputs/gold.txt
24 changes: 24 additions & 0 deletions llvm/test/tools/sycl-post-link/emit_program_metadata.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; This test checks that the post-link tool generates SYCL program metadata.
;
; RUN: sycl-post-link -emit-program-metadata -S %s -o %t.files.table
; RUN: FileCheck %s -input-file=%t.files.table --check-prefixes CHECK-TABLE
; RUN: FileCheck %s -input-file=%t.files_0.prop --match-full-lines --check-prefixes CHECK-PROP

target triple = "spir64-unknown-unknown-sycldevice"

!0 = !{i32 1, i32 2, i32 4}

; Kernel under test: it carries !reqd_work_group_size metadata (!0, i.e. the
; triple {1, 2, 4}). The property check at the end of this file expects the
; post-link tool to emit a "SpirKernel1@reqd_work_group_size" entry for it.
define weak_odr spir_kernel void @SpirKernel1(float %arg1) !reqd_work_group_size !0 {
call void @foo(float %arg1)
ret void
}

declare void @foo(float)

; CHECK-PROP: [SYCL/program metadata]
; // Base64 encoding in the prop file (including 8 bytes length):
; CHECK-PROP-NEXT: SpirKernel1@reqd_work_group_size=2|gBAAAAAAAAQAAAAACAAAAQAAAAA

; CHECK-TABLE: [Code|Properties]
; CHECK-TABLE-NEXT: {{.*}}files_0.prop
; CHECK-TABLE-EMPTY:
Loading

0 comments on commit a8fe4a5

Please sign in to comment.