-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Copy flt_scr and Any Remaining Pseudo Op Flags to their Real Counterparts #100187
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matin Raayai (matinraayai) ChangesThis PR builds on #89830 and #93004. It aims to copy any remaining MCID flags missing from the real opcodes and registers like flt_scr that have a target-specific opcode. The goal is to keep the machine verifier happy if a disassembled MCInst (with real opcode and real register operands) is put into an MF and put through the code gen pipeline. Also MCID attribute checks should return the correct results. This is a WIP; I wanted to run the idea by the AMDGPU maintainers first before making a full PR for review. I will have to update this for all instructions + all target-specific registers. Let me know what you think @ritter-x2a @jayfoad @arsenm @kzhuravl. Full diff: https://github.com/llvm/llvm-project/pull/100187.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f1d9aec163635..a2056dcd5e4b0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -764,7 +764,8 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
- (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
+ (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_LO_vi, FLAT_SCR_LO_ci, FLAT_SCR_HI,
+ FLAT_SCR_HI_vi, FLAT_SCR_HI_ci, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
@@ -773,7 +774,8 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i
}
def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
- (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
+ (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_LO_vi_LO16,
+ FLAT_SCR_LO_ci_LO16, FLAT_SCR_HI_LO16, FLAT_SCR_HI_vi_LO16, FLAT_SCR_HI_ci_LO16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
@@ -846,7 +848,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
}
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
+ (add SGPR_64, VCC, FLAT_SCR, FLAT_SCR_vi, FLAT_SCR_ci, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 1;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0d6883254c350..0c38037e29973 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -55,18 +55,21 @@ class SOP1_Real<bits<8> op, SOP1_Pseudo ps, string real_name = ps.Mnemonic> :
let Size = 4;
// copy relevant pseudo op flags
- let SubtargetPredicate = ps.SubtargetPredicate;
- let AsmMatchConverter = ps.AsmMatchConverter;
- let SchedRW = ps.SchedRW;
- let mayLoad = ps.mayLoad;
- let mayStore = ps.mayStore;
- let isTerminator = ps.isTerminator;
- let isReturn = ps.isReturn;
- let isCall = ps.isCall;
- let isBranch = ps.isBranch;
- let isBarrier = ps.isBarrier;
- let Uses = ps.Uses;
- let Defs = ps.Defs;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let SchedRW = ps.SchedRW;
+ let mayLoad = ps.mayLoad;
+ let mayStore = ps.mayStore;
+ let isTerminator = ps.isTerminator;
+ let isReturn = ps.isReturn;
+ let isCall = ps.isCall;
+ let isBranch = ps.isBranch;
+ let isIndirectBranch = ps.isIndirectBranch;
+ let isMoveImm = ps.isMoveImm;
+ let mayRaiseFPException = ps.mayRaiseFPException;
+ let isBarrier = ps.isBarrier;
+ let Uses = ps.Uses;
+ let Defs = ps.Defs;
// encoding
bits<7> sdst;
@@ -577,6 +580,7 @@ class SOP2_Real<SOP_Pseudo ps, string name = ps.Mnemonic> :
let mayStore = ps.mayStore;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
+ let isCommutable = ps.isCommutable;
let Uses = ps.Uses;
let Defs = ps.Defs;
@@ -994,6 +998,7 @@ class SOPK_Real<SOPK_Pseudo ps, string name = ps.Mnemonic> :
let isTerminator = ps.isTerminator;
let isReturn = ps.isReturn;
let isBarrier = ps.isBarrier;
+ let isCompare = ps.isCompare;
let Uses = ps.Uses;
let Defs = ps.Defs;
@@ -1456,6 +1461,7 @@ class SOPP_Real<SOPP_Pseudo ps, string name = ps.Mnemonic> :
let isReturn = ps.isReturn;
let isCall = ps.isCall;
let isBranch = ps.isBranch;
+ let isTrap = ps.isTrap;
let isBarrier = ps.isBarrier;
let Uses = ps.Uses;
let Defs = ps.Defs;
|
|
No strong objections from me but it seems likely to rot if there is no in-tree use for this, and no way to verify it. |
Would it help if I contribute tests for these? I should be able to write MIR with real instructions that gets parsed, and each opcode's MCIDs gets checked for correct properties. It can then be passed to the machine verifier for a final verification. |
That would work I think |
I'm dropping this PR since extending the pseudo attributes/classes to reals for both instructions and registers needs more than just changing the AMDGPU tablegen files. It also requires modification of machine verifier functions in the AMDGPU backend to work in codegen and maybe some redesigning of tablegen classes and records. Instead of trying to modify the AMDGPU backend, I wrote a tablegen backend which maps all registers/opcodes to their pseudo equivalent on my end, hence this PR is not needed anymore. |
This PR builds on #89830 and #93004. It aims to copy any remaining MCID flags missing from the real opcodes and registers like flt_scr that have a target-specific opcode. The goal is to keep the machine verifier happy if a disassembled MCInst (with real opcode and real register operands) is put into an MF and put through the code gen pipeline. Also MCID attribute checks should return the correct results.
This is a WIP; I wanted to run the idea by the AMDGPU maintainers first before making a full PR for review. I will have to update this for all instructions + all target-specific registers.
Let me know what you think @ritter-x2a @jayfoad @arsenm @kzhuravl.