From 763d18cb87b209161133b2c51d4196da02f69b8e Mon Sep 17 00:00:00 2001 From: Sagar Maheshwari Date: Mon, 17 Feb 2025 11:49:53 +0530 Subject: [PATCH 1/3] [AIE2/2P] Pre-commit test to check DONE latency at region end. --- llvm/test/CodeGen/AIE/aie2/schedule/done.mir | 32 ++++++++++++++- llvm/test/CodeGen/AIE/aie2p/schedule/done.mir | 39 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AIE/aie2p/schedule/done.mir diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/done.mir b/llvm/test/CodeGen/AIE/aie2/schedule/done.mir index 52009e4d71ee..b253b53e2910 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/done.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/done.mir @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +# (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -march=aie2 -run-pass=postmisched %topdown-multi %s -o - | FileCheck %s # Make sure DONE stays in place @@ -36,3 +36,33 @@ body: | RET implicit $lr DelayedSchedBarrier implicit $r0 ... + +# Check 6 cycle latency for DONE at region end. +--- +name: done_latency +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: done_latency + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SCHED_BARRIER + ; CHECK-NEXT: DONE + ; CHECK-NEXT: SCHED_BARRIER + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: JL 0, implicit-def $lr + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: DelayedSchedBarrier csr_aie2 + bb.0.entry: + SCHED_BARRIER + DONE + SCHED_BARRIER + bb.1: + JL 0, implicit-def $lr + DelayedSchedBarrier csr_aie2 +... 
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir new file mode 100644 index 000000000000..ef22a4c28848 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates + +# RUN: llc -march=aie2p %topdown-multi -run-pass=postmisched %s -o - | FileCheck %s + +# Check 6 cycle latency for DONE at region end. +--- +name: done +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: done + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SCHED_BARRIER + ; CHECK-NEXT: DONE + ; CHECK-NEXT: SCHED_BARRIER + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: JL_lng 0, implicit-def $lr + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: DelayedSchedBarrier csr_aie2p + bb.0.entry: + SCHED_BARRIER + DONE + SCHED_BARRIER + bb.1: + JL_lng 0, implicit-def $lr + DelayedSchedBarrier csr_aie2p +... From 6ab46441646ee37df182f6f653976a48fa30b6b3 Mon Sep 17 00:00:00 2001 From: Sagar Maheshwari Date: Mon, 17 Feb 2025 11:50:59 +0530 Subject: [PATCH 2/3] [AIE2/2P] Fix DONE latency at region end. 
--- llvm/lib/Target/AIE/AIE2InstrInfo.cpp | 8 ++++++++ llvm/lib/Target/AIE/AIE2InstrInfo.h | 1 + llvm/lib/Target/AIE/AIEBaseInstrInfo.h | 16 ++++++++++++++++ llvm/lib/Target/AIE/AIEBaseSubtarget.cpp | 6 +++++- llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 8 ++++++++ llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h | 1 + llvm/test/CodeGen/AIE/aie2/schedule/done.mir | 5 +++++ llvm/test/CodeGen/AIE/aie2p/schedule/done.mir | 5 +++++ 8 files changed, 49 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index 9572ce83a6b6..a8daa5cedac4 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -1172,6 +1172,14 @@ bool AIE2InstrInfo::isLock(unsigned Opc) const { return false; } +// Return an optional latency if Opc is DONE. +std::optional<unsigned> +AIE2InstrInfo::getDoneLatency(const unsigned Opc) const { + // AIE2P ISA isn't very clear on the DONE instruction and only mentions a + // structural conflict at E4..E6. So, conservatively, provide a latency of 6. + return (Opc == AIE2::DONE) ? 
std::optional<unsigned>(6) : std::nullopt; +} + bool AIE2InstrInfo::isDelayedSchedBarrier(const MachineInstr &MI) const { return MI.getOpcode() == AIE2::DelayedSchedBarrier; } diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.h b/llvm/lib/Target/AIE/AIE2InstrInfo.h index feebe2140b68..e611a8cb3af8 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.h +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.h @@ -64,6 +64,7 @@ class AIE2InstrInfo : public AIE2GenInstrInfo { unsigned getGenericBroadcastVectorOpcode() const override; unsigned getCycleSeparatorOpcode() const override; bool isLock(unsigned Opc) const override; + std::optional<unsigned> getDoneLatency(unsigned) const override; bool isDelayedSchedBarrier(const MachineInstr &MI) const override; bool isSchedBarrier(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h index 4345f8733d4b..6a5521f88d42 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h @@ -176,6 +176,22 @@ struct AIEBaseInstrInfo : public TargetInstrInfo { } /// Check whether Opc represents a lock instruction virtual bool isLock(unsigned Opc) const { return false; } + + /// Return an optional latency if Opc is DONE. + virtual std::optional<unsigned> getDoneLatency(const unsigned Opc) const { + return std::nullopt; + } + + /// Get "implicit" latency for special instructions. + /// This is basically an extra latency, implicit to a special instruction like + /// "DONE", that we would like to give to the exit edge. + virtual unsigned getImplicitLatency(const MachineInstr &MI) const { + if (auto OptLatency = getDoneLatency(MI.getOpcode())) + return *OptLatency; + + return 0; + } + /// Check whether this is a delayed scheduling barrier induced from /// a preceding instruction with delay slots. 
virtual bool isDelayedSchedBarrier(const MachineInstr &) const { diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp index e214d35039d7..fd1d5c86640c 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // @@ -282,6 +282,10 @@ class RegionEndEdges : public ScheduleDAGMutation { EdgeLatency = DelaySlots + 1; } + // "Implicit" latency for special instructions. + const unsigned ImplicitLatency = TII->getImplicitLatency(MI); + EdgeLatency = std::max(EdgeLatency, ImplicitLatency); + // Between writing ZOL Registers (lc, le, ls) and the end of the loop, // there must be a minimum distance. This is ultimately padded out by the // alignment pass using bundle elongation, but this needs to have enough diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index 51791c57e265..a7f0374bc638 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -1219,6 +1219,14 @@ bool AIE2PInstrInfo::isLock(unsigned Opc) const { return false; } +// Return an optional latency if Opc is DONE. +std::optional<unsigned> +AIE2PInstrInfo::getDoneLatency(const unsigned Opc) const { + // AIE2P ISA isn't very clear on the DONE instruction and only mentions a + // structural conflict at E4..E6. So, conservatively, provide a latency of 6. + return (Opc == AIE2P::DONE) ? 
std::optional<unsigned>(6) : std::nullopt; +} + bool AIE2PInstrInfo::isDelayedSchedBarrier(const MachineInstr &MI) const { return MI.getOpcode() == AIE2P::DelayedSchedBarrier; } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h index 2e86375c57b8..9f582a3d51e1 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h @@ -64,6 +64,7 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo { unsigned getGenericVShiftOpcode() const override; unsigned getGenericExtractSubvectorOpcode() const override; bool isLock(unsigned Opc) const override; + std::optional<unsigned> getDoneLatency(unsigned) const override; bool isDelayedSchedBarrier(const MachineInstr &MI) const override; bool isSchedBarrier(const MachineInstr &MI) const override; diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/done.mir b/llvm/test/CodeGen/AIE/aie2/schedule/done.mir index b253b53e2910..56731a0d37a3 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/done.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/done.mir @@ -48,6 +48,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: SCHED_BARRIER ; CHECK-NEXT: DONE + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP ; CHECK-NEXT: SCHED_BARRIER ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir index ef22a4c28848..6cb948455244 100644 --- a/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir +++ b/llvm/test/CodeGen/AIE/aie2p/schedule/done.mir @@ -19,6 +19,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: SCHED_BARRIER ; CHECK-NEXT: DONE + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP ; CHECK-NEXT: SCHED_BARRIER ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: From 1393b566b1fa757fdba9b0cc355c1f555d4f2398 Mon Sep 17 00:00:00 2001 From: Sagar Maheshwari Date: Mon, 17 Feb 2025 01:09:15 +0530 Subject: [PATCH 3/3] [AIE2/2P] 
Add a delay between memory ops and DONE. Be conservative here, as we don't expect DONE to appear without a sched barrier. --- llvm/lib/Target/AIE/AIEBaseSubtarget.cpp | 11 +++++++---- llvm/test/CodeGen/AIE/aie2/schedule/done.mir | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp index fd1d5c86640c..84ee36401a04 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp @@ -184,9 +184,9 @@ bool updateSuccLatency(SDep &SuccEdge, SUnit &PredSU, int Latency) { return updatePredLatency(PredEdge, *SuccEdge.getSUnit(), Latency); } -// Set the latency of ordering edges between memory operations and locks. +// Set the latency of ordering edges between memory operations and locks/DONE. // The initial graph will have ordering edges induced by hasSideEffects of the -// locks +// locks/DONE. class LockDelays : public ScheduleDAGMutation { void apply(ScheduleDAGInstrs *DAG) override { // FIXME: Delays for locks to reach the core aren't completely described in @@ -199,9 +199,12 @@ class LockDelays : public ScheduleDAGMutation { // to increase the edge latency. // Note that scalar streams are kept away from locks using // a reserved FuncUnit instead. See AIE2Schedule.td + // Be conservative for DONE instruction as we don't expect it to appear + // without a sched barrier. 
for (auto &SU : DAG->SUnits) { - MachineInstr *Lock = SU.getInstr(); - if (!Lock || !TII->isLock(Lock->getOpcode())) { + MachineInstr *MI = SU.getInstr(); + if (!MI || !(TII->isLock(MI->getOpcode()) || + TII->getDoneLatency(MI->getOpcode()))) { continue; } for (auto &PredEdge : SU.Preds) { diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/done.mir b/llvm/test/CodeGen/AIE/aie2/schedule/done.mir index 56731a0d37a3..959daf0a8a24 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/done.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/done.mir @@ -22,6 +22,9 @@ body: | ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: ST_dms_sts_idx_imm $r0, killed $p1, 0 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP ; CHECK-NEXT: DONE ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP