[rockett1] draft Tile

chipsalliance · Jul 24, 2024 · 94cdc3f · 94cdc3f
1 parent 628cecc
commit 94cdc3f
Show file tree

Hide file tree

Showing 3 changed files with 521 additions and 60 deletions.
diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala
@@ -1398,3 +1398,32 @@ class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entr
   val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits)
   val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits)
 }
+
+// Interface between the Rocket core and T1 (the `t1` port of RocketInterface when usingT1 is set).
+class RocketCoreToT1 extends Bundle {
+  val issue: Valid[T1Issue] = Valid(new T1Issue) // instruction handed to T1; Valid only, so there is no ready/back-pressure signal
+  val retire: T1Retire = Flipped(new T1Retire) // retirement information; direction is flipped relative to `issue`
+}
+
+class T1Issue extends Bundle { // payload of one instruction issued to T1
+  val instruction: UInt = UInt(32.W) // driven from wbRegInstruction in RocketCore
+  val rs1Data: UInt = UInt(32.W) // driven from wbRegWdata in RocketCore
+  val rs2Data: UInt = UInt(32.W) // driven from wbRegRS2 in RocketCore
+  val vcsr: UInt = UInt(32.W) // NOTE(review): presumably vector CSR state; no driver is visible in this change
+}
+
+class T1RdRetire extends Bundle { // register write-back carried by a T1 retirement
+  val rd:      UInt = UInt(5.W) // destination register index
+  val data:    UInt = UInt(32.W) // value to write back
+  val fp:      Bool = Bool() // set: result is forwarded to the FPU; clear: result uses the integer long-latency write port
+}
+
+class T1CSRRetire extends Bundle { // CSR side effects carried by a T1 retirement
+  val vxsat:   UInt = UInt(32.W) // NOTE(review): presumably the vxsat CSR update; declared 32 bits wide here, confirm intended width
+}
+
+class T1Retire extends Bundle { // three independently valid retirement channels
+  val rd = Valid(new T1RdRetire) // register (integer or FP) write-back
+  val csr = Valid(new T1CSRRetire) // CSR update
+  val mem = Valid(new Bundle {}) // payload-free; RocketCore uses its fire as the LSU scoreboard release
+}
diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala
@@ -9,7 +9,7 @@ import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable}
 import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
 import chisel3.util.circt.ClockGate
 import chisel3.util.experimental.decode.DecodeBundle
-import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil}
+import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up}
 import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes
 import org.chipsalliance.rvdecoderdb.Instruction
 
@@ -43,9 +43,12 @@ case class RocketParameter(
                             fastLoadByte: Boolean,
                             fastLoadWord: Boolean,
                             dcacheNSets: Int,
-                            flushOnFenceI: Boolean
+                            flushOnFenceI: Boolean,
+                            usingT1: Boolean
                           )
   extends SerializableModuleParameter {
+  // interface to T1
+  def usingVector = hasInstructionSet("rv_v")
 
   // fixed for now
   def usingRVE = false
@@ -135,8 +138,6 @@ case class RocketParameter(
   // static to false for now
   def usingNMI = hasInstructionSet("rv_smrnmi")
 
-  def usingVector = hasInstructionSet("rv_v")
-
   // calculated parameter
   def fetchWidth: Int = 1
 
@@ -309,6 +310,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle {
     )
   )
   val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen)))
+  val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1)
   val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch))
   val cease = Output(Bool())
   val wfi = Output(Bool())
@@ -1331,62 +1333,69 @@ class Rocket(val parameter: RocketParameter)
       fpu.keep_clock_enabled := false.B
     }
 
-// @todo get back t1.
-//    t1Request.foreach { t1 =>
-//      // Send instruction to T1 when write back.
-//      t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
-//      t1.bits.instruction := wbRegInstruction
-//      t1.bits.rs1Data := wbRegWdata
-//      t1.bits.rs2Data := wbRegRS2
-//      val response: DecoupledIO[VectorResponse] = t1Response.get
-//      // TODO: make it configurable
-//      val maxCount: Int = 32
-//      val countWidth = log2Up(maxCount)
-//      def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = {
-//        val counter: UInt = RegInit(0.U(size.W))
-//        val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt)
-//        val updateCounter = grant ^ release
-//        when(updateCounter) {
-//          counter := nextCount
-//        }
-//        flush.foreach(f => when(f)(counter := 0.U))
-//        val empty = (updateCounter && nextCount === 0.U) || counter === 0.U
-//        val fullCounter: Int = (1 << size) - 1 - margin
-//        val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
-//        (empty, full)
-//      }
-//      // Maintain lsu counter
-//      val lsuGrant:   Bool = t1.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
-//      val lsuRelease: Bool = response.fire && response.bits.mem
-//      val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease)
-//      // Maintain vector counter
-//      // There may be 4 instructions in the pipe
-//      val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get)
-//      vectorLSUEmpty.foreach(_ := lsuEmpty)
-//      vectorQueueFull.foreach(_ := vectorFull)
-//    }
-//    // todo: vector change csr
-//    t1Response.foreach { vectorResponse =>
-//      val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float
-//      val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float
-//      vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) &&
-//        (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
-//      when(vectorResponse.fire && vectorTryToWriteRd) {
-//        longlatencyWdata := vectorResponse.bits.data
-//        longlatencyWaddress := vectorResponse.bits.rd.bits
-//        longLatencyWenable := true.B
-//      }
-//      fpu.foreach { fpu =>
-//        when(!(dmemResponseValid && dmemResponseFpu)) {
-//          fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP
-//          fpu.dmem_resp_data := vectorResponse.bits.data
-//          // todo: 32 bit only
-//          fpu.dmem_resp_type := 2.U
-//          // todo: connect tag
-//          fpu.dmem_resp_tag := 0.U
-//        }
-//      }
-//    }
+    io.t1.foreach { t1 =>
+      // Vector instructions reaching write-back are buffered in a queue whose head drives t1.issue.
+      // TODO: make it configurable. NOTE(review): enq.bits.vcsr and deq.ready of the issue queue are not driven in this block.
+      val maxCount: Int = 32
+      val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount))
+      t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
+      t1IssueQueue.io.enq.bits.instruction := wbRegInstruction
+      t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata
+      t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2
+      t1.issue.valid := t1IssueQueue.io.deq.valid
+      t1.issue.bits := t1IssueQueue.io.deq.bits
+      // TODO: really maintain 3 retire queues? we need to reduce it via a scoreboard. NOTE(review): their enq ports are not driven yet.
+      val t1MemoryRetireQueue = Module(new Queue(chiselTypeOf(t1.retire.mem.bits), maxCount))
+      val t1CSRRetireQueue = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount))
+      val t1XRDRetireQueue = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount))
+
+      val countWidth = log2Up(maxCount)
+      def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { // size-bit up/down counter; returns (empty, full)
+        val counter: UInt = RegInit(0.U(size.W))
+        val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) // +1 on grant, otherwise add all-ones (i.e. -1)
+        val updateCounter = grant ^ release // a simultaneous grant and release leaves the count unchanged
+        when(updateCounter) {
+          counter := nextCount
+        }
+        flush.foreach(f => when(f)(counter := 0.U)) // later connection, so a flush overrides the update above
+        val empty = (updateCounter && nextCount === 0.U) || counter === 0.U // also asserted when this cycle's update reaches zero
+        val fullCounter: Int = (1 << size) - 1 - margin // `margin` slots are held back below the maximum count
+        val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
+        (empty, full)
+      }
+      // T1 Memory Scoreboard: grant on the enqueue (write-back) cycle, because wbRegDecodeOutput describes the write-back instruction, not the queue head.
+      val lsuGrant:   Bool = t1IssueQueue.io.enq.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
+      val lsuRelease: Bool = t1.retire.mem.fire
+      val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease)
+      // Maintain vector counter. FIXME: the release condition is still `???`, which throws NotImplementedError at elaboration when usingT1 is set.
+      // There may be 4 instructions in the pipe
+      val (_, vectorFull) = counterManagement(countWidth, 4)(t1.issue.valid, ???)
+      vectorLSUEmpty.foreach(_ := lsuEmpty)
+      vectorQueueFull.foreach(_ := vectorFull)
+
+      val vectorTryToWriteRd = t1.retire.rd.valid && !t1.retire.rd.bits.fp
+      val vectorTryToWriteFP = t1.retire.rd.valid && t1.retire.rd.bits.fp
+      // TODO: maintain queue here? A retirement is accepted only when its integer/FP write does not collide with a write-back or dmem replay.
+      val retireReady = (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
+      t1XRDRetireQueue.io.deq.ready := retireReady
+      t1CSRRetireQueue.io.deq.ready := retireReady
+      t1MemoryRetireQueue.io.deq.ready := retireReady
+      when(t1.retire.rd.fire && vectorTryToWriteRd) {
+        longlatencyWdata := t1.retire.rd.bits.data
+        longlatencyWaddress := t1.retire.rd.bits.rd
+        longLatencyWenable := true.B
+      }
+      io.fpu.foreach { fpu =>
+        when(!(dmemResponseValid && dmemResponseFpu)) {
+          fpu.dmem_resp_val := t1.retire.rd.fire && vectorTryToWriteFP // the FP result travels on the rd retire channel, not the mem channel
+          fpu.dmem_resp_data := t1.retire.rd.bits.data
+          // todo: 32 bit only
+          fpu.dmem_resp_type := 2.U
+          // todo: connect tag
+          fpu.dmem_resp_tag := 0.U
+        }
+      }
+    }
 
     io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem)
     val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B))