Skip to content

Commit

Permalink
[rockett1] draft Tile
Browse files Browse the repository at this point in the history
  • Loading branch information
sequencer committed Jul 24, 2024
1 parent 628cecc commit 94cdc3f
Show file tree
Hide file tree
Showing 3 changed files with 521 additions and 60 deletions.
29 changes: 29 additions & 0 deletions rocketv/src/Bundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1398,3 +1398,32 @@ class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entr
val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits)
val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits)
}

// Interface bundles for the T1 <-> Rocket core integration
/** Port bundle connecting the Rocket core to T1.
  *
  * `issue` carries a [[T1Issue]] qualified by a valid bit. `retire` is a
  * [[T1Retire]] wrapped in `Flipped`, so its fields run in the opposite
  * direction to `issue`.
  */
class RocketCoreToT1 extends Bundle {
// Valid-qualified issue payload; `Valid` provides no ready/back-pressure signal.
val issue: Valid[T1Issue] = Valid(new T1Issue)
// Retire information, direction-flipped relative to `issue`.
val retire: T1Retire = Flipped(new T1Retire)
}

/** Payload of the `issue` channel of [[RocketCoreToT1]].
  *
  * All four fields are 32-bit unsigned values.
  */
class T1Issue extends Bundle {
// 32-bit instruction word.
val instruction: UInt = UInt(32.W)
// 32-bit value accompanying the instruction as its rs1 operand.
val rs1Data: UInt = UInt(32.W)
// 32-bit value accompanying the instruction as its rs2 operand.
val rs2Data: UInt = UInt(32.W)
// 32-bit field for vector CSR state; its bit layout is not defined in this bundle.
val vcsr: UInt = UInt(32.W)
}

/** Register write-back information carried by the `rd` field of [[T1Retire]]. */
class T1RdRetire extends Bundle {
// 5-bit destination register index.
val rd: UInt = UInt(5.W)
// 32-bit value to write to the destination register.
val data: UInt = UInt(32.W)
// Distinguishes floating-point destinations (true) from other destinations (false).
val fp: Bool = Bool()
}

/** CSR update information carried by the `csr` field of [[T1Retire]]. */
class T1CSRRetire extends Bundle {
// 32-bit vxsat value.
// NOTE(review): the RISC-V V extension defines vxsat as a single-bit flag — confirm 32 bits is intended.
val vxsat: UInt = UInt(32.W)
}

/** Retire-side bundle of the T1 interface.
  *
  * It groups three independently valid-qualified channels:
  *  - `rd`:  register write-back ([[T1RdRetire]])
  *  - `csr`: CSR update ([[T1CSRRetire]])
  *  - `mem`: an empty payload, so only its valid bit carries information
  *
  * Field types are spelled out explicitly, matching the other bundles in this file.
  */
class T1Retire extends Bundle {
  val rd: Valid[T1RdRetire] = Valid(new T1RdRetire)
  val csr: Valid[T1CSRRetire] = Valid(new T1CSRRetire)
  // Empty bundle: this channel is a pure valid pulse.
  val mem: Valid[Bundle] = Valid(new Bundle {})
}
129 changes: 69 additions & 60 deletions rocketv/src/RocketCore.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable}
import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
import chisel3.util.circt.ClockGate
import chisel3.util.experimental.decode.DecodeBundle
import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil}
import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up}
import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes
import org.chipsalliance.rvdecoderdb.Instruction

Expand Down Expand Up @@ -43,9 +43,12 @@ case class RocketParameter(
fastLoadByte: Boolean,
fastLoadWord: Boolean,
dcacheNSets: Int,
flushOnFenceI: Boolean
flushOnFenceI: Boolean,
usingT1: Boolean
)
extends SerializableModuleParameter {
// interface to T1
// Result of hasInstructionSet for the "rv_v" instruction set.
def usingVector = hasInstructionSet("rv_v")

// fixed for now
// Hard-wired to false; not derived from any constructor parameter.
def usingRVE: Boolean = false
Expand Down Expand Up @@ -135,8 +138,6 @@ case class RocketParameter(
// static to false for now
// Result of hasInstructionSet for the "rv_smrnmi" instruction set.
def usingNMI = hasInstructionSet("rv_smrnmi")

// Result of hasInstructionSet for the "rv_v" instruction set.
def usingVector = hasInstructionSet("rv_v")

// calculated parameter
// Fixed at 1; presumably the number of instructions fetched per cycle — TODO confirm.
def fetchWidth: Int = 1

Expand Down Expand Up @@ -309,6 +310,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle {
)
)
val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen)))
val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1)
val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch))
val cease = Output(Bool())
val wfi = Output(Bool())
Expand Down Expand Up @@ -1331,62 +1333,69 @@ class Rocket(val parameter: RocketParameter)
fpu.keep_clock_enabled := false.B
}

// @todo get back t1.
// t1Request.foreach { t1 =>
// // Send instruction to T1 when write back.
// t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
// t1.bits.instruction := wbRegInstruction
// t1.bits.rs1Data := wbRegWdata
// t1.bits.rs2Data := wbRegRS2
// val response: DecoupledIO[VectorResponse] = t1Response.get
// // TODO: make it configurable
// val maxCount: Int = 32
// val countWidth = log2Up(maxCount)
// def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = {
// val counter: UInt = RegInit(0.U(size.W))
// val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt)
// val updateCounter = grant ^ release
// when(updateCounter) {
// counter := nextCount
// }
// flush.foreach(f => when(f)(counter := 0.U))
// val empty = (updateCounter && nextCount === 0.U) || counter === 0.U
// val fullCounter: Int = (1 << size) - 1 - margin
// val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
// (empty, full)
// }
// // Maintain lsu counter
// val lsuGrant: Bool = t1.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
// val lsuRelease: Bool = response.fire && response.bits.mem
// val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease)
// // Maintain vector counter
// // There may be 4 instructions in the pipe
// val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get)
// vectorLSUEmpty.foreach(_ := lsuEmpty)
// vectorQueueFull.foreach(_ := vectorFull)
// }
// // todo: vector change csr
// t1Response.foreach { vectorResponse =>
// val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float
// val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float
// vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) &&
// (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
// when(vectorResponse.fire && vectorTryToWriteRd) {
// longlatencyWdata := vectorResponse.bits.data
// longlatencyWaddress := vectorResponse.bits.rd.bits
// longLatencyWenable := true.B
// }
// fpu.foreach { fpu =>
// when(!(dmemResponseValid && dmemResponseFpu)) {
// fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP
// fpu.dmem_resp_data := vectorResponse.bits.data
// // todo: 32 bit only
// fpu.dmem_resp_type := 2.U
// // todo: connect tag
// fpu.dmem_resp_tag := 0.U
// }
// }
// }
// Rocket <-> T1 glue logic, elaborated only when the optional `t1` port exists.
//  - Issue side: vector instructions reaching write-back are enqueued and the queue head is
//    presented on `t1.issue`.
//  - Retire side: non-FP `rd` results are written through the long-latency write port; FP
//    results are forwarded through the FPU's dmem response port.
// NOTE(review): this block is still a draft; see the NOTE(review)/TODO markers below.
io.t1.foreach { t1 =>
  // TODO: make it configurable
  val maxCount: Int = 32

  // Send instruction to T1 when write back.
  val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount))
  t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
  t1IssueQueue.io.enq.bits.instruction := wbRegInstruction
  t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata
  t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2
  // NOTE(review): `enq.bits.vcsr` and `deq.ready` of the issue queue are not driven in this
  // block — confirm where they are meant to be connected.
  t1.issue.valid := t1IssueQueue.io.deq.valid
  t1.issue.bits := t1IssueQueue.io.deq.bits

  // TODO: really maintain 3 retire queues? we need to reduce it via a scoreboard.
  // NOTE(review): the `enq` sides of the three retire queues are not connected in this block.
  val t1MemoryRetireQueue = Module(new Queue(chiselTypeOf(t1.retire.mem.bits), maxCount))
  val t1CSRRetireQueue = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount))
  val t1XRDRetireQueue = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount))

  val countWidth = log2Up(maxCount)

  // Up/down occupancy counter.
  //   size    - counter width in bits
  //   margin  - head-room subtracted from the maximum count when computing `full`
  //   grant   - increments the counter
  //   release - decrements the counter (grant and release in the same cycle cancel out)
  //   flush   - optional synchronous clear; takes priority over the update
  // Returns (empty, full); both also look ahead at the value the counter is about to take.
  def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = {
    val counter: UInt = RegInit(0.U(size.W))
    // Adding the all-ones pattern is a wrap-around decrement by one.
    val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt)
    val updateCounter = grant ^ release
    when(updateCounter) {
      counter := nextCount
    }
    flush.foreach(f => when(f)(counter := 0.U))
    val empty = (updateCounter && nextCount === 0.U) || counter === 0.U
    val fullCounter: Int = (1 << size) - 1 - margin
    val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
    (empty, full)
  }

  // T1 Memory Scoreboard
  // NOTE(review): `t1.issue.valid` comes from the queue head while the vectorLSU decode bit
  // comes from the write-back register; these may describe different instructions — confirm.
  val lsuGrant: Bool = t1.issue.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
  val lsuRelease: Bool = t1.retire.mem.fire
  val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease)
  // Maintain vector counter
  // There may be 4 instructions in the pipe
  // NOTE(review): the release condition is still the `???` placeholder, which throws
  // NotImplementedError during elaboration whenever usingT1 is enabled.
  val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.issue.valid, ???)
  vectorLSUEmpty.foreach(_ := lsuEmpty)
  vectorQueueFull.foreach(_ := vectorFull)

  val vectorTryToWriteRd = t1.retire.rd.valid && !t1.retire.rd.bits.fp
  val vectorTryToWriteFP = t1.retire.rd.valid && t1.retire.rd.bits.fp
  // TODO: maintain queue here?
  // A retire entry may be dequeued only when the register-file port it targets is not
  // claimed this cycle. The same condition drives all three retire queues.
  val retireDequeueReady: Bool =
    (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) &&
      (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
  t1XRDRetireQueue.io.deq.ready := retireDequeueReady
  t1CSRRetireQueue.io.deq.ready := retireDequeueReady
  t1MemoryRetireQueue.io.deq.ready := retireDequeueReady

  // Non-FP results go through the long-latency write port.
  when(t1.retire.rd.fire && vectorTryToWriteRd) {
    longlatencyWdata := t1.retire.rd.bits.data
    longlatencyWaddress := t1.retire.rd.bits.rd
    longLatencyWenable := true.B
  }
  // FP results reuse the FPU's dmem response port when no real dmem FP response is present.
  io.fpu.foreach { fpu =>
    when(!(dmemResponseValid && dmemResponseFpu)) {
      // Gate on the rd retire channel: the data and the fp flag both come from `retire.rd`,
      // so the write must not depend on the unrelated `retire.mem` channel.
      fpu.dmem_resp_val := t1.retire.rd.fire && vectorTryToWriteFP
      fpu.dmem_resp_data := t1.retire.rd.bits.data
      // todo: 32 bit only
      fpu.dmem_resp_type := 2.U
      // todo: connect tag
      fpu.dmem_resp_tag := 0.U
    }
  }
}

// Raise the dmem request when exRegValid is set and the EX decode output has its `mem` field set.
io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem)
// Tag = exWaddr in the upper bits, with one low bit: the decoded `fp` field when usingFPU, else constant false.
val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B))
Expand Down
Loading

0 comments on commit 94cdc3f

Please sign in to comment.