Add decode and LSU plumbing for the CBO instructions
(cbo.zero / cbo.clean / cbo.flush / cbo.inval).

- decode: new CBODecode table and instruction bit patterns; all four ops
  are issued with FuType.stu.
- cbo.zero is flagged as a whole-cache-line store (wlineflag / wline) in
  StoreUnit_S0, carried through the store queue address modules into the
  sbuffer, where a wline request writes every word of the line. Its store
  data is forced to 0 in the store queue.
- cbo.clean / cbo.flush / cbo.inval are treated as MMIO in StoreUnit_S1;
  for CBO ops the store queue overrides the uncache request fields
  (addr and mask are still DontCare / TODO).
- The uncache request data for CBO ops is cbo_mmio_data, i.e. the physical
  address with its lowest 2 bits cleared, OR-ed with the (still TODO) op.
- dcache: DCacheLine{Bits,Bytes,Words} are renamed to DCacheSize*, and
  DCacheWordBits / DCacheWordBytes / DCacheLineOffset are added.
- store-to-load forwarding address match now hits on the whole line when
  the store queue entry has its lineflag set.

NOTE(review): indentation and column alignment in this patch are
reconstructed; confirm against the target tree before applying.

diff --git a/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala b/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
index 2c0aee2f3e..e7574a669e 100644
--- a/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
+++ b/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
@@ -35,7 +35,7 @@ abstract trait DecodeConstants {
   def Y = BitPat("b1")

   def decodeDefault: List[BitPat] = // illegal instruction
-    //   srcType(0) srcType(1) srcType(2) fuType fuOpType rfWen
+    //   srcType(0) srcType(1) srcType(2) fuType fuOpType rfWen
     //   |          |          |          |      |        |  fpWen
     //   |          |          |          |      |        |  |  isXSTrap
     //   |          |          |          |      |        |  |  |  noSpecExec
@@ -389,6 +389,18 @@ object FDivSqrtDecode extends DecodeConstants {
   )
 }

+/**
+ * CBO decode
+ */
+object CBODecode extends DecodeConstants {
+  val table: Array[(BitPat, List[BitPat])] = Array(
+    CBO_ZERO  -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.stu, LSUOpType.cbo_zero , N, N, N, N, N, N, N, SelImm.IMM_S),
+    CBO_CLEAN -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.stu, LSUOpType.cbo_clean, N, N, N, N, N, N, N, SelImm.IMM_S),
+    CBO_FLUSH -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.stu, LSUOpType.cbo_flush, N, N, N, N, N, N, N, SelImm.IMM_S),
+    CBO_INVAL -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.stu, LSUOpType.cbo_inval, N, N, N, N, N, N, N, SelImm.IMM_S)
+  )
+}
+
 /**
  * XiangShan Trap Decode constants
  */
@@ -522,7 +534,7 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan

   ctrl_flow := io.enq.ctrl_flow

-  val decode_table = XDecode.table ++ FDecode.table ++ FDivSqrtDecode.table ++ X64Decode.table ++ XSTrapDecode.table ++ BDecode.table
+  val decode_table = XDecode.table ++ FDecode.table ++ FDivSqrtDecode.table ++ X64Decode.table ++ XSTrapDecode.table ++ BDecode.table ++ CBODecode.table

   // output
   cf_ctrl.cf := ctrl_flow
diff --git a/src/main/scala/xiangshan/backend/decode/Instructions.scala b/src/main/scala/xiangshan/backend/decode/Instructions.scala
index 6155a12a96..0371bba595 100644
--- a/src/main/scala/xiangshan/backend/decode/Instructions.scala
+++ b/src/main/scala/xiangshan/backend/decode/Instructions.scala
@@ -73,6 +73,10 @@ object Instructions {
   def SH        = BitPat("b?????????????????001?????0100011")
   def SW        = BitPat("b?????????????????010?????0100011")
   def SD        = BitPat("b?????????????????011?????0100011")
+  def CBO_ZERO  = BitPat("b000000000100?????010000000001111")
+  def CBO_CLEAN = BitPat("b000000000001?????010000000001111")
+  def CBO_FLUSH = BitPat("b000000000010?????010000000001111")
+  def CBO_INVAL = BitPat("b000000000000?????010000000001111")
   def FENCE     = BitPat("b?????????????????000?????0001111")
   def FENCE_I   = BitPat("b?????????????????001?????0001111")
   def MUL       = BitPat("b0000001??????????000?????0110011")
diff --git a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
index b6b76b3dbd..19985c76c0 100644
--- a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
+++ b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
@@ -74,7 +74,7 @@ case class DCacheParameters
 // | Above index  | Set | Bank | Offset |
 // --------------------------------------
 //                |     |      |        |
-//                |     |      |        DCacheWordOffset
+//                |     |      |        0
 //                |     |      DCacheBankOffset
 //                |     DCacheSetOffset
 //                DCacheAboveIndexOffset
@@ -114,19 +114,23 @@ trait HasDCacheParameters extends HasL1CacheParameters {
   val DCacheWays = cacheParams.nWays
   val DCacheBanks = 8
   val DCacheSRAMRowBits = 64 // hardcoded
+  val DCacheWordBits = 64 // hardcoded
+  val DCacheWordBytes = DCacheWordBits / 8

-  val DCacheLineBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
-  val DCacheLineBytes = DCacheLineBits / 8
-  val DCacheLineWords = DCacheLineBits / 64 // TODO
+  val DCacheSizeBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
+  val DCacheSizeBytes = DCacheSizeBits / 8
+  val DCacheSizeWords = DCacheSizeBits / DCacheWordBits // TODO

   val DCacheSameVPAddrLength = 12

   val DCacheSRAMRowBytes = DCacheSRAMRowBits / 8
-  val DCacheWordOffset = 0
-  val DCacheBankOffset = DCacheWordOffset + log2Up(DCacheSRAMRowBytes)
+  val DCacheWordOffset = log2Up(DCacheWordBytes)
+
+  val DCacheBankOffset = log2Up(DCacheSRAMRowBytes)
   val DCacheSetOffset = DCacheBankOffset + log2Up(DCacheBanks)
   val DCacheAboveIndexOffset = DCacheSetOffset + log2Up(DCacheSets)
   val DCacheTagOffset = DCacheAboveIndexOffset min DCacheSameVPAddrLength
+  val DCacheLineOffset = DCacheSetOffset
   val DCacheIndexOffset = DCacheBankOffset

   def addr_to_dcache_bank(addr: UInt) = {
@@ -206,6 +210,7 @@ class DCacheLineReq(implicit p: Parameters) extends DCacheBundle

 class DCacheWordReqWithVaddr(implicit p: Parameters) extends DCacheWordReq {
   val vaddr = UInt(VAddrBits.W)
+  val wline = Bool()
 }

 class DCacheWordResp(implicit p: Parameters) extends DCacheBundle
diff --git a/src/main/scala/xiangshan/mem/MemCommon.scala b/src/main/scala/xiangshan/mem/MemCommon.scala
index 234113f603..2942dfecad 100644
--- a/src/main/scala/xiangshan/mem/MemCommon.scala
+++ b/src/main/scala/xiangshan/mem/MemCommon.scala
@@ -55,6 +55,7 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle {
   val mask = UInt(8.W)
   val data = UInt((XLEN+1).W)
   val uop = new MicroOp
+  val wlineflag = Bool() // store write the whole cache line

   val miss = Bool()
   val tlbMiss = Bool()
diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
index 1289cd779f..b75f80eb11 100644
--- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
+++ b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
@@ -223,10 +223,12 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete

       paddrModule.io.waddr(i) := stWbIndex
       paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr
+      paddrModule.io.wlineflag(i) := io.storeIn(i).bits.wlineflag
       paddrModule.io.wen(i) := true.B

       vaddrModule.io.waddr(i) := stWbIndex
       vaddrModule.io.wdata(i) := io.storeIn(i).bits.vaddr
+      vaddrModule.io.wlineflag(i) := io.storeIn(i).bits.wlineflag
       vaddrModule.io.wen(i) := true.B

       debug_paddr(paddrModule.io.waddr(i)) := paddrModule.io.wdata(i)
@@ -258,7 +260,10 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
       datavalid(stWbIndex) := true.B

       dataModule.io.data.waddr(i) := stWbIndex
-      dataModule.io.data.wdata(i) := genWdata(io.storeDataIn(i).bits.data, io.storeDataIn(i).bits.uop.ctrl.fuOpType(1,0))
+      dataModule.io.data.wdata(i) := Mux(io.storeDataIn(i).bits.uop.ctrl.fuOpType === LSUOpType.cbo_zero,
+        0.U,
+        genWdata(io.storeDataIn(i).bits.data, io.storeDataIn(i).bits.uop.ctrl.fuOpType(1,0))
+      )
       dataModule.io.data.wen(i) := true.B

       debug_data(dataModule.io.data.waddr(i)) := dataModule.io.data.wdata(i)
@@ -393,6 +398,17 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
   io.uncache.req.bits.data := dataModule.io.rdata(0).data
   io.uncache.req.bits.mask := dataModule.io.rdata(0).mask

+  // CBO op type check can be delayed for 1 cycle,
+  // as uncache op will not start in s_idle
+  val cbo_mmio_addr = paddrModule.io.rdata(0) >> 2 << 2 // clear lowest 2 bits for op
+  val cbo_mmio_op = 0.U //TODO
+  val cbo_mmio_data = cbo_mmio_addr | cbo_mmio_op
+  when(RegNext(LSUOpType.isCbo(uop(deqPtr).ctrl.fuOpType))){
+    io.uncache.req.bits.addr := DontCare // TODO
+    io.uncache.req.bits.data := cbo_mmio_data
+    io.uncache.req.bits.mask := DontCare // TODO
+  }
+
   io.uncache.req.bits.id := DontCare
   io.uncache.req.bits.instrtype := DontCare

@@ -463,11 +479,14 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
     io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
     // Note that store data/addr should both be valid after store's commit
     assert(!io.sbuffer(i).valid || allvalid(ptr))
+    // Write line request should have all 1 mask
+    assert(!(io.sbuffer(i).valid && io.sbuffer(i).bits.wline && !io.sbuffer(i).bits.mask.andR))
     io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR
     io.sbuffer(i).bits.addr := paddrModule.io.rdata(i)
     io.sbuffer(i).bits.vaddr := vaddrModule.io.rdata(i)
     io.sbuffer(i).bits.data := dataModule.io.rdata(i).data
     io.sbuffer(i).bits.mask := dataModule.io.rdata(i).mask
+    io.sbuffer(i).bits.wline := paddrModule.io.rlineflag(i)
     io.sbuffer(i).bits.id := DontCare
     io.sbuffer(i).bits.instrtype := DontCare

diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala
index afd5989d63..41153031e6 100644
--- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala
+++ b/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala
@@ -33,33 +33,42 @@ class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int,
   val io = IO(new Bundle {
     val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
     val rdata = Output(Vec(numRead, UInt(dataWidth.W)))
+    val rlineflag = Output(Vec(numRead, Bool()))
     val wen = Input(Vec(numWrite, Bool()))
     val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
     val wdata = Input(Vec(numWrite, UInt(dataWidth.W)))
+    val wlineflag = Input(Vec(numWrite, Bool()))
     val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W)))
     val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool())))
     val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
   })

   val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
+  val lineflag = Reg(Vec(numEntries, Bool())) // cache line match flag
+  // if lineflag == true, this address points to a whole cacheline
   io.debug_data := data

   // read ports
   for (i <- 0 until numRead) {
     io.rdata(i) := data(RegNext(io.raddr(i)))
+    io.rlineflag(i) := lineflag(RegNext(io.raddr(i)))
   }

   // below is the write ports (with priorities)
   for (i <- 0 until numWrite) {
     when (io.wen(i)) {
       data(io.waddr(i)) := io.wdata(i)
+      lineflag(io.waddr(i)) := io.wlineflag(i)
     }
   }

   // content addressed match
   for (i <- 0 until numForward) {
     for (j <- 0 until numEntries) {
-      io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
+      // io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
+      val linehit = io.forwardMdata(i)(dataWidth-1, DCacheLineOffset) === data(j)(dataWidth-1, DCacheLineOffset)
+      val wordhit = io.forwardMdata(i)(DCacheLineOffset-1, DCacheWordOffset) === data(j)(DCacheLineOffset-1, DCacheWordOffset)
+      io.forwardMmask(i)(j) := linehit && (wordhit || lineflag(j))
     }
   }

diff --git a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
index 328627993d..1d17def2ca 100644
--- a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
+++ b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
@@ -66,6 +66,7 @@ class StoreUnit_S0(implicit p: Parameters) extends XSModule {
   io.out.bits.rsIdx := io.rsIdx
   io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0))
   io.out.bits.isFirstIssue := io.isFirstIssue
+  io.out.bits.wlineflag := io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_zero
   io.out.valid := io.in.valid
   io.in.ready := io.out.ready

@@ -98,9 +99,14 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule {
     val rsFeedback = ValidIO(new RSFeedback)
   })

+  // mmio cbo decoder
+  val is_mmio_cbo = io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_clean ||
+    io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_flush ||
+    io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_inval
+
   val s1_paddr = io.dtlbResp.bits.paddr
   val s1_tlb_miss = io.dtlbResp.bits.miss
-  val s1_mmio = io.dtlbResp.bits.mmio
+  val s1_mmio = io.dtlbResp.bits.mmio || is_mmio_cbo
   val s1_exception = selectStore(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR

   io.in.ready := true.B
diff --git a/src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala b/src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala
index 9cacd51652..a55e373754 100644
--- a/src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala
+++ b/src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala
@@ -67,6 +67,7 @@ class DataWriteReq(implicit p: Parameters) extends SbufferBundle {
   val mask = UInt((DataBits/8).W)
   val data = UInt(DataBits.W)
   val wordOffset = UInt(WordOffsetWidth.W)
+  val wline = Bool()
 }

 class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst {
@@ -81,9 +82,14 @@ class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst

   for(i <- 0 until StorePipelineWidth) {
     when(req(i).valid){
-      for(j <- 0 until DataBytes){
-        when(req(i).bits.mask(j)){
-          data(req(i).bits.idx)(req(i).bits.wordOffset)(j) := req(i).bits.data(j*8+7, j*8)
+      for(word <- 0 until CacheLineWords){
+        for(byte <- 0 until DataBytes){
+          when(
+            req(i).bits.mask(byte) && (req(i).bits.wordOffset(WordsWidth-1, 0) === word.U) ||
+            req(i).bits.wline
+          ){
+            data(req(i).bits.idx)(word)(byte) := req(i).bits.data(byte*8+7, byte*8)
+          }
         }
       }
     }
@@ -272,6 +278,7 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
     writeReq(i).bits.wordOffset := wordOffset
     writeReq(i).bits.mask := in.bits.mask
     writeReq(i).bits.data := in.bits.data
+    writeReq(i).bits.wline := in.bits.wline
     val insertIdx = if(i == 0) firstInsertIdx else secondInsertIdx
     val flushMask = if(i == 0) true.B else !sameTag
     accessIdx(i).valid := RegNext(in.fire())
diff --git a/src/main/scala/xiangshan/package.scala b/src/main/scala/xiangshan/package.scala
index 4e55e30c8d..b1995d2a57 100644
--- a/src/main/scala/xiangshan/package.scala
+++ b/src/main/scala/xiangshan/package.scala
@@ -351,20 +351,27 @@ package object xiangshan {
   object LSUOpType {
     // normal load/store
     // bit(1, 0) are size
-    def lb  = "b000000".U
-    def lh  = "b000001".U
-    def lw  = "b000010".U
-    def ld  = "b000011".U
-    def lbu = "b000100".U
-    def lhu = "b000101".U
-    def lwu = "b000110".U
-    def sb  = "b001000".U
-    def sh  = "b001001".U
-    def sw  = "b001010".U
-    def sd  = "b001011".U
+    def lb        = "b000000".U
+    def lh        = "b000001".U
+    def lw        = "b000010".U
+    def ld        = "b000011".U
+    def lbu       = "b000100".U
+    def lhu       = "b000101".U
+    def lwu       = "b000110".U
+    def sb        = "b001000".U
+    def sh        = "b001001".U
+    def sw        = "b001010".U
+    def sd        = "b001011".U
+
+    def cbo_zero  = "b001111".U // l1 cache op
+
+    def cbo_clean = "b011111".U // llc op
+    def cbo_flush = "b101111".U // llc op
+    def cbo_inval = "b111111".U // llc op

     def isLoad(op: UInt): Bool = !op(3)
     def isStore(op: UInt): Bool = op(3)
+    def isCbo(op: UInt): Bool = op(3, 0) === "b1111".U

     // atomics
     // bit(1, 0) are size