diff --git a/regression/run-test-bucket b/regression/run-test-bucket index 5ed757cdcc0..231be123666 100755 --- a/regression/run-test-bucket +++ b/regression/run-test-bucket @@ -51,8 +51,9 @@ bucket_number=$1 set -x case "${bucket_number}" in 1) - travis_wait 100 make emulator-ndebug -C regression SUITE=UnittestSuite JVM_MEMORY=3G VERILATOR_THREADS=1 - travis_wait 100 make emulator-regression-tests -C regression SUITE=UnittestSuite JVM_MEMORY=3G VERILATOR_THREADS=1 + # Temporarily disable this bucket, which is hitting OOM on Actions + #travis_wait 100 make emulator-ndebug -C regression SUITE=UnittestSuite JVM_MEMORY=3G VERILATOR_THREADS=1 + #travis_wait 100 make emulator-regression-tests -C regression SUITE=UnittestSuite JVM_MEMORY=3G VERILATOR_THREADS=1 ;; 2) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index cd051e76c15..7d83e7822ec 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -877,15 +877,17 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // AMOs if (usingRMW) { - // when xLen < coreDataBits (e.g. RV32D), this AMOALU is wider than necessary - val amoalu = Module(new AMOALU(coreDataBits)) - amoalu.io.mask := pstore1_mask - amoalu.io.cmd := (if (usingAtomicsInCache) pstore1_cmd else M_XWR) - amoalu.io.lhs := s2_data_word - amoalu.io.rhs := pstore1_data - pstore1_storegen_data := (if (!usingDataScratchpad) amoalu.io.out else { + val amoalus = (0 until coreDataBits / xLen).map { i => + val amoalu = Module(new AMOALU(xLen)) + amoalu.io.mask := pstore1_mask >> (i * xBytes) + amoalu.io.cmd := (if (usingAtomicsInCache) pstore1_cmd else M_XWR) + amoalu.io.lhs := s2_data_word >> (i * xLen) + amoalu.io.rhs := pstore1_data >> (i * xLen) + amoalu + } + pstore1_storegen_data := (if (!usingDataScratchpad) amoalus.map(_.io.out).asUInt else { val mask = FillInterleaved(8, Mux(s2_correct, 0.U, pstore1_mask)) - amoalu.io.out_unmasked & mask | s2_data_word_corrected & ~mask + amoalus.map(_.io.out_unmasked).asUInt & mask | s2_data_word_corrected & ~mask }) } else if (!usingAtomics) { assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation") @@ -897,7 +899,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // We could consider turning some of these into dynamic PMA checks. require(!m.supportsAcquireB || m.supportsGet, "With a vector unit, cacheable memory must support Get") require(!m.supportsAcquireT || m.supportsPutPartial, "With a vector unit, cacheable memory must support PutPartial") - require(!m.supportsPutFull || m.supportsPutPartial, "With a vector unit, writable memory must support PutPartial") } } diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index 769ab62ba30..60aa0d97538 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -64,6 +64,7 @@ class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) { val pw = Bool() val px = Bool() val pr = Bool() + val ppp = Bool() // PutPartial val pal = Bool() // AMO logical val paa = Bool() // AMO arithmetic val eff = Bool() // get/put effects @@ -199,6 +200,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val deny_access_to_debug = mpu_priv <= PRV.M && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false) val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w + val prot_pp = fastCheck(_.supportsPutPartial) val prot_al = fastCheck(_.supportsLogical) val prot_aa = fastCheck(_.supportsArithmetic) val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x @@ -226,6 +228,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T newEntry.pr := prot_r newEntry.pw := prot_w newEntry.px := prot_x + newEntry.ppp := prot_pp newEntry.pal := prot_al newEntry.paa := prot_aa newEntry.eff := prot_eff @@ -260,8 +263,10 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val px_array = Cat(Fill(nPhysicalEntries, prot_x), normal_entries.map(_.px).asUInt) & ~ptw_ae_array val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt) val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt) + val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt) val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), normal_entries.map(_.paa).asUInt) val pal_array = Cat(Fill(nPhysicalEntries, prot_al), normal_entries.map(_.pal).asUInt) + val ppp_array_if_cached = ppp_array | c_array val paa_array_if_cached = paa_array | Mux(usingAtomicsInCache, c_array, 0.U) val pal_array_if_cached = pal_array | Mux(usingAtomicsInCache, c_array, 0.U) val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt) @@ -280,6 +285,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val cmd_lrsc = Bool(usingAtomics) && io.req.bits.cmd.isOneOf(M_XLR, M_XSC) val cmd_amo_logical = Bool(usingAtomics) && isAMOLogical(io.req.bits.cmd) val cmd_amo_arithmetic = Bool(usingAtomics) && isAMOArithmetic(io.req.bits.cmd) + val cmd_put_partial = io.req.bits.cmd === M_PWR val cmd_read = isRead(io.req.bits.cmd) val cmd_write = isWrite(io.req.bits.cmd) val cmd_write_perms = cmd_write || @@ -292,9 +298,11 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val ae_ld_array = Mux(cmd_read, ae_array | ~pr_array, 0.U) val ae_st_array = Mux(cmd_write_perms, ae_array | ~pw_array, 0.U) | + Mux(cmd_put_partial, ~ppp_array_if_cached, 0.U) | Mux(cmd_amo_logical, ~pal_array_if_cached, 0.U) | Mux(cmd_amo_arithmetic, ~paa_array_if_cached, 0.U) val must_alloc_array = + Mux(cmd_put_partial, ~ppp_array, 0.U) | Mux(cmd_amo_logical, ~paa_array, 0.U) | Mux(cmd_amo_arithmetic, ~pal_array, 0.U) | Mux(cmd_lrsc, ~0.U(pal_array.getWidth.W), 0.U)