From 983e08cc9486d945cecaf8ded2d3083a70f33a02 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 11 May 2020 09:43:06 -0700 Subject: [PATCH 1/5] ManagerUnification: increase evaluation performance Something is broken with scala.collection.mutable.HashMap. Insertions get slower and slower as the map gets more full. groupBy is more idiomatic anyway. --- src/main/scala/tilelink/Parameters.scala | 74 ++++++++++++------------ 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/src/main/scala/tilelink/Parameters.scala b/src/main/scala/tilelink/Parameters.scala index ad17d4d5286..276e0352303 100644 --- a/src/main/scala/tilelink/Parameters.scala +++ b/src/main/scala/tilelink/Parameters.scala @@ -1113,47 +1113,45 @@ case class TLRationalEdgeParameters(client: TLRationalClientPortParameters, mana def formatEdge = client.infoString + "\n" + manager.infoString } +// To be unified, devices must agree on all of these terms +case class ManagerUnificationKey( + resources: Seq[Resource], + regionType: RegionType.T, + executable: Boolean, + supportsAcquireT: TransferSizes, + supportsAcquireB: TransferSizes, + supportsArithmetic: TransferSizes, + supportsLogical: TransferSizes, + supportsGet: TransferSizes, + supportsPutFull: TransferSizes, + supportsPutPartial: TransferSizes, + supportsHint: TransferSizes) + +object ManagerUnificationKey +{ + def apply(x: TLManagerParameters): ManagerUnificationKey = ManagerUnificationKey( + resources = x.resources, + regionType = x.regionType, + executable = x.executable, + supportsAcquireT = x.supportsAcquireT, + supportsAcquireB = x.supportsAcquireB, + supportsArithmetic = x.supportsArithmetic, + supportsLogical = x.supportsLogical, + supportsGet = x.supportsGet, + supportsPutFull = x.supportsPutFull, + supportsPutPartial = x.supportsPutPartial, + supportsHint = x.supportsHint) +} + object ManagerUnification { def apply(managers: Seq[TLManagerParameters]): List[TLManagerParameters] = { - // To be unified, devices must agree 
on all of these terms - case class TLManagerKey( - resources: Seq[Resource], - regionType: RegionType.T, - executable: Boolean, - supportsAcquireT: TransferSizes, - supportsAcquireB: TransferSizes, - supportsArithmetic: TransferSizes, - supportsLogical: TransferSizes, - supportsGet: TransferSizes, - supportsPutFull: TransferSizes, - supportsPutPartial: TransferSizes, - supportsHint: TransferSizes) - def key(x: TLManagerParameters) = TLManagerKey( - resources = x.resources, - regionType = x.regionType, - executable = x.executable, - supportsAcquireT = x.supportsAcquireT, - supportsAcquireB = x.supportsAcquireB, - supportsArithmetic = x.supportsArithmetic, - supportsLogical = x.supportsLogical, - supportsGet = x.supportsGet, - supportsPutFull = x.supportsPutFull, - supportsPutPartial = x.supportsPutPartial, - supportsHint = x.supportsHint) - val map = scala.collection.mutable.HashMap[TLManagerKey, TLManagerParameters]() - managers.foreach { m => - val k = key(m) - map.get(k) match { - case None => map.update(k, m) - case Some(n) => { - map.update(k, m.v1copy( - address = m.address ++ n.address, - fifoId = None)) // Merging means it's not FIFO anymore! - } - } - } - map.values.map(m => m.v1copy(address = AddressSet.unify(m.address))).toList + managers.groupBy(ManagerUnificationKey.apply).values.map { seq => + val agree = seq.forall(_.fifoId == seq.head.fifoId) + seq(0).v1copy( + address = AddressSet.unify(seq.flatMap(_.address)), + fifoId = if (agree) seq(0).fifoId else None) + }.toList } } From 88cf2b1d850db0205bdb9020767519d7f61f3446 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 11 May 2020 13:15:59 -0700 Subject: [PATCH 2/5] AddressSet.subtract: pay off technical debt No longer exponential time! 
--- src/main/scala/diplomacy/Parameters.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/scala/diplomacy/Parameters.scala b/src/main/scala/diplomacy/Parameters.scala index 51a8f399b51..fec51c20060 100644 --- a/src/main/scala/diplomacy/Parameters.scala +++ b/src/main/scala/diplomacy/Parameters.scala @@ -167,13 +167,13 @@ case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet] } def subtract(x: AddressSet): Seq[AddressSet] = { - if (!overlaps(x)) { - Seq(this) - } else { - val new_inflex = ~x.mask & mask - // !!! this fractures too much; find a better algorithm - val fracture = AddressSet.enumerateMask(new_inflex).flatMap(m => intersect(AddressSet(m, ~new_inflex))) - fracture.filter(!_.overlaps(x)) + intersect(x) match { + case None => Seq(this) + case Some(remove) => AddressSet.enumerateBits(mask & ~remove.mask).map { bit => + val nmask = (mask & (bit-1)) | remove.mask + val nbase = (remove.base ^ bit) & ~nmask + AddressSet(nbase, nmask) + } } } From 63becef3ec369b569254378c5fc5b6be910b7874 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 11 May 2020 17:08:18 -0700 Subject: [PATCH 3/5] AddressSet.unify: pay off technical debt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The perfect implementation of this algorithm would be to perform a full circuit minimization, à la Quine–McCluskey. However, that would require expanding out to every address, very exponential time! The previous heuristic algorithm was O(n^2) for n AddressSet terms. However, it did not always merge sets by the same bit in order. This led to poor outcomes on common inputs. The new heuristic merges by lowest bits first. This is O(n * bits), which has better worst-case performance. On contiguous ranges it also finds better results.
--- src/main/scala/diplomacy/Parameters.scala | 30 ++++++++++------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/main/scala/diplomacy/Parameters.scala b/src/main/scala/diplomacy/Parameters.scala index fec51c20060..a8fdc1e2d37 100644 --- a/src/main/scala/diplomacy/Parameters.scala +++ b/src/main/scala/diplomacy/Parameters.scala @@ -219,24 +219,20 @@ object AddressSet } } + def unify(seq: Seq[AddressSet], bit: BigInt): Seq[AddressSet] = { + // Pair terms up by ignoring 'bit' + seq.groupBy(x => x.copy(base = x.base & ~bit)).map { case (key, seq) => + if (seq.size == 1) { + seq.head // singleton -> unaffected + } else { + key.copy(mask = key.mask | bit) // pair - widen mask by bit + } + }.toList + } + def unify(seq: Seq[AddressSet]): Seq[AddressSet] = { - val n = seq.size - val array = Array(seq:_*) - var filter = Array.fill(n) { false } - for (i <- 0 until n-1) { if (!filter(i)) { - for (j <- i+1 until n) { if (!filter(j)) { - val a = array(i) - val b = array(j) - if (a.mask == b.mask && isPow2(a.base ^ b.base)) { - val c_base = a.base & ~(a.base ^ b.base) - val c_mask = a.mask | (a.base ^ b.base) - filter.update(j, true) - array.update(i, AddressSet(c_base, c_mask)) - } - }} - }} - val out = (array zip filter) flatMap { case (a, f) => if (f) None else Some(a) } - if (out.size != n) unify(out) else out.toList + val bits = seq.map(_.base).foldLeft(BigInt(0))(_ | _) + AddressSet.enumerateBits(bits).foldLeft(seq) { case (acc, bit) => unify(acc, bit) }.sorted } def enumerateMask(mask: BigInt): Seq[BigInt] = { From b67bfac3697c620200211ae4d651b871a1234d06 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 11 May 2020 17:50:49 -0700 Subject: [PATCH 4/5] AddressAdjuster: add integrated FIFO-fixing The remote/local routing decision is too granular to leave to a static FIFOFixer. If you have 1MiB of local memory in a 256MiB region, you had to describe 256 different FIFO regions, one for each potential offset of the local memory. 
There are actually only two options: local or remote. Use the dynamic routing information of the AddressAdjuster. --- src/main/scala/tilelink/AddressAdjuster.scala | 77 +++++++++++-------- src/main/scala/tilelink/BusWrapper.scala | 5 +- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/src/main/scala/tilelink/AddressAdjuster.scala b/src/main/scala/tilelink/AddressAdjuster.scala index fe704441edf..f51f4fef740 100644 --- a/src/main/scala/tilelink/AddressAdjuster.scala +++ b/src/main/scala/tilelink/AddressAdjuster.scala @@ -12,13 +12,13 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS val mask = params.replicationMask // Which bits are in the mask? val bits = AddressSet.enumerateBits(mask) - // Which ids must we route within that mask? - val ids = AddressSet.enumerateMask(mask) - // Find the intersection of the mask with some region - private def masked(region: Seq[AddressSet], offset: BigInt = 0): Seq[AddressSet] = { - region.flatMap { _.intersect(AddressSet(offset, ~mask)) } + // Find the portion of the addresses which correspond to prefix0 + private def prefix0(region: Seq[AddressSet]): Seq[AddressSet] = { + region.flatMap { _.intersect(params.local) } + } + private def prefixNot0(region: Seq[AddressSet]): Seq[AddressSet] = { + region.flatMap { _.subtract(params.local) } } - // forceLocal better only go one place (the low index) forceLocal.foreach { as => require((as.max & mask) == 0) } @@ -26,7 +26,7 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS // Report whether a region of addresses fully contains a particular manager def isDeviceContainedBy(region: Seq[AddressSet], m: TLSlaveParameters): Boolean = { - val addr = masked(m.address) + val addr = prefix0(m.address) val any_in = region.exists { f => addr.exists { a => f.overlaps(a) } } val any_out = region.exists { f => addr.exists { a => !f.contains(a) } } // Ensure device is either completely inside or outside this region @@ 
-57,8 +57,8 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS } def sameSupport(local: Seq[TLSlaveParameters], remote: Seq[TLSlaveParameters]): (Boolean, Seq[AddressSet]) = { - val ra = masked(remote.flatMap(_.address)) - val la = masked(local .flatMap(_.address)) + val ra = prefix0(remote.flatMap(_.address)) + val la = prefix0(local .flatMap(_.address)) val holes = la.foldLeft(ra) { case (holes, la) => holes.flatMap(_.subtract(la)) } val covered = remote.forall { r => r.address.forall { ra => @@ -119,6 +119,10 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS val local = mp(0) val remote = mp(1) + // Confirm that the two manager paths have homogeneous FIFO ids + requireFifoHomogeneity(local.managers) + requireFifoHomogeneity(remote.managers) + // Subdivide the managers into four cases: (adjustable vs fixed) x (local vs remote) val adjustableLocalManagers = local.managers.filter(m => isDeviceContainedBy(Seq(params.region), m)) val fixedLocalManagers = local.managers.filter(m => !isDeviceContainedBy(Seq(params.region), m)) @@ -155,10 +159,6 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS // Confirm that the error device can supply all the same capabilities as the remote path errorDev.foreach { e => requireErrorSupport(e, adjustableRemoteManagers) } - // Confirm that each subset of adjustable managers have homogeneous FIFO ids - requireFifoHomogeneity(adjustableLocalManagers) - requireFifoHomogeneity(adjustableRemoteManagers) - // Actually rewrite the PMAs for the adjustable local devices val newLocals = adjustableLocalManagers.map { l => // Ensure that every local device has a matching remote device @@ -172,7 +172,7 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS // All other PMAs are replaced with the capabilities of the remote path, since that's all we can know statically. 
// Capabilities supported by the remote but not the local will result in dynamic re-reouting to the error device. l.v1copy( - address = AddressSet.unify(masked(l.address) ++ (if (Some(l) == errorDev) holes else Nil)), + address = AddressSet.unify(prefix0(l.address) ++ (if (Some(l) == errorDev) holes else Nil)), regionType = r.regionType, executable = r.executable, supportsAcquireT = r.supportsAcquireT, @@ -186,27 +186,21 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS mayDenyGet = r.mayDenyGet, mayDenyPut = r.mayDenyPut, alwaysGrantsT = r.alwaysGrantsT, - fifoId = Some(if (isDeviceContainedBy(forceLocal, l)) ids.size else 0)) + fifoId = Some(0)) } // Actually rewrite the PMAs for the adjustable remote region too, to account for the differing FIFO domains under the mask - val newRemotes = ids.tail.zipWithIndex.flatMap { case (id, i) => adjustableRemoteManagers.map { r => + val newRemotes = adjustableRemoteManagers.map { r => r.v1copy( - address = AddressSet.unify(masked(r.address, offset = id)), - fifoId = Some(i+1)) - } } - - // Relable the FIFO domains for certain manager subsets - val fifoIdFactory = TLXbar.relabeler() - def relabelFifo(managers: Seq[TLSlaveParameters]): Seq[TLSlaveParameters] = { - val fifoIdMapper = fifoIdFactory() - managers.map(m => m.v1copy(fifoId = m.fifoId.map(fifoIdMapper(_)))) + address = prefixNot0(r.address), + fifoId = Some(0)) } val newManagerList = - relabelFifo(newLocals ++ newRemotes) ++ - relabelFifo(fixedLocalManagers) ++ - relabelFifo(fixedRemoteManagers) + newLocals ++ + newRemotes ++ + fixedLocalManagers ++ + fixedRemoteManagers Seq(local.v1copy( managers = newManagerList, @@ -249,13 +243,34 @@ class AddressAdjuster(val params: ReplicatedRegion, val forceLocal: Seq[AddressS def routeLocal(addr: UInt): Bool = Mux(isAdjustable(addr), isDynamicallyLocal(addr), isStaticallyLocal(addr)) // Route A by address, but reroute unsupported operations + val a_stall = Wire(Bool()) val a_local = 
routeLocal(parent.a.bits.address) - parent.a.ready := Mux(a_local, local.a.ready, remote.a.ready) - local .a.valid := parent.a.valid && a_local - remote.a.valid := parent.a.valid && !a_local + parent.a.ready := Mux(a_local, local.a.ready, remote.a.ready) && !a_stall + local .a.valid := parent.a.valid && a_local && !a_stall + remote.a.valid := parent.a.valid && !a_local && !a_stall local .a.bits := parent.a.bits remote.a.bits := parent.a.bits + // Count beats + val a_first = parentEdge.first(parent.a) + val d_first = parentEdge.first(parent.d) && parent.d.bits.opcode =/= TLMessages.ReleaseAck + + // Keep one bit for each source recording if there is an outstanding request that must be made FIFO + // Sources unused in the stall signal calculation should be pruned by DCE + val flight = RegInit(VecInit(Seq.fill(parentEdge.client.endSourceId) { false.B })) + when (a_first && parent.a.fire()) { flight(parent.a.bits.source) := true.B } + when (d_first && parent.d.fire()) { flight(parent.d.bits.source) := false.B } + + val stalls = parentEdge.client.clients.filter(c => c.requestFifo && c.sourceId.size > 1).map { c => + val a_sel = c.sourceId.contains(parent.a.bits.source) + val local = RegEnable(a_local, parent.a.fire() && a_sel) + val track = flight.slice(c.sourceId.start, c.sourceId.end) + + a_sel && a_first && track.reduce(_ || _) && (local =/= a_local) + } + + a_stall := stalls.foldLeft(false.B)(_||_) + val (allSame, holes) = sameSupport(adjustableLocalManagers, adjustableRemoteManagers) val dynamicLocal = AddressSet.unify(adjustableLocalManagers.flatMap(_.address)) diff --git a/src/main/scala/tilelink/BusWrapper.scala b/src/main/scala/tilelink/BusWrapper.scala index 4d2350c2753..56648d1e7aa 100644 --- a/src/main/scala/tilelink/BusWrapper.scala +++ b/src/main/scala/tilelink/BusWrapper.scala @@ -356,8 +356,7 @@ case class AddressAdjusterWrapperParams( blockBytes: Int, beatBytes: Int, replication: Option[ReplicatedRegion], - forceLocal: Seq[AddressSet] = Nil, - policy: 
TLFIFOFixer.Policy = TLFIFOFixer.allVolatile + forceLocal: Seq[AddressSet] = Nil ) extends HasTLBusParams with TLBusWrapperInstantiationLike @@ -374,7 +373,7 @@ case class AddressAdjusterWrapperParams( class AddressAdjusterWrapper(params: AddressAdjusterWrapperParams, name: String)(implicit p: Parameters) extends TLBusWrapper(params, name) { private val address_adjuster = params.replication.map { r => LazyModule(new AddressAdjuster(r, params.forceLocal)) } private val viewNode = TLIdentityNode() - val inwardNode: TLInwardNode = address_adjuster.map(_.node :*=* TLFIFOFixer(params.policy) :*=* viewNode).getOrElse(viewNode) + val inwardNode: TLInwardNode = address_adjuster.map(_.node :*=* viewNode).getOrElse(viewNode) def outwardNode: TLOutwardNode = address_adjuster.map(_.node).getOrElse(viewNode) def busView: TLEdge = viewNode.edges.in.head val prefixNode = address_adjuster.map(_.prefix) From e9beab84a487df4d0ee8f71019f034669a4ee5b5 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 11 May 2020 19:07:36 -0700 Subject: [PATCH 5/5] OMISA: enable prototyping/testing for Sv57 --- src/main/scala/diplomaticobjectmodel/model/OMISA.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/scala/diplomaticobjectmodel/model/OMISA.scala b/src/main/scala/diplomaticobjectmodel/model/OMISA.scala index 0ad97979d23..9ca68e9f6d9 100644 --- a/src/main/scala/diplomaticobjectmodel/model/OMISA.scala +++ b/src/main/scala/diplomaticobjectmodel/model/OMISA.scala @@ -21,6 +21,8 @@ case object Bare extends OMAddressTranslationMode case object Sv32 extends OMAddressTranslationMode case object Sv39 extends OMAddressTranslationMode case object Sv48 extends OMAddressTranslationMode +// unratified/subject-to-change in the RISC-V privileged ISA specification: +case object Sv57 extends OMAddressTranslationMode trait OMBaseInstructionSet extends OMEnum case object RV32E extends OMBaseInstructionSet @@ -86,6 +88,7 @@ object OMISA { case 32 if (pgLevels == 2) => Sv32 case 64 if
(pgLevels == 3) => Sv39 case 64 if (pgLevels == 4) => Sv48 + case 64 if (pgLevels == 5) => Sv57 case _ => throw new IllegalArgumentException(s"ERROR: Invalid Xlen/PgLevels combination: $xLen/$pgLevels") }