style(pbmt): remove useless code and standardize the style

* style(pbmt): remove the outstanding-enable constant that was only for self-testing

* fix(uncache): add mask comparison for `addrMatch`

* style(mem): code normalization

* fix(pbmt): handle cases where the load unit access is byte, word, etc.

* style(uncache): fix an import

* fix(uncache): address match should use the non-offset address when forwarding

  In this case, to ensure correct forwarding, stores to the same address with overlapping masks must not be buffered at the same time.

* style(RAR): remove the redundant `nc` register
Authored by Yanqin Li on 2024-11-27 14:50:21 +08:00; committed by zhanglinjuan
parent 043d3da44e
commit e10e20c653
6 changed files with 29 additions and 31 deletions


@ -205,11 +205,6 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.redirect := backend.io.mem.redirect
memBlock.io.ooo_to_mem.csrCtrl := backend.io.mem.csrCtrl
// XXX lyq: remove this before PR
val tmp_debug_uncache_otsd = Constantin.createRecord("uncache_outstanding_enable", 0)
memBlock.io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable := tmp_debug_uncache_otsd
memBlock.io.ooo_to_mem.tlbCsr := backend.io.mem.tlbCsr
memBlock.io.ooo_to_mem.lsqio.lcommit := backend.io.mem.robLsqIO.lcommit
memBlock.io.ooo_to_mem.lsqio.scommit := backend.io.mem.robLsqIO.scommit


@@ -201,6 +201,10 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
   def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
   def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
+  def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
+  def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
+  def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
+  def addrMatch(x: UInt, y: UInt): Bool = x(PAddrBits - 1, 3) === y(PAddrBits - 1, 3)
   // drain buffer
   val empty = Wire(Bool())
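
For intuition, all four `addrMatch` overloads compare only the address bits above the 3-bit byte offset, i.e. they match at 8-byte (double-word) granularity. A minimal plain-Scala sketch of the comparison (illustrative model, not the Chisel code):

    // Same 8-byte block <=> all bits above bit 2 are equal; addr(PAddrBits - 1, 3)
    // in the diff drops the low 3 offset bits, so the check reduces to addr >> 3.
    def addrMatchModel(x: Long, y: Long): Boolean = (x >>> 3) == (y >>> 3)

    assert(addrMatchModel(0x1000L, 0x1007L))  // same double word
    assert(!addrMatchModel(0x1000L, 0x1008L)) // next double word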
@@ -261,6 +265,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
    */
   val e0_fire = req.fire
+  val e0_req_valid = req.valid
   val e0_req = req.bits
   /**
     TODO lyq: block or wait or forward?
@@ -270,7 +275,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
     - st->ld forward
     - st->st block
    */
-  val e0_existSame = sizeMap(j => e0_req.addr === entries(j).addr && states(j).isValid()).asUInt.orR
+  val e0_existSame = sizeMap(j => e0_req_valid && states(j).isValid() && addrMatch(e0_req, entries(j))).asUInt.orR
   val e0_invalidVec = sizeMap(i => !states(i).isValid())
   val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
   val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
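
The reworked `e0_existSame` now blocks allocation whenever any valid entry occupies the same 8-byte block, regardless of mask. A hedged software model of the guard (simplified types; `EntryModel` is an illustrative stand-in for `UncacheEntry`):

    case class EntryModel(addr: Long, valid: Boolean)
    def existSame(reqValid: Boolean, reqAddr: Long, entries: Seq[EntryModel]): Boolean =
      reqValid && entries.exists(e => e.valid && (e.addr >>> 3) == (reqAddr >>> 3))
    // e0_alloc = canAlloc && !existSame && fire: at most one buffered store per
    // 8-byte block, which is what later makes unconditional data forwarding safe.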
@@ -282,9 +287,9 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   // judge whether wait same block: e0 & q0
   val waitSameVec = sizeMap(j =>
-    e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
+    e0_req_valid && states(j).isValid() && states(j).isInflight() && addrMatch(e0_req, entries(j))
   )
-  val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
+  val waitQ0 = q0_canSent && addrMatch(e0_req, q0_entry)
   when (waitSameVec.reduce(_ || _) || waitQ0) {
     states(e0_allocIdx).setWaitSame(true.B)
   }
@@ -345,7 +350,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   // q0 should judge whether wait same block
   (0 until UncacheBufferSize).map(j =>
-    when(q0_entry.addr === entries(j).addr && states(j).isValid() && !states(j).isWaitReturn()){
+    when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
      states(j).setWaitSame(true.B)
    }
  )
@@ -367,7 +372,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   // remove state of wait same block
   (0 until UncacheBufferSize).map(j =>
-    when(entries(id).addr === entries(j).addr && states(j).isValid() && states(j).isWaitSame()){
+    when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
      states(j).setWaitSame(false.B)
    }
  )
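
Taken together, the two hunks above set `waitSame` when a same-block entry is in flight and clear it when that entry's response returns. A simplified lifecycle model (plain Scala; `StateModel` and the field names are assumptions, not the real state machine):

    case class StateModel(valid: Boolean, inflight: Boolean, waitSame: Boolean)
    // On return of the entry at retAddr, release every same-block waiter.
    def onReturn(retAddr: Long, addrs: Seq[Long], sts: Seq[StateModel]): Seq[StateModel] =
      sts.zip(addrs).map { case (s, a) =>
        if (s.valid && s.waitSame && (a >>> 3) == (retAddr >>> 3)) s.copy(waitSame = false)
        else s
      }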
@@ -421,7 +426,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   val f1_fwdValid = RegNext(f0_fwdValid)
   // f0 vaddr match
-  val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
+  val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
   val f0_validTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid)
   // f0 select
   val f0_fwdMask = shiftMaskToHigh(
@@ -437,8 +442,8 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   val f1_fwdMask = RegEnable(f0_fwdMask, f0_fwdValid)
   val f1_fwdData = RegEnable(f0_fwdData, f0_fwdValid)
   // forward.paddr from dtlb, which is far from uncache
-  val f1_ptagMatches = sizeMap(w => RegEnable(entries(w).addr, f0_fwdValid) === RegEnable(forward.paddr, f0_fwdValid))
-  f1_tagMismatchVec(i) := f0_fwdValid && sizeMap(w =>
+  val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), RegEnable(forward.paddr, f0_fwdValid)))
+  f1_tagMismatchVec(i) := sizeMap(w =>
     RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
   ).asUInt.orR
   when(f1_tagMismatchVec(i)) {
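
The forward path matches speculatively on the virtual address in f0 and re-checks against the physical address from the DTLB in f1; a disagreement on any valid entry flags a mismatch and cancels the forward. As a software model (the signature is illustrative):

    def tagMismatch(vtagHit: Seq[Boolean], ptagHit: Seq[Boolean],
                    validMask: Seq[Boolean], f1FwdValid: Boolean): Boolean =
      f1FwdValid && vtagHit.indices.exists(w => validMask(w) && vtagHit(w) != ptagHit(w))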
@@ -456,11 +461,10 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
   for (j <- 0 until VDataBytes) {
     forward.forwardMaskFast(j) := f0_fwdMask(j)
+    forward.forwardData(j) := f1_fwdData(j)
     forward.forwardMask(j) := false.B
-    forward.forwardData(j) := DontCare
     when(f1_fwdMask(j) && f1_fwdValid) {
       forward.forwardMask(j) := true.B
-      forward.forwardData(j) := f1_fwdData(j)
     }
   }
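
Because same-block stores are serialized at enqueue, at most one buffered store can hit a forwarding load, so `forwardData` may now be driven unconditionally and only `forwardMask` needs qualification. A per-byte sketch (plain Scala, simplified types):

    // Returns (mask, data) for one byte lane j.
    def forwardByte(f1FwdValid: Boolean, f1Mask: Boolean, f1Data: Byte): (Boolean, Byte) = {
      val mask = f1FwdValid && f1Mask // byte is claimed only when its mask bit is set
      val data = f1Data               // safe to drive always: consumers sample data
                                      // only for bytes whose mask bit is set
      (mask, data)
    }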


@@ -66,22 +66,22 @@ object genWdata {
 }
 object shiftDataToLow {
-  def apply(addr: UInt,data : UInt): UInt = {
-    Mux(addr(3), (data >> 64).asUInt,data)
+  def apply(addr: UInt, data : UInt): UInt = {
+    Mux(addr(3), (data >> 64).asUInt, data)
   }
 }
 object shiftMaskToLow {
-  def apply(addr: UInt,mask: UInt): UInt = {
-    Mux(addr(3),(mask >> 8).asUInt,mask)
+  def apply(addr: UInt, mask: UInt): UInt = {
+    Mux(addr(3), (mask >> 8).asUInt, mask)
   }
 }
 object shiftDataToHigh {
-  def apply(addr: UInt,data : UInt): UInt = {
+  def apply(addr: UInt, data : UInt): UInt = {
     Mux(addr(3), (data << 64).asUInt, data)
   }
 }
 object shiftMaskToHigh {
-  def apply(addr: UInt,mask: UInt): UInt = {
+  def apply(addr: UInt, mask: UInt): UInt = {
     Mux(addr(3), (mask << 8).asUInt, mask)
   }
 }
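
These helpers select one 64-bit half of a 128-bit data beat (and the matching 8 bits of its 16-bit byte mask) using `addr(3)`. A plain-Scala model with a worked example (names suffixed `Model` are illustrative):

    def shiftDataToLowModel(addr: Long, data: BigInt): BigInt =
      if (((addr >> 3) & 1) == 1) data >> 64 else data
    def shiftMaskToLowModel(addr: Long, mask: Int): Int =
      if (((addr >> 3) & 1) == 1) mask >> 8 else mask

    val beat = (BigInt(0x1111) << 64) | BigInt(0x2222)
    assert(shiftDataToLowModel(0x0L, beat) == beat)           // low half already in place
    assert(shiftDataToLowModel(0x8L, beat) == BigInt(0x1111)) // high half moved down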


@@ -51,16 +51,15 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
   println("LoadQueueRAR: size: " + LoadQueueRARSize)
   // LoadQueueRAR field
-  // +-------+-------+-------+----------+----+
-  // | Valid | Uop   | PAddr | Released | NC |
-  // +-------+-------+-------+----------+----+
+  // +-------+-------+-------+----------+
+  // | Valid | Uop   | PAddr | Released |
+  // +-------+-------+-------+----------+
   //
   // Field descriptions:
   // Allocated   : entry is valid.
   // MicroOp     : Micro-op
   // PAddr       : physical address.
   // Released    : DCache released.
-  // NC          : is NC with data.
   val allocated = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B))) // The control signals need to explicitly indicate the initial value
@@ -74,7 +73,6 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
   ))
   paddrModule.io := DontCare
   val released = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B)))
-  val nc = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B)))
   val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
   // freeliset: store valid entries index.
@@ -144,13 +142,15 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
       // Fill info
       uop(enqIndex) := enq.bits.uop
-      released(enqIndex) :=
+      // NC is uncachable and will not be explicitly released.
+      // So NC requests are not allowed to have RAR
+      released(enqIndex) := enq.bits.is_nc || (
         enq.bits.data_valid &&
         (release2Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset) ||
         release1Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
-      nc(enqIndex) := enq.bits.is_nc
+      )
     }
   }
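
The rationale for folding `nc` into `released`: an uncached line is never held by the dcache, so no release event will ever arrive for it, and treating NC entries as released from the moment they enqueue preserves the check's meaning. A condensed model (the release match is collapsed into one boolean for brevity):

    def releasedAtEnq(isNc: Boolean, dataValid: Boolean, releaseHit: Boolean): Boolean =
      isNc || (dataValid && releaseHit)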
@@ -214,7 +214,7 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
     matchMaskReg(i) := (allocated(i) &
       paddrModule.io.releaseViolationMmask(w)(i) &
       robIdxMask(i) &&
-      (nc(i) || released(i)))
+      released(i))
   }
   val matchMask = GatedValidRegNext(matchMaskReg)
   // Load-to-Load violation check result
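
With NC entries enqueued as already released, the old `(nc(i) || released(i))` term is redundant. A small equivalence sketch (boolean model, illustrative names):

    // Invariant from the enqueue change: nc implies released,
    // hence nc || released == released for every reachable entry state.
    def matchEntry(allocated: Boolean, addrHit: Boolean, robHit: Boolean, released: Boolean): Boolean =
      allocated && addrHit && robHit && released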


@@ -28,7 +28,7 @@ import utility._
 import xiangshan.backend.Bundles
 import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
 import xiangshan.backend.fu.FuConfig.LduCfg
-import _root_.xiangshan.backend.HasMemBlockParameters
+import xiangshan.backend.HasMemBlockParameters
 class UncacheEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
   with HasCircularQueuePtrHelper


@@ -741,7 +741,6 @@ class LoadUnit(implicit p: Parameters) extends XSModule
     "b11".U -> (s0_dcache_vaddr(2, 0) === 0.U) //d
   ))
   XSError(s0_sel_src.isvec && s0_dcache_vaddr(3, 0) =/= 0.U && s0_sel_src.alignedType(2), "unit-stride 128 bit element is not aligned!")
-  XSError(s0_sel_src.isnc && s0_dcache_vaddr(3, 0) =/= 0.U && s0_sel_src.alignedType(2), "nc element is not aligned!")
   // accept load flow if dcache ready (tlb is always ready)
   // TODO: prefetch need writeback to loadQueueFlag
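
For context, the `alignedType` table above encodes per-size alignment checks; removing the NC assertion is what permits NC loads narrower than 128 bits (byte, half, word). A plain-Scala model of the table (a sketch, assuming the 2-bit size encoding shown in the diff):

    def aligned(alignedType: Int, vaddr: Long): Boolean = alignedType match {
      case 0 => true                // b00: byte, always aligned
      case 1 => (vaddr & 0x1) == 0  // b01: half-word
      case 2 => (vaddr & 0x3) == 0  // b10: word
      case 3 => (vaddr & 0x7) == 0  // b11: double-word
      case _ => false               // unreachable for a 2-bit encoding
    }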