feat(outstanding): support nc outstanding and remove mmio st outstanding
Parent: cfdd605feb · Commit: e04c5f647e
@@ -205,6 +205,11 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.redirect := backend.io.mem.redirect
memBlock.io.ooo_to_mem.csrCtrl := backend.io.mem.csrCtrl

// XXX lyq: remove this before PR
val tmp_debug_uncache_otsd = Constantin.createRecord("uncache_outstanding_enable", 0)
memBlock.io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable := tmp_debug_uncache_otsd

memBlock.io.ooo_to_mem.tlbCsr := backend.io.mem.tlbCsr
memBlock.io.ooo_to_mem.lsqio.lcommit := backend.io.mem.robLsqIO.lcommit
memBlock.io.ooo_to_mem.lsqio.scommit := backend.io.mem.robLsqIO.scommit
@@ -820,6 +820,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
// forward
loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
loadUnits(i).io.ubuffer <> uncache.io.forward(i)
loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
// ld-ld violation check
@@ -963,6 +964,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
// forward
hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
// hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
hybridUnits(i).io.ldu_io.vec_forward := DontCare
hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
@@ -1332,8 +1334,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
is (s_idle) {
when (uncacheReq.fire) {
when (lsq.io.uncache.req.valid) {
val isStore = lsq.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
when (!isStore || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
uncacheState := s_scalar_uncache
}
}.otherwise {
@@ -515,6 +515,7 @@ class UncacheWordReq(implicit p: Parameters) extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val vaddr = UInt(VAddrBits.W) // for uncache buffer forwarding
val data = UInt(XLEN.W)
val mask = UInt((XLEN/8).W)
val id = UInt(uncacheIdxBits.W)
@@ -534,8 +535,9 @@ class UncacheWordResp(implicit p: Parameters) extends DCacheBundle
{
val data = UInt(XLEN.W)
val data_delayed = UInt(XLEN.W)
val id = UInt(uncacheIdxBits.W)
val nc = Bool()
val id = UInt(uncacheIdxBits.W) // resp identified signals
val nc = Bool() // resp identified signals
val is2lq = Bool() // resp identified signals
val miss = Bool()
val replay = Bool()
val tag_error = Bool()
@@ -22,6 +22,7 @@ import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
@@ -33,8 +34,9 @@ class UncacheFlushBundle extends Bundle {
class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val vaddr = UInt(VAddrBits.W)
val data = UInt(XLEN.W)
val mask = UInt((XLEN/8).W)
val mask = UInt(DataBytes.W)
val id = UInt(uncacheIdxBits.W)
val nc = Bool()
val atomic = Bool()
@@ -43,9 +45,14 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
val resp_data = UInt(XLEN.W)
val resp_nderr = Bool()

// FIXME lyq: Confirm the forward logic. if no forward, it can be removed
val fwd_data = UInt(XLEN.W)
val fwd_mask = UInt(DataBytes.W)

def set(x: UncacheWordReq): Unit = {
cmd := x.cmd
addr := x.addr
vaddr := x.vaddr
data := x.data
mask := x.mask
id := x.id
@@ -53,6 +60,8 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
atomic := x.atomic
resp_nderr := false.B
resp_data := 0.U
fwd_data := 0.U
fwd_mask := 0.U
}

def update(x: TLBundleD): Unit = {
@@ -60,10 +69,18 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
resp_nderr := x.denied
}

def update(forwardData: UInt, forwardMask: UInt): Unit = {
fwd_data := forwardData
fwd_mask := forwardMask
}

def toUncacheWordResp(): UncacheWordResp = {
val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), resp_data(8*(j+1)-1, 8*j))
)).asUInt
val r = Wire(new UncacheWordResp)
r := DontCare
r.data := resp_data
r.data := resp_fwd_data
r.id := id
r.nderr := resp_nderr
r.nc := nc
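The `toUncacheWordResp` change above stops returning the raw bus data and instead overlays any bytes that were forwarded into the entry on top of the response, one byte lane at a time. A minimal, self-contained sketch of the same merge pattern (hypothetical module and widths, not the actual `UncacheEntry` bundle):

```scala
import chisel3._
import chisel3.util._

// Sketch: overlay forwarded store bytes (fwdData/fwdMask) on top of the
// data returned by the bus (respData), one mux per byte lane.
class ByteMerge(dataBytes: Int = 8) extends Module {
  val io = IO(new Bundle {
    val respData = Input(UInt((dataBytes * 8).W))
    val fwdData  = Input(UInt((dataBytes * 8).W))
    val fwdMask  = Input(UInt(dataBytes.W))
    val merged   = Output(UInt((dataBytes * 8).W))
  })
  io.merged := VecInit((0 until dataBytes).map { j =>
    // take the forwarded byte when its mask bit is set, otherwise the bus byte
    Mux(io.fwdMask(j), io.fwdData(8 * (j + 1) - 1, 8 * j), io.respData(8 * (j + 1) - 1, 8 * j))
  }).asUInt
}
```

The byte granularity matters because a later store may have overwritten only part of the doubleword an earlier nc load is waiting on.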
@@ -121,6 +138,7 @@ class UncacheIO(implicit p: Parameters) extends DCacheBundle {
val enableOutstanding = Input(Bool())
val flush = Flipped(new UncacheFlushBundle)
val lsq = Flipped(new UncacheWordIO)
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// convert DCacheIO to TileLink
@@ -182,10 +200,19 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
val uState = RegInit(s_idle)

def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR

// drain buffer
val empty = Wire(Bool())
val f0_needDrain = Wire(Bool())
val do_uarch_drain = RegNext(f0_needDrain)

val q0_entry = Wire(new UncacheEntry)
val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
val q0_canSent = Wire(Bool())


/******************************************************************
* uState for non-outstanding
******************************************************************/
@@ -234,28 +261,37 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
2. how to merge store and response precisely
*/

val e0_invalids = sizeMap(i => !states(i).isValid())
val e0_invalid_oh = VecInit(PriorityEncoderOH(e0_invalids)).asUInt
val e0_fire = req.fire
val e0_req = req.bits

req_ready := e0_invalid_oh.orR
/**
TODO lyq: prohibit or wait or forward?
NOW: strict block by same address; otherwise: exhaustive consideration is needed.
- ld->ld wait
- ld->st forward
- st->ld forward
- st->st block
*/
val e0_existSameVec = sizeMap(j =>
e0_req.addr === entries(j).addr && states(j).isValid()
)
val e0_invalidVec = sizeMap(i => !states(i).isValid() && !e0_existSameVec(i))
val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
val e0_alloc = e0_canAlloc && e0_fire
req_ready := e0_invalidVec.asUInt.orR && !do_uarch_drain

for (i <- 0 until UncacheBufferSize) {
val alloc = e0_fire && e0_invalid_oh(i)
when(alloc){
entries(i).set(e0_req)
states(i).setValid(true.B)

// judge whether wait same block: e0 & q0
val waitSameVec = sizeMap(j =>
e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
)
val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
when (waitSameVec.reduce(_ || _) || waitQ0) {
states(i).setWaitSame(true.B)
}
when (e0_alloc) {
entries(e0_allocIdx).set(e0_req)
states(e0_allocIdx).setValid(true.B)

// judge whether wait same block: e0 & q0
val waitSameVec = sizeMap(j =>
e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
)
val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
when (waitSameVec.reduce(_ || _) || waitQ0) {
states(e0_allocIdx).setWaitSame(true.B)
}

}
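The enqueue rewrite above moves from a per-entry one-hot allocator to a single `PriorityEncoderWithFlag` pick, and the TODO spells out the interim policy: strictly block a new request while another valid entry holds the same address. A reduced sketch of that stated policy, using a hypothetical `valids`/`addrs` state instead of the real `states`/`entries` structures:

```scala
import chisel3._
import chisel3.util._

// Sketch: allocate a free slot only when no valid entry already holds the
// request's address ("strict block by same address").
class SameAddrBlockAlloc(nEntries: Int = 4, addrBits: Int = 36) extends Module {
  val io = IO(new Bundle {
    val reqValid = Input(Bool())
    val reqAddr  = Input(UInt(addrBits.W))
    val ready    = Output(Bool())
    val allocIdx = Output(UInt(log2Ceil(nEntries).W))
  })
  val valids = RegInit(VecInit(Seq.fill(nEntries)(false.B)))
  val addrs  = Reg(Vec(nEntries, UInt(addrBits.W)))

  val sameAddr   = VecInit((0 until nEntries).map(i => valids(i) && addrs(i) === io.reqAddr))
  val candidates = VecInit((0 until nEntries).map(i => !valids(i) && !sameAddr.asUInt.orR))
  val allocSel   = PriorityEncoder(candidates)

  io.ready    := candidates.asUInt.orR
  io.allocIdx := allocSel
  when(io.reqValid && io.ready) {
    valids(allocSel) := true.B
    addrs(allocSel)  := io.reqAddr
  }
}
```

The diff additionally marks the new entry `waitSame` when an in-flight entry or the currently issuing `q0` request matches the same address, so it is held back until that request drains.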
@@ -272,7 +308,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
******************************************************************/

val q0_canSentVec = sizeMap(i =>
// (io.enableOutstanding || uState === s_refill_req) && // FIXME lyq: comment for debug
(io.enableOutstanding || uState === s_refill_req) &&
states(i).can2Uncache()
)
val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
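The restored `q0_canSentVec` condition is what separates the two operating modes: with `enableOutstanding` set, any entry that is ready may issue to the bus immediately; otherwise issue is serialized behind the `uState` handshake so only one request is in flight. A small per-entry sketch of that gate (hypothetical signal names):

```scala
import chisel3._

// Per-entry issue gate:
//  - outstanding mode: issue whenever the entry itself is ready
//  - non-outstanding mode: additionally wait for the global state machine to
//    reach its "send request" state, so at most one request is in flight
class IssueGate extends Module {
  val io = IO(new Bundle {
    val enableOutstanding = Input(Bool())
    val uStateIsRefillReq = Input(Bool())
    val entryReady        = Input(Bool())
    val canIssue          = Output(Bool())
  })
  io.canIssue := (io.enableOutstanding || io.uStateIsRefillReq) && io.entryReady
}
```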
@@ -360,9 +396,75 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
* 1. when io.flush.valid is true
* 2. when io.lsq.req.bits.atomic is true
******************************************************************/
empty := !VecInit(states.map(_.isValid())).asUInt.orR
io.flush.empty := empty

val invalid_entries = PopCount(states.map(!_.isValid()))
io.flush.empty := invalid_entries === UncacheBufferSize.U

/******************************************************************
* Load Data Forward
*
* 0. ld in ldu pipeline
* f0: tag match, fast resp
* f1: data resp
*
* 1. ld in buffer (in "Enter Buffer")
* ld(en) -> st(in): ld entry.update, state.updateUncacheResp
* st(en) -> ld(in): ld entry.update, state.updateUncacheResp
* NOW: strict block by same address; there is no such forward.
*
******************************************************************/

val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty

for ((forward, i) <- io.forward.zipWithIndex) {
val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr)
f0_tagMismatchVec(i) := forward.valid && sizeMap(w =>
f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w)
).asUInt.orR
when (f0_tagMismatchVec(i)) {
XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
RegNext(f0_ptagMatches.asUInt),
RegNext(f0_vtagMatches.asUInt),
RegNext(forward.vaddr),
RegNext(forward.paddr)
)
}

val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid)

val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
val f0_fwdMask = shiftMaskToHigh(
forward.paddr,
Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
).asTypeOf(Vec(VDataBytes, Bool()))
val f0_fwdData = shiftDataToHigh(
forward.paddr,
Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
).asTypeOf(Vec(VDataBytes, UInt(8.W)))

val f1_fwdValid = RegNext(forward.valid)
val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid)
val f1_fwdData = RegEnable(f0_fwdData, forward.valid)

forward.addrInvalid := false.B // addr in ubuffer is always ready
forward.dataInvalid := false.B // data in ubuffer is always ready
forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match
for (j <- 0 until VDataBytes) {
forward.forwardMaskFast(j) := f0_fwdMask(j)

forward.forwardMask(j) := false.B
forward.forwardData(j) := DontCare
when(f1_fwdMask(j) && f1_fwdValid) {
forward.forwardMask(j) := true.B
forward.forwardData(j) := f1_fwdData(j)
}
}

}


/******************************************************************
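The new forward port answers a load-unit query in two stages: in the cycle the query arrives (f0) it performs the vtag/ptag CAM and returns the fast byte mask, and in the next cycle (f1) it returns the registered data. A condensed sketch of that timing, assuming at most one matching entry and hypothetical port names:

```scala
import chisel3._
import chisel3.util._

class TwoStageForward(dataBytes: Int = 16, nEntries: Int = 4) extends Module {
  val io = IO(new Bundle {
    val queryValid = Input(Bool())
    val matchVec   = Input(Vec(nEntries, Bool()))             // per-entry ptag match (f0)
    val entryMask  = Input(Vec(nEntries, UInt(dataBytes.W)))  // per-entry byte masks
    val entryData  = Input(Vec(nEntries, UInt((dataBytes * 8).W)))
    val fastMask   = Output(UInt(dataBytes.W))                // f0: same-cycle mask
    val fwdMask    = Output(UInt(dataBytes.W))                // f1: registered mask
    val fwdData    = Output(UInt((dataBytes * 8).W))          // f1: registered data
  })
  // f0: select the matching entry's mask/data combinationally
  val f0_mask = Mux1H(io.matchVec, io.entryMask)
  val f0_data = Mux1H(io.matchVec, io.entryData)
  io.fastMask := f0_mask
  // f1: the actual forwarded bytes are presented one cycle later
  val f1_valid = RegNext(io.queryValid, false.B)
  io.fwdMask := Mux(f1_valid, RegEnable(f0_mask, io.queryValid), 0.U)
  io.fwdData := RegEnable(f0_data, io.queryValid)
}
```

The `f0_tagMismatchVec` check in the diff additionally catches vtag/ptag CAM disagreement and triggers a drain (`do_uarch_drain`) instead of forwarding stale data.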
@@ -386,18 +488,18 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
}

/* Performance Counters */
def isStore: Bool = io.lsq.req.bits.cmd === MemoryOpConstants.M_XWR
XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
XSPerfAccumulate("vaddr_match_failed", PopCount(f0_tagMismatchVec))

val perfEvents = Seq(
("uncache_mmio_store", io.lsq.req.fire && isStore && !io.lsq.req.bits.nc),
("uncache_mmio_load", io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc),
("uncache_nc_store", io.lsq.req.fire && isStore && io.lsq.req.bits.nc),
("uncache_nc_load", io.lsq.req.fire && !isStore && io.lsq.req.bits.nc),
("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
)
@@ -75,6 +75,16 @@ object shiftMaskToLow {
Mux(addr(3),(mask >> 8).asUInt,mask)
}
}
object shiftDataToHigh {
def apply(addr: UInt,data : UInt): UInt = {
Mux(addr(3), (data << 64).asUInt, data)
}
}
object shiftMaskToHigh {
def apply(addr: UInt,mask: UInt): UInt = {
Mux(addr(3), (mask << 8).asUInt, mask)
}
}

class LsPipelineBundle(implicit p: Parameters) extends XSBundle
with HasDCacheParameters
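`shiftDataToHigh`/`shiftMaskToHigh` are the mirror of the existing `shiftDataToLow`/`shiftMaskToLow`: bit 3 of the physical address tells which 64-bit half of the 128-bit store-queue line a doubleword occupies, so moving between the 64-bit uncache word and the 128-bit line is a constant shift by 64 bits (8 bits for the byte mask). A small usage sketch (hypothetical helper names, same shape as the objects above):

```scala
import chisel3._

object LineLayoutExample {
  // paddr(3) selects the doubleword within a 16-byte store-queue line:
  // set -> upper half, clear -> lower half.
  def placeWordInLine(paddr: UInt, word: UInt): UInt =
    Mux(paddr(3), (word << 64).asUInt, word) // same shape as shiftDataToHigh
  def placeMaskInLine(paddr: UInt, mask: UInt): UInt =
    Mux(paddr(3), (mask << 8).asUInt, mask)  // same shape as shiftMaskToHigh
}
```

The uncache buffer uses the `...ToHigh` direction when forwarding its 64-bit entry data back into the 128-bit load-pipeline forward vector.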
@ -247,8 +247,10 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
|
|||
switch(pendingstate){
|
||||
is(s_idle){
|
||||
when(io.uncache.req.fire){
|
||||
pendingstate := Mux(loadQueue.io.uncache.req.valid, s_load,
|
||||
Mux(io.uncacheOutstanding, s_idle, s_store))
|
||||
pendingstate :=
|
||||
Mux(io.uncacheOutstanding && io.uncache.req.bits.nc, s_idle,
|
||||
Mux(loadQueue.io.uncache.req.valid, s_load,
|
||||
s_store))
|
||||
}
|
||||
}
|
||||
is(s_load){
|
||||
|
|
@ -279,14 +281,10 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
|
|||
io.uncache.req.valid := false.B
|
||||
io.uncache.req.bits := DontCare
|
||||
}
|
||||
when (io.uncacheOutstanding) {
|
||||
when (io.uncache.resp.bits.is2lq) {
|
||||
io.uncache.resp <> loadQueue.io.uncache.resp
|
||||
} .otherwise {
|
||||
when(pendingstate === s_load){
|
||||
io.uncache.resp <> loadQueue.io.uncache.resp
|
||||
}.otherwise{
|
||||
io.uncache.resp <> storeQueue.io.uncache.resp
|
||||
}
|
||||
io.uncache.resp <> storeQueue.io.uncache.resp
|
||||
}
|
||||
|
||||
loadQueue.io.debugTopDown <> io.debugTopDown
|
||||
|
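With outstanding enabled, the wrapper above can no longer use `pendingstate` to decide whether an uncache response belongs to the load or the store queue, because several requests may be in flight at once; the response now carries `is2lq` and is steered by it. A minimal routing sketch under that assumption (hypothetical bundle, only the steering shown):

```scala
import chisel3._

class UncacheRespDemux extends Module {
  val io = IO(new Bundle {
    val outstanding   = Input(Bool())
    val pendingIsLoad = Input(Bool())  // non-outstanding bookkeeping (FSM)
    val respValid     = Input(Bool())
    val respIs2lq     = Input(Bool())  // carried inside the response itself
    val toLoadQueue   = Output(Bool())
    val toStoreQueue  = Output(Bool())
  })
  // outstanding: trust the tag in the response; otherwise: trust the FSM
  val isLoad = Mux(io.outstanding, io.respIs2lq, io.pendingIsLoad)
  io.toLoadQueue  := io.respValid && isLoad
  io.toStoreQueue := io.respValid && !isLoad
}
```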
|
|
|||
|
|
@ -304,6 +304,13 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
val mmioReq = Wire(chiselTypeOf(io.uncache.req))
|
||||
val ncReq = Wire(chiselTypeOf(io.uncache.req))
|
||||
val ncResp = Wire(chiselTypeOf(io.uncache.resp))
|
||||
val ncDoReq = Wire(Bool())
|
||||
val ncDoResp = Wire(Bool())
|
||||
val ncReadNextTrigger = Mux(io.uncacheOutstanding, ncDoReq, ncDoResp)
|
||||
// ncDoReq is double RegNexted, as ubuffer data write takes 3 cycles.
|
||||
// TODO lyq: to eliminate coupling by passing signals through ubuffer
|
||||
val ncDeqTrigger = Mux(io.uncacheOutstanding, RegNext(RegNext(ncDoReq)), ncDoResp)
|
||||
val ncPtr = Mux(io.uncacheOutstanding, RegNext(RegNext(io.uncache.req.bits.id)), io.uncache.resp.bits.id)
|
||||
|
||||
// store miss align info
|
||||
io.maControl.storeInfo.data := dataModule.io.rdata(0).data
|
||||
|
|
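The `ncDeqTrigger`/`ncPtr` pair above encodes a timing detail: in outstanding mode the store-queue entry is released off the request rather than the response, but (per the diff's own comment) the uncache buffer takes about three cycles to capture the written data, so the dequeue trigger and its pointer are delayed by two register stages. A sketch of that alignment, with hypothetical names:

```scala
import chisel3._

class NcDeqAlign(idBits: Int = 5) extends Module {
  val io = IO(new Bundle {
    val outstanding = Input(Bool())
    val reqFire  = Input(Bool())        // nc store request accepted by uncache
    val reqId    = Input(UInt(idBits.W))
    val respFire = Input(Bool())        // nc store response (non-outstanding path)
    val respId   = Input(UInt(idBits.W))
    val deq      = Output(Bool())
    val deqPtr   = Output(UInt(idBits.W))
  })
  // outstanding: free the entry two cycles after the request fired, so the
  // ubuffer has finished latching the data; otherwise wait for the response.
  io.deq    := Mux(io.outstanding, RegNext(RegNext(io.reqFire, false.B), false.B), io.respFire)
  io.deqPtr := Mux(io.outstanding, RegNext(RegNext(io.reqId)), io.respId)
}
```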
@ -320,7 +327,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
val rdataPtrExtNext = Wire(Vec(EnsbufferWidth, new SqPtr))
|
||||
rdataPtrExtNext := rdataPtrExt.map(i => i +
|
||||
PopCount(dataBuffer.io.enq.map(_.fire)) +
|
||||
PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire)
|
||||
PopCount(ncReadNextTrigger || io.mmioStout.fire || io.vecmmioStout.fire)
|
||||
)
|
||||
|
||||
// deqPtrExtNext traces which inst is about to leave store queue
|
||||
|
|
@ -334,12 +341,12 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
val deqPtrExtNext = Wire(Vec(EnsbufferWidth, new SqPtr))
|
||||
deqPtrExtNext := deqPtrExt.map(i => i +
|
||||
RegNext(PopCount(VecInit(io.sbuffer.map(_.fire)))) +
|
||||
PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire)
|
||||
PopCount(ncDeqTrigger || io.mmioStout.fire || io.vecmmioStout.fire)
|
||||
)
|
||||
|
||||
io.sqDeq := RegNext(
|
||||
RegNext(PopCount(VecInit(io.sbuffer.map(_.fire && !misalignBlock)))) +
|
||||
PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire || finishMisalignSt)
|
||||
PopCount(ncDeqTrigger || io.mmioStout.fire || io.vecmmioStout.fire || finishMisalignSt)
|
||||
)
|
||||
|
||||
assert(!RegNext(RegNext(io.sbuffer(0).fire) && (io.mmioStout.fire || io.vecmmioStout.fire)))
|
||||
|
|
@ -804,11 +811,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
}
|
||||
is(s_req) {
|
||||
when (mmioDoReq) {
|
||||
when (io.uncacheOutstanding) {
|
||||
mmioState := s_wb
|
||||
} .otherwise {
|
||||
mmioState := s_resp
|
||||
}
|
||||
mmioState := s_resp
|
||||
}
|
||||
}
|
||||
is(s_resp) {
|
||||
|
|
@ -841,6 +844,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
mmioReq.bits := DontCare
|
||||
mmioReq.bits.cmd := MemoryOpConstants.M_XWR
|
||||
mmioReq.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0)
|
||||
mmioReq.bits.vaddr:= vaddrModule.io.rdata(0)
|
||||
mmioReq.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data)
|
||||
mmioReq.bits.mask := shiftMaskToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).mask)
|
||||
mmioReq.bits.atomic := atomic(GatedRegNext(rdataPtrExtNext(0)).value)
|
||||
|
|
@ -855,7 +859,6 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
// TODO: CAN NOT deal with vector nc now!
|
||||
val nc_idle :: nc_req :: nc_resp :: Nil = Enum(3)
|
||||
val ncState = RegInit(nc_idle)
|
||||
val ncDoReq = io.uncache.req.fire && io.uncache.req.bits.nc
|
||||
val rptr0 = rdataPtrExt(0).value
|
||||
switch(ncState){
|
||||
is(nc_idle) {
|
||||
|
|
@ -865,7 +868,11 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
}
|
||||
is(nc_req) {
|
||||
when(ncDoReq) {
|
||||
ncState := nc_resp
|
||||
when(io.uncacheOutstanding) {
|
||||
ncState := nc_idle
|
||||
}.otherwise{
|
||||
ncState := nc_resp
|
||||
}
|
||||
}
|
||||
}
|
||||
is(nc_resp) {
|
||||
|
|
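Putting the two branches above together, the nc-store state machine only visits `nc_resp` when outstanding is disabled; with outstanding enabled it returns to `nc_idle` as soon as the request is accepted, so the next nc store can issue immediately. A consolidated sketch of that FSM (state names as in the diff, the surrounding queue logic omitted; `haveNcStore` is a hypothetical stand-in for the idle-state condition):

```scala
import chisel3._
import chisel3.util._

class NcStoreFsm extends Module {
  val io = IO(new Bundle {
    val haveNcStore = Input(Bool())   // an nc store is ready at the queue head
    val reqFire     = Input(Bool())   // uncache accepted the nc request
    val respFire    = Input(Bool())   // nc response returned
    val outstanding = Input(Bool())
    val state       = Output(UInt(2.W))
  })
  val nc_idle :: nc_req :: nc_resp :: Nil = Enum(3)
  val ncState = RegInit(nc_idle)
  switch(ncState) {
    is(nc_idle) { when(io.haveNcStore) { ncState := nc_req } }
    is(nc_req) {
      when(io.reqFire) {
        // outstanding: do not wait for the response before issuing the next one
        ncState := Mux(io.outstanding, nc_idle, nc_resp)
      }
    }
    is(nc_resp) { when(io.respFire) { ncState := nc_idle } }
  }
  io.state := ncState
}
```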
@ -874,23 +881,27 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
ncDoReq := io.uncache.req.fire && io.uncache.req.bits.nc
|
||||
ncDoResp := ncResp.fire
|
||||
|
||||
ncReq.valid := ncState === nc_req
|
||||
ncReq.bits := DontCare
|
||||
ncReq.bits.cmd := MemoryOpConstants.M_XWR
|
||||
ncReq.bits.addr := paddrModule.io.rdata(0)
|
||||
ncReq.bits.vaddr:= vaddrModule.io.rdata(0)
|
||||
ncReq.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data)
|
||||
ncReq.bits.mask := shiftMaskToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).mask)
|
||||
ncReq.bits.atomic := atomic(GatedRegNext(rdataPtrExtNext(0)).value)
|
||||
ncReq.bits.nc := true.B
|
||||
ncReq.bits.id := rdataPtrExt(0).value
|
||||
ncReq.bits.id := rptr0
|
||||
|
||||
ncResp.ready := io.uncache.resp.ready
|
||||
ncResp.valid := io.uncache.resp.fire && io.uncache.resp.bits.nc
|
||||
ncResp.bits <> io.uncache.resp.bits
|
||||
when (ncResp.fire) {
|
||||
val ptr = io.uncache.resp.bits.id
|
||||
allocated(ptr) := false.B
|
||||
XSDebug("nc fire: ptr %d\n", ptr)
|
||||
when (ncDeqTrigger) {
|
||||
allocated(ncPtr) := false.B
|
||||
XSDebug("nc fire: ptr %d\n", ncPtr)
|
||||
}
|
||||
|
||||
mmioReq.ready := io.uncache.req.ready
|
||||
|
|
|
|||
|
|
@ -129,6 +129,7 @@ class IOBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
|
|||
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
|
||||
io.uncache.req.bits.data := DontCare
|
||||
io.uncache.req.bits.addr := req.paddr
|
||||
io.uncache.req.bits.vaddr:= req.vaddr
|
||||
io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
|
||||
io.uncache.req.bits.id := io.id
|
||||
io.uncache.req.bits.instrtype := DontCare
|
||||
|
|
@ -241,7 +242,7 @@ class IOBuffer(implicit p: Parameters) extends XSModule
|
|||
allocWidth = LoadPipelineWidth,
|
||||
freeWidth = 4,
|
||||
enablePreAlloc = true,
|
||||
moduleName = "UncacheBuffer freelist"
|
||||
moduleName = "IOBuffer freelist"
|
||||
))
|
||||
freeList.io := DontCare
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
/***************************************************************************************
|
||||
/***************************************************************************************
|
||||
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
|
||||
* Copyright (c) 2020-2021 Peng Cheng Laboratory
|
||||
*
|
||||
|
|
@ -13,420 +13,421 @@
|
|||
*
|
||||
* See the Mulan PSL v2 for more details.
|
||||
***************************************************************************************/
|
||||
package xiangshan.mem
|
||||
package xiangshan.mem
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.cde.config._
|
||||
import xiangshan._
|
||||
import xiangshan.backend.rob.{RobPtr, RobLsqIO}
|
||||
import xiangshan.ExceptionNO._
|
||||
import xiangshan.cache._
|
||||
import utils._
|
||||
import utility._
|
||||
import xiangshan.backend.Bundles
|
||||
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
|
||||
import xiangshan.backend.fu.FuConfig.LduCfg
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.cde.config._
|
||||
import xiangshan._
|
||||
import xiangshan.backend.rob.{RobPtr, RobLsqIO}
|
||||
import xiangshan.ExceptionNO._
|
||||
import xiangshan.cache._
|
||||
import utils._
|
||||
import utility._
|
||||
import xiangshan.backend.Bundles
|
||||
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
|
||||
import xiangshan.backend.fu.FuConfig.LduCfg
|
||||
|
||||
class NCBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
|
||||
with HasCircularQueuePtrHelper
|
||||
with HasLoadHelper
|
||||
{
|
||||
val io = IO(new Bundle() {
|
||||
val id = Input(UInt())
|
||||
class NCBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
|
||||
with HasCircularQueuePtrHelper
|
||||
with HasLoadHelper
|
||||
{
|
||||
val io = IO(new Bundle() {
|
||||
val id = Input(UInt())
|
||||
|
||||
val redirect = Flipped(Valid(new Redirect))
|
||||
val redirect = Flipped(Valid(new Redirect))
|
||||
|
||||
// client requests
|
||||
val req = Flipped(Valid(new LqWriteBundle))
|
||||
// client requests
|
||||
val req = Flipped(Valid(new LqWriteBundle))
|
||||
|
||||
// rerequest nc_with_data to loadunit
|
||||
val ncOut = DecoupledIO(new LsPipelineBundle)
|
||||
// rerequest nc_with_data to loadunit
|
||||
val ncOut = DecoupledIO(new LsPipelineBundle)
|
||||
|
||||
// uncache io
|
||||
val uncache = new UncacheWordIO
|
||||
// uncache io
|
||||
val uncache = new UncacheWordIO
|
||||
|
||||
// flush this entry
|
||||
val flush = Output(Bool())
|
||||
// flush this entry
|
||||
val flush = Output(Bool())
|
||||
|
||||
// exception generated by outer bus
|
||||
val exception = Valid(new LqWriteBundle)
|
||||
})
|
||||
// exception generated by outer bus
|
||||
val exception = Valid(new LqWriteBundle)
|
||||
})
|
||||
|
||||
val req_valid = RegInit(false.B)
|
||||
val req = Reg(new LqWriteBundle)
|
||||
val req_valid = RegInit(false.B)
|
||||
val req = Reg(new LqWriteBundle)
|
||||
|
||||
val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4)
|
||||
val uncacheState = RegInit(s_idle)
|
||||
val uncacheData = Reg(io.uncache.resp.bits.data.cloneType)
|
||||
val nderr = RegInit(false.B)
|
||||
val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4)
|
||||
val uncacheState = RegInit(s_idle)
|
||||
val uncacheData = Reg(io.uncache.resp.bits.data.cloneType)
|
||||
val nderr = RegInit(false.B)
|
||||
|
||||
// enqueue
|
||||
when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
|
||||
req_valid := false.B
|
||||
} .elsewhen (io.req.valid) {
|
||||
XSError(req_valid, p"UncacheNCBuffer: You can not write an valid entry: $entryIndex")
|
||||
req_valid := true.B
|
||||
req := io.req.bits
|
||||
nderr := false.B
|
||||
} .elsewhen (io.ncOut.fire) {
|
||||
req_valid := false.B
|
||||
}
|
||||
// enqueue
|
||||
when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
|
||||
req_valid := false.B
|
||||
} .elsewhen (io.req.valid) {
|
||||
XSError(req_valid, p"UncacheNCBuffer: You can not write an valid entry: $entryIndex")
|
||||
req_valid := true.B
|
||||
req := io.req.bits
|
||||
nderr := false.B
|
||||
} .elsewhen (io.ncOut.fire) {
|
||||
req_valid := false.B
|
||||
}
|
||||
|
||||
io.flush := req_valid && req.uop.robIdx.needFlush(io.redirect)
|
||||
/**
|
||||
* NC operations
|
||||
*
|
||||
* States:
|
||||
* (1) s_idle: wait for nc req from loadunit
|
||||
* (2) s_req: wait to be sent to uncache channel until getting new nc req and uncache ready
|
||||
* (3) s_resp: wait for response from uncache channel
|
||||
* (4) s_wait: wait loadunit for A to receive nc_with_data req
|
||||
*/
|
||||
io.flush := req_valid && req.uop.robIdx.needFlush(io.redirect)
|
||||
/**
|
||||
* NC operations
|
||||
*
|
||||
* States:
|
||||
* (1) s_idle: wait for nc req from loadunit
|
||||
* (2) s_req: wait to be sent to uncache channel until getting new nc req and uncache ready
|
||||
* (3) s_resp: wait for response from uncache channel
|
||||
* (4) s_wait: wait loadunit for A to receive nc_with_data req
|
||||
*/
|
||||
|
||||
switch (uncacheState) {
|
||||
is (s_idle) {
|
||||
when (req_valid) {
|
||||
uncacheState := s_req
|
||||
}
|
||||
}
|
||||
is (s_req) {
|
||||
when (io.uncache.req.fire) {
|
||||
uncacheState := s_resp
|
||||
}
|
||||
}
|
||||
is (s_resp) {
|
||||
when (io.uncache.resp.fire) {
|
||||
uncacheState := s_wait
|
||||
}
|
||||
}
|
||||
is (s_wait) {
|
||||
when (io.ncOut.fire) {
|
||||
uncacheState := s_idle // ready for next mmio
|
||||
}
|
||||
}
|
||||
}
|
||||
switch (uncacheState) {
|
||||
is (s_idle) {
|
||||
when (req_valid) {
|
||||
uncacheState := s_req
|
||||
}
|
||||
}
|
||||
is (s_req) {
|
||||
when (io.uncache.req.fire) {
|
||||
uncacheState := s_resp
|
||||
}
|
||||
}
|
||||
is (s_resp) {
|
||||
when (io.uncache.resp.fire) {
|
||||
uncacheState := s_wait
|
||||
}
|
||||
}
|
||||
is (s_wait) {
|
||||
when (io.ncOut.fire) {
|
||||
uncacheState := s_idle // ready for next mmio
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
io.uncache.req.valid := uncacheState === s_req
|
||||
io.uncache.req.bits := DontCare
|
||||
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
|
||||
io.uncache.req.bits.data := DontCare
|
||||
io.uncache.req.bits.addr := req.paddr
|
||||
io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
|
||||
io.uncache.req.bits.id := io.id
|
||||
io.uncache.req.bits.instrtype := DontCare
|
||||
io.uncache.req.bits.replayCarry := DontCare
|
||||
io.uncache.req.bits.atomic := false.B
|
||||
io.uncache.req.bits.nc := true.B
|
||||
io.uncache.req.valid := uncacheState === s_req
|
||||
io.uncache.req.bits := DontCare
|
||||
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
|
||||
io.uncache.req.bits.data := DontCare
|
||||
io.uncache.req.bits.addr := req.paddr
|
||||
io.uncache.req.bits.vaddr:= req.vaddr
|
||||
io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
|
||||
io.uncache.req.bits.id := io.id
|
||||
io.uncache.req.bits.instrtype := DontCare
|
||||
io.uncache.req.bits.replayCarry := DontCare
|
||||
io.uncache.req.bits.atomic := false.B
|
||||
io.uncache.req.bits.nc := true.B
|
||||
|
||||
io.uncache.resp.ready := true.B
|
||||
io.uncache.resp.ready := true.B
|
||||
|
||||
when (io.uncache.req.fire) {
|
||||
XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
|
||||
req.uop.pc,
|
||||
io.uncache.req.bits.addr,
|
||||
io.uncache.req.bits.data,
|
||||
io.uncache.req.bits.cmd,
|
||||
io.uncache.req.bits.mask
|
||||
)
|
||||
}
|
||||
when (io.uncache.req.fire) {
|
||||
XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
|
||||
req.uop.pc,
|
||||
io.uncache.req.bits.addr,
|
||||
io.uncache.req.bits.data,
|
||||
io.uncache.req.bits.cmd,
|
||||
io.uncache.req.bits.mask
|
||||
)
|
||||
}
|
||||
|
||||
// (3) response from uncache channel
|
||||
when (io.uncache.resp.fire) {
|
||||
uncacheData := io.uncache.resp.bits.data
|
||||
nderr := io.uncache.resp.bits.nderr
|
||||
}
|
||||
// (3) response from uncache channel
|
||||
when (io.uncache.resp.fire) {
|
||||
uncacheData := io.uncache.resp.bits.data
|
||||
nderr := io.uncache.resp.bits.nderr
|
||||
}
|
||||
|
||||
// uncache writeback
|
||||
val selUop = req.uop
|
||||
val func = selUop.fuOpType
|
||||
val raddr = req.paddr
|
||||
val rdataSel = LookupTree(raddr(2, 0), List(
|
||||
"b000".U -> uncacheData(63, 0),
|
||||
"b001".U -> uncacheData(63, 8),
|
||||
"b010".U -> uncacheData(63, 16),
|
||||
"b011".U -> uncacheData(63, 24),
|
||||
"b100".U -> uncacheData(63, 32),
|
||||
"b101".U -> uncacheData(63, 40),
|
||||
"b110".U -> uncacheData(63, 48),
|
||||
"b111".U -> uncacheData(63, 56)
|
||||
))
|
||||
val rdataPartialLoad = rdataHelper(selUop, rdataSel)
|
||||
// uncache writeback
|
||||
val selUop = req.uop
|
||||
val func = selUop.fuOpType
|
||||
val raddr = req.paddr
|
||||
val rdataSel = LookupTree(raddr(2, 0), List(
|
||||
"b000".U -> uncacheData(63, 0),
|
||||
"b001".U -> uncacheData(63, 8),
|
||||
"b010".U -> uncacheData(63, 16),
|
||||
"b011".U -> uncacheData(63, 24),
|
||||
"b100".U -> uncacheData(63, 32),
|
||||
"b101".U -> uncacheData(63, 40),
|
||||
"b110".U -> uncacheData(63, 48),
|
||||
"b111".U -> uncacheData(63, 56)
|
||||
))
|
||||
val rdataPartialLoad = rdataHelper(selUop, rdataSel)
|
||||
|
||||
io.ncOut.valid := (uncacheState === s_wait)
|
||||
io.ncOut.bits := DontCare
|
||||
io.ncOut.bits.uop := selUop
|
||||
io.ncOut.bits.uop.lqIdx := req.uop.lqIdx
|
||||
io.ncOut.bits.uop.exceptionVec(loadAccessFault) := nderr
|
||||
io.ncOut.bits.data := rdataPartialLoad
|
||||
io.ncOut.bits.paddr := req.paddr
|
||||
io.ncOut.bits.vaddr := req.vaddr
|
||||
io.ncOut.bits.nc := true.B
|
||||
io.ncOut.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
|
||||
io.ncOut.bits.schedIndex := req.schedIndex
|
||||
io.ncOut.bits.isvec := req.isvec
|
||||
io.ncOut.bits.is128bit := req.is128bit
|
||||
io.ncOut.bits.vecActive := req.vecActive
|
||||
io.ncOut.valid := (uncacheState === s_wait)
|
||||
io.ncOut.bits := DontCare
|
||||
io.ncOut.bits.uop := selUop
|
||||
io.ncOut.bits.uop.lqIdx := req.uop.lqIdx
|
||||
io.ncOut.bits.uop.exceptionVec(loadAccessFault) := nderr
|
||||
io.ncOut.bits.data := rdataPartialLoad
|
||||
io.ncOut.bits.paddr := req.paddr
|
||||
io.ncOut.bits.vaddr := req.vaddr
|
||||
io.ncOut.bits.nc := true.B
|
||||
io.ncOut.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
|
||||
io.ncOut.bits.schedIndex := req.schedIndex
|
||||
io.ncOut.bits.isvec := req.isvec
|
||||
io.ncOut.bits.is128bit := req.is128bit
|
||||
io.ncOut.bits.vecActive := req.vecActive
|
||||
|
||||
|
||||
io.exception.valid := io.ncOut.fire
|
||||
io.exception.bits := req
|
||||
io.exception.bits.uop.exceptionVec(loadAccessFault) := nderr
|
||||
io.exception.valid := io.ncOut.fire
|
||||
io.exception.bits := req
|
||||
io.exception.bits.uop.exceptionVec(loadAccessFault) := nderr
|
||||
|
||||
|
||||
when (io.ncOut.fire) {
|
||||
req_valid := false.B
|
||||
when (io.ncOut.fire) {
|
||||
req_valid := false.B
|
||||
|
||||
XSInfo("int load miss write to cbd robidx %d lqidx %d pc 0x%x mmio %x\n",
|
||||
io.ncOut.bits.uop.robIdx.asUInt,
|
||||
io.ncOut.bits.uop.lqIdx.asUInt,
|
||||
io.ncOut.bits.uop.pc,
|
||||
true.B
|
||||
)
|
||||
}
|
||||
XSInfo("int load miss write to cbd robidx %d lqidx %d pc 0x%x mmio %x\n",
|
||||
io.ncOut.bits.uop.robIdx.asUInt,
|
||||
io.ncOut.bits.uop.lqIdx.asUInt,
|
||||
io.ncOut.bits.uop.pc,
|
||||
true.B
|
||||
)
|
||||
}
|
||||
|
||||
// end
|
||||
}
|
||||
// end
|
||||
}
|
||||
|
||||
class NCBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
|
||||
val io = IO(new Bundle() {
|
||||
// control
|
||||
val redirect = Flipped(Valid(new Redirect))
|
||||
class NCBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
|
||||
val io = IO(new Bundle() {
|
||||
// control
|
||||
val redirect = Flipped(Valid(new Redirect))
|
||||
|
||||
//from loadunit
|
||||
val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
|
||||
//from loadunit
|
||||
val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
|
||||
|
||||
//to loadunit: return response of nc with data
|
||||
val ncOut = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
|
||||
//to loadunit: return response of nc with data
|
||||
val ncOut = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
|
||||
|
||||
// uncache io
|
||||
val uncache = new UncacheWordIO
|
||||
// uncache io
|
||||
val uncache = new UncacheWordIO
|
||||
|
||||
// rollback from frontend when NCBuffer is full
|
||||
val rollback = Output(Valid(new Redirect))
|
||||
// rollback from frontend when NCBuffer is full
|
||||
val rollback = Output(Valid(new Redirect))
|
||||
|
||||
// exception generated by outer bus
|
||||
val exception = Valid(new LqWriteBundle)
|
||||
})
|
||||
// exception generated by outer bus
|
||||
val exception = Valid(new LqWriteBundle)
|
||||
})
|
||||
|
||||
val entries = Seq.tabulate(LoadNCBufferSize)(i => Module(new NCBufferEntry(i)))
|
||||
val entries = Seq.tabulate(LoadNCBufferSize)(i => Module(new NCBufferEntry(i)))
|
||||
|
||||
// freelist: store valid entries index.
|
||||
// +---+---+--------------+-----+-----+
|
||||
// | 0 | 1 | ...... | n-2 | n-1 |
|
||||
// +---+---+--------------+-----+-----+
|
||||
val freeList = Module(new FreeList(
|
||||
size = LoadNCBufferSize,
|
||||
allocWidth = LoadPipelineWidth,
|
||||
freeWidth = 4,
|
||||
enablePreAlloc = true,
|
||||
moduleName = "NCBuffer freelist"
|
||||
))
|
||||
freeList.io := DontCare
|
||||
// freelist: store valid entries index.
|
||||
// +---+---+--------------+-----+-----+
|
||||
// | 0 | 1 | ...... | n-2 | n-1 |
|
||||
// +---+---+--------------+-----+-----+
|
||||
val freeList = Module(new FreeList(
|
||||
size = LoadNCBufferSize,
|
||||
allocWidth = LoadPipelineWidth,
|
||||
freeWidth = 4,
|
||||
enablePreAlloc = true,
|
||||
moduleName = "NCBuffer freelist"
|
||||
))
|
||||
freeList.io := DontCare
|
||||
|
||||
// set enqueue default
|
||||
entries.foreach {
|
||||
case (e) =>
|
||||
e.io.req.valid := false.B
|
||||
e.io.req.bits := DontCare
|
||||
}
|
||||
// set enqueue default
|
||||
entries.foreach {
|
||||
case (e) =>
|
||||
e.io.req.valid := false.B
|
||||
e.io.req.bits := DontCare
|
||||
}
|
||||
|
||||
// set uncache default
|
||||
io.uncache.req.valid := false.B
|
||||
io.uncache.req.bits := DontCare
|
||||
io.uncache.resp.ready := false.B
|
||||
// set uncache default
|
||||
io.uncache.req.valid := false.B
|
||||
io.uncache.req.bits := DontCare
|
||||
io.uncache.resp.ready := false.B
|
||||
|
||||
entries.foreach {
|
||||
case (e) =>
|
||||
e.io.uncache.req.ready := false.B
|
||||
e.io.uncache.resp.valid := false.B
|
||||
e.io.uncache.resp.bits := DontCare
|
||||
}
|
||||
entries.foreach {
|
||||
case (e) =>
|
||||
e.io.uncache.req.ready := false.B
|
||||
e.io.uncache.resp.valid := false.B
|
||||
e.io.uncache.resp.bits := DontCare
|
||||
}
|
||||
|
||||
// set writeback default
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
io.ncOut(w).valid := false.B
|
||||
io.ncOut(w).bits := DontCare
|
||||
}
|
||||
// set writeback default
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
io.ncOut(w).valid := false.B
|
||||
io.ncOut(w).bits := DontCare
|
||||
}
|
||||
|
||||
// enqueue
|
||||
// s1:
|
||||
val s1_req = VecInit(io.req.map(_.bits))
|
||||
val s1_valid = VecInit(io.req.map(_.valid))
|
||||
// enqueue
|
||||
// s1:
|
||||
val s1_req = VecInit(io.req.map(_.bits))
|
||||
val s1_valid = VecInit(io.req.map(_.valid))
|
||||
|
||||
// s2: enqueue
|
||||
val s2_req = (0 until LoadPipelineWidth).map(i => {
|
||||
RegEnable(s1_req(i), s1_valid(i))})
|
||||
val s2_valid = (0 until LoadPipelineWidth).map(i => {
|
||||
RegNext(s1_valid(i)) &&
|
||||
!s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
|
||||
!s2_req(i).uop.robIdx.needFlush(io.redirect)
|
||||
})
|
||||
val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.exceptionVec, LduCfg).asUInt.orR)
|
||||
val s2_need_replay = s2_req.map(_.rep_info.need_rep)
|
||||
// s2: enqueue
|
||||
val s2_req = (0 until LoadPipelineWidth).map(i => {
|
||||
RegEnable(s1_req(i), s1_valid(i))})
|
||||
val s2_valid = (0 until LoadPipelineWidth).map(i => {
|
||||
RegNext(s1_valid(i)) &&
|
||||
!s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
|
||||
!s2_req(i).uop.robIdx.needFlush(io.redirect)
|
||||
})
|
||||
val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.exceptionVec, LduCfg).asUInt.orR)
|
||||
val s2_need_replay = s2_req.map(_.rep_info.need_rep)
|
||||
|
||||
val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
s2_enqueue(w) := s2_valid(w) && !s2_has_exception(w) && !s2_need_replay(w) && s2_req(w).nc
|
||||
}
|
||||
val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
s2_enqueue(w) := s2_valid(w) && !s2_has_exception(w) && !s2_need_replay(w) && s2_req(w).nc
|
||||
}
|
||||
|
||||
//
|
||||
val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
|
||||
val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
|
||||
//
|
||||
val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
|
||||
val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
|
||||
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
freeList.io.allocateReq(w) := true.B
|
||||
}
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
freeList.io.allocateReq(w) := true.B
|
||||
}
|
||||
|
||||
// freeList real-allocate
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
freeList.io.doAllocate(w) := enqValidVec(w)
|
||||
}
|
||||
// freeList real-allocate
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
freeList.io.doAllocate(w) := enqValidVec(w)
|
||||
}
|
||||
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
enqValidVec(w) := s2_enqueue(w) && freeList.io.canAllocate(w)
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
enqValidVec(w) := s2_enqueue(w) && freeList.io.canAllocate(w)
|
||||
|
||||
val offset = PopCount(s2_enqueue.take(w))
|
||||
enqIndexVec(w) := freeList.io.allocateSlot(offset)
|
||||
}
|
||||
val offset = PopCount(s2_enqueue.take(w))
|
||||
enqIndexVec(w) := freeList.io.allocateSlot(offset)
|
||||
}
|
||||
|
||||
// TODO lyq: It's best to choose in robIdx order
|
||||
val uncacheReqArb = Module(new RRArbiterInit(io.uncache.req.bits.cloneType, LoadNCBufferSize))
|
||||
val ncOutArb = Module(new RRArbiterInit(io.ncOut(0).bits.cloneType, LoadNCBufferSize))
|
||||
// TODO lyq: It's best to choose in robIdx order / the order in which they enter
|
||||
val uncacheReqArb = Module(new RRArbiterInit(io.uncache.req.bits.cloneType, LoadNCBufferSize))
|
||||
val ncOutArb = Module(new RRArbiterInit(io.ncOut(0).bits.cloneType, LoadNCBufferSize))
|
||||
|
||||
entries.zipWithIndex.foreach {
|
||||
case (e, i) =>
|
||||
e.io.redirect <> io.redirect
|
||||
e.io.id := i.U
|
||||
entries.zipWithIndex.foreach {
|
||||
case (e, i) =>
|
||||
e.io.redirect <> io.redirect
|
||||
e.io.id := i.U
|
||||
|
||||
// enqueue
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
when (enqValidVec(w) && (i.U === enqIndexVec(w))) {
|
||||
e.io.req.valid := true.B
|
||||
e.io.req.bits := s2_req(w)
|
||||
}
|
||||
}
|
||||
// enqueue
|
||||
for (w <- 0 until LoadPipelineWidth) {
|
||||
when (enqValidVec(w) && (i.U === enqIndexVec(w))) {
|
||||
e.io.req.valid := true.B
|
||||
e.io.req.bits := s2_req(w)
|
||||
}
|
||||
}
|
||||
|
||||
// uncache logic
|
||||
uncacheReqArb.io.in(i).valid := e.io.uncache.req.valid
|
||||
uncacheReqArb.io.in(i).bits := e.io.uncache.req.bits
|
||||
e.io.uncache.req.ready := uncacheReqArb.io.in(i).ready
|
||||
ncOutArb.io.in(i).valid := e.io.ncOut.valid
|
||||
ncOutArb.io.in(i).bits := e.io.ncOut.bits
|
||||
e.io.ncOut.ready := ncOutArb.io.in(i).ready
|
||||
// uncache logic
|
||||
uncacheReqArb.io.in(i).valid := e.io.uncache.req.valid
|
||||
uncacheReqArb.io.in(i).bits := e.io.uncache.req.bits
|
||||
e.io.uncache.req.ready := uncacheReqArb.io.in(i).ready
|
||||
ncOutArb.io.in(i).valid := e.io.ncOut.valid
|
||||
ncOutArb.io.in(i).bits := e.io.ncOut.bits
|
||||
e.io.ncOut.ready := ncOutArb.io.in(i).ready
|
||||
|
||||
when (i.U === io.uncache.resp.bits.id) {
|
||||
e.io.uncache.resp <> io.uncache.resp
|
||||
}
|
||||
}
|
||||
when (i.U === io.uncache.resp.bits.id) {
|
||||
e.io.uncache.resp <> io.uncache.resp
|
||||
}
|
||||
}
|
||||
|
||||
// uncache Request
|
||||
AddPipelineReg(uncacheReqArb.io.out, io.uncache.req, false.B)
|
||||
// uncache Request
|
||||
AddPipelineReg(uncacheReqArb.io.out, io.uncache.req, false.B)
|
||||
|
||||
// uncache Writeback
|
||||
AddPipelineReg(ncOutArb.io.out, io.ncOut(0), false.B)
|
||||
// uncache Writeback
|
||||
AddPipelineReg(ncOutArb.io.out, io.ncOut(0), false.B)
|
||||
|
||||
// uncache exception
|
||||
io.exception.valid := Cat(entries.map(_.io.exception.valid)).orR
|
||||
io.exception.bits := ParallelPriorityMux(entries.map(e =>
|
||||
(e.io.exception.valid, e.io.exception.bits)
|
||||
))
|
||||
// uncache exception
|
||||
io.exception.valid := Cat(entries.map(_.io.exception.valid)).orR
|
||||
io.exception.bits := ParallelPriorityMux(entries.map(e =>
|
||||
(e.io.exception.valid, e.io.exception.bits)
|
||||
))
|
||||
|
||||
// UncacheBuffer deallocate
|
||||
val freeMaskVec = Wire(Vec(LoadNCBufferSize, Bool()))
|
||||
// UncacheBuffer deallocate
|
||||
val freeMaskVec = Wire(Vec(LoadNCBufferSize, Bool()))
|
||||
|
||||
// init
|
||||
freeMaskVec.map(e => e := false.B)
|
||||
// init
|
||||
freeMaskVec.map(e => e := false.B)
|
||||
|
||||
// dealloc logic
|
||||
entries.zipWithIndex.foreach {
|
||||
case (e, i) =>
|
||||
when (e.io.ncOut.fire || e.io.flush) {
|
||||
freeMaskVec(i) := true.B
|
||||
}
|
||||
}
|
||||
// dealloc logic
|
||||
entries.zipWithIndex.foreach {
|
||||
case (e, i) =>
|
||||
when (e.io.ncOut.fire || e.io.flush) {
|
||||
freeMaskVec(i) := true.B
|
||||
}
|
||||
}
|
||||
|
||||
freeList.io.free := freeMaskVec.asUInt
|
||||
freeList.io.free := freeMaskVec.asUInt
|
||||
|
||||
/**
|
||||
* Uncache rollback detection
|
||||
*
|
||||
* When uncache loads enqueue, it searches uncache loads, They can not enqueue and need re-execution.
|
||||
*
|
||||
* Cycle 0: uncache enqueue.
|
||||
* Cycle 1: Select oldest uncache loads.
|
||||
* Cycle 2: Redirect Fire.
|
||||
* Choose the oldest load from LoadPipelineWidth oldest loads.
|
||||
* Prepare redirect request according to the detected rejection.
|
||||
* Fire redirect request (if valid)
|
||||
*/
|
||||
// Load_S3 .... Load_S3
|
||||
// stage 0: lq lq
|
||||
// | | (can not enqueue)
|
||||
// stage 1: lq lq
|
||||
// | |
|
||||
// ---------------
|
||||
// |
|
||||
// stage 2: lq
|
||||
// |
|
||||
// rollback req
|
||||
def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
|
||||
val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
|
||||
val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
|
||||
(if (j < i) !xs(j).valid || compareVec(i)(j)
|
||||
else if (j == i) xs(i).valid
|
||||
else !xs(j).valid || !compareVec(j)(i))
|
||||
)).andR))
|
||||
resultOnehot
|
||||
}
|
||||
val reqNeedCheck = VecInit((0 until LoadPipelineWidth).map(w =>
|
||||
s2_enqueue(w) && !enqValidVec(w)
|
||||
))
|
||||
val reqSelUops = VecInit(s2_req.map(_.uop))
|
||||
val allRedirect = (0 until LoadPipelineWidth).map(i => {
|
||||
val redirect = Wire(Valid(new Redirect))
|
||||
redirect.valid := reqNeedCheck(i)
|
||||
redirect.bits := DontCare
|
||||
redirect.bits.isRVC := reqSelUops(i).preDecodeInfo.isRVC
|
||||
redirect.bits.robIdx := reqSelUops(i).robIdx
|
||||
redirect.bits.ftqIdx := reqSelUops(i).ftqPtr
|
||||
redirect.bits.ftqOffset := reqSelUops(i).ftqOffset
|
||||
redirect.bits.level := RedirectLevel.flush
|
||||
redirect.bits.cfiUpdate.target := reqSelUops(i).pc // TODO: check if need pc
|
||||
redirect.bits.debug_runahead_checkpoint_id := reqSelUops(i).debugInfo.runahead_checkpoint_id
|
||||
redirect
|
||||
})
|
||||
val oldestOneHot = selectOldestRedirect(allRedirect)
|
||||
val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
|
||||
val lastCycleRedirect = Wire(Valid(new Redirect))
|
||||
lastCycleRedirect.valid := RegNext(io.redirect.valid)
|
||||
lastCycleRedirect.bits := RegEnable(io.redirect.bits, io.redirect.valid)
|
||||
val lastLastCycleRedirect = Wire(Valid(new Redirect))
|
||||
lastLastCycleRedirect.valid := RegNext(lastCycleRedirect.valid)
|
||||
lastLastCycleRedirect.bits := RegEnable(lastCycleRedirect.bits, lastCycleRedirect.valid)
|
||||
io.rollback.valid := GatedValidRegNext(oldestRedirect.valid &&
|
||||
!oldestRedirect.bits.robIdx.needFlush(io.redirect) &&
|
||||
!oldestRedirect.bits.robIdx.needFlush(lastCycleRedirect) &&
|
||||
!oldestRedirect.bits.robIdx.needFlush(lastLastCycleRedirect))
|
||||
io.rollback.bits := RegEnable(oldestRedirect.bits, oldestRedirect.valid)
|
||||
/**
|
||||
* Uncache rollback detection
|
||||
*
|
||||
* When uncache loads enqueue, it searches uncache loads, They can not enqueue and need re-execution.
|
||||
*
|
||||
* Cycle 0: uncache enqueue.
|
||||
* Cycle 1: Select oldest uncache loads.
|
||||
* Cycle 2: Redirect Fire.
|
||||
* Choose the oldest load from LoadPipelineWidth oldest loads.
|
||||
* Prepare redirect request according to the detected rejection.
|
||||
* Fire redirect request (if valid)
|
||||
*/
|
||||
// Load_S3 .... Load_S3
|
||||
// stage 0: lq lq
|
||||
// | | (can not enqueue)
|
||||
// stage 1: lq lq
|
||||
// | |
|
||||
// ---------------
|
||||
// |
|
||||
// stage 2: lq
|
||||
// |
|
||||
// rollback req
|
||||
def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
|
||||
val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
|
||||
val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
|
||||
(if (j < i) !xs(j).valid || compareVec(i)(j)
|
||||
else if (j == i) xs(i).valid
|
||||
else !xs(j).valid || !compareVec(j)(i))
|
||||
)).andR))
|
||||
resultOnehot
|
||||
}
|
||||
val reqNeedCheck = VecInit((0 until LoadPipelineWidth).map(w =>
|
||||
s2_enqueue(w) && !enqValidVec(w)
|
||||
))
|
||||
val reqSelUops = VecInit(s2_req.map(_.uop))
|
||||
val allRedirect = (0 until LoadPipelineWidth).map(i => {
|
||||
val redirect = Wire(Valid(new Redirect))
|
||||
redirect.valid := reqNeedCheck(i)
|
||||
redirect.bits := DontCare
|
||||
redirect.bits.isRVC := reqSelUops(i).preDecodeInfo.isRVC
|
||||
redirect.bits.robIdx := reqSelUops(i).robIdx
|
||||
redirect.bits.ftqIdx := reqSelUops(i).ftqPtr
|
||||
redirect.bits.ftqOffset := reqSelUops(i).ftqOffset
|
||||
redirect.bits.level := RedirectLevel.flush
|
||||
redirect.bits.cfiUpdate.target := reqSelUops(i).pc // TODO: check if need pc
|
||||
redirect.bits.debug_runahead_checkpoint_id := reqSelUops(i).debugInfo.runahead_checkpoint_id
|
||||
redirect
|
||||
})
|
||||
val oldestOneHot = selectOldestRedirect(allRedirect)
|
||||
val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
|
||||
val lastCycleRedirect = Wire(Valid(new Redirect))
|
||||
lastCycleRedirect.valid := RegNext(io.redirect.valid)
|
||||
lastCycleRedirect.bits := RegEnable(io.redirect.bits, io.redirect.valid)
|
||||
val lastLastCycleRedirect = Wire(Valid(new Redirect))
|
||||
lastLastCycleRedirect.valid := RegNext(lastCycleRedirect.valid)
|
||||
lastLastCycleRedirect.bits := RegEnable(lastCycleRedirect.bits, lastCycleRedirect.valid)
|
||||
io.rollback.valid := GatedValidRegNext(oldestRedirect.valid &&
|
||||
!oldestRedirect.bits.robIdx.needFlush(io.redirect) &&
|
||||
!oldestRedirect.bits.robIdx.needFlush(lastCycleRedirect) &&
|
||||
!oldestRedirect.bits.robIdx.needFlush(lastLastCycleRedirect))
|
||||
io.rollback.bits := RegEnable(oldestRedirect.bits, oldestRedirect.valid)
|
||||
|
||||
// perf counter
|
||||
val validCount = freeList.io.validCount
|
||||
val allowEnqueue = !freeList.io.empty
|
||||
QueuePerf(LoadNCBufferSize, validCount, !allowEnqueue)
|
||||
// perf counter
|
||||
val validCount = freeList.io.validCount
|
||||
val allowEnqueue = !freeList.io.empty
|
||||
QueuePerf(LoadNCBufferSize, validCount, !allowEnqueue)
|
||||
|
||||
XSPerfAccumulate("ncReqCycle", VecInit(uncacheReqArb.io.in.map(_.fire)).asUInt.orR)
|
||||
XSPerfAccumulate("ncUncacheReqCnt", io.uncache.req.fire)
|
||||
XSPerfAccumulate("nc_writeback_success", io.ncOut(0).fire)
|
||||
XSPerfAccumulate("nc_writeback_blocked", io.ncOut(0).valid && !io.ncOut(0).ready)
|
||||
XSPerfAccumulate("uncache_full_rollback", io.rollback.valid)
|
||||
XSPerfAccumulate("ncReqCycle", VecInit(uncacheReqArb.io.in.map(_.fire)).asUInt.orR)
|
||||
XSPerfAccumulate("ncUncacheReqCnt", io.uncache.req.fire)
|
||||
XSPerfAccumulate("nc_writeback_success", io.ncOut(0).fire)
|
||||
XSPerfAccumulate("nc_writeback_blocked", io.ncOut(0).valid && !io.ncOut(0).ready)
|
||||
XSPerfAccumulate("uncache_full_rollback", io.rollback.valid)
|
||||
|
||||
val perfEvents: Seq[(String, UInt)] = Seq(
|
||||
("ncReqCycle", VecInit(uncacheReqArb.io.in.map(_.fire)).asUInt.orR),
|
||||
("ncUncacheReqCnt", io.uncache.req.fire),
|
||||
("nc_writeback_success", io.ncOut(0).fire),
|
||||
("nc_writeback_blocked", io.ncOut(0).valid && !io.ncOut(0).ready),
|
||||
("uncache_full_rollback", io.rollback.valid)
|
||||
)
|
||||
// end
|
||||
}
|
||||
val perfEvents: Seq[(String, UInt)] = Seq(
|
||||
("ncReqCycle", VecInit(uncacheReqArb.io.in.map(_.fire)).asUInt.orR),
|
||||
("ncUncacheReqCnt", io.uncache.req.fire),
|
||||
("nc_writeback_success", io.ncOut(0).fire),
|
||||
("nc_writeback_blocked", io.ncOut(0).valid && !io.ncOut(0).ready),
|
||||
("uncache_full_rollback", io.rollback.valid)
|
||||
)
|
||||
// end
|
||||
}
|
||||
|
|
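The NCBuffer above manages its entries through a free list: each load pipeline asks for a slot, `canAllocate`/`allocateSlot` hand out indices, and entries are returned when they write back (`ncOut.fire`) or are flushed. A stripped-down sketch of that allocate/free handshake, using a simple single-way bitmap in place of the real `FreeList` module:

```scala
import chisel3._
import chisel3.util._

// One-way allocate / bitmap free, standing in for the real FreeList module.
class SimpleFreeList(entries: Int = 8) extends Module {
  val io = IO(new Bundle {
    val allocReq = Input(Bool())
    val canAlloc = Output(Bool())
    val allocIdx = Output(UInt(log2Ceil(entries).W))
    val free     = Input(UInt(entries.W)) // bitmap of entries released this cycle
  })
  val allocated = RegInit(0.U(entries.W))
  val occupied  = allocated & ~io.free                 // apply releases first
  val freeVec   = VecInit((0 until entries).map(i => !occupied(i)))
  io.canAlloc := freeVec.asUInt.orR
  io.allocIdx := PriorityEncoder(freeVec)
  val allocOH = Mux(io.allocReq && io.canAlloc, UIntToOH(io.allocIdx, entries), 0.U)
  allocated := occupied | allocOH
}
```

The real buffer does the same per load pipeline (two allocation ways, offset by `PopCount` of earlier enqueues) and raises a rollback redirect when an nc load cannot get a slot.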
|
|||
|
|
@ -62,6 +62,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
|
|||
|
||||
// data path
|
||||
val sbuffer = new LoadForwardQueryIO
|
||||
val ubuffer = new LoadForwardQueryIO
|
||||
val vec_forward = new LoadForwardQueryIO
|
||||
val lsq = new LoadToLsqIO
|
||||
val tl_d_channel = Input(new DcacheToLduForwardIO)
|
||||
|
|
@ -608,6 +609,14 @@ class HybridUnit(implicit p: Parameters) extends XSModule
|
|||
io.ldu_io.sbuffer.mask := s1_in.mask
|
||||
io.ldu_io.sbuffer.pc := s1_in.uop.pc // FIXME: remove it
|
||||
|
||||
io.ldu_io.ubuffer.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_fast_rep_kill || s1_prf || !s1_ld_flow)
|
||||
io.ldu_io.ubuffer.vaddr := s1_vaddr
|
||||
io.ldu_io.ubuffer.paddr := s1_paddr_dup_lsu
|
||||
io.ldu_io.ubuffer.uop := s1_in.uop
|
||||
io.ldu_io.ubuffer.sqIdx := s1_in.uop.sqIdx
|
||||
io.ldu_io.ubuffer.mask := s1_in.mask
|
||||
io.ldu_io.ubuffer.pc := s1_in.uop.pc // FIXME: remove it
|
||||
|
||||
io.ldu_io.vec_forward.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_fast_rep_kill || s1_prf || !s1_ld_flow)
|
||||
io.ldu_io.vec_forward.vaddr := s1_vaddr
|
||||
io.ldu_io.vec_forward.paddr := s1_paddr_dup_lsu
|
||||
|
|
@ -970,16 +979,12 @@ class HybridUnit(implicit p: Parameters) extends XSModule
|
|||
s2_full_fwd := ((~s2_fwd_mask.asUInt).asUInt & s2_in.mask) === 0.U && !io.ldu_io.lsq.forward.dataInvalid && !io.ldu_io.vec_forward.dataInvalid
|
||||
// generate XLEN/8 Muxs
|
||||
for (i <- 0 until VLEN / 8) {
|
||||
s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i)
|
||||
s2_fwd_data(i) := Mux(
|
||||
io.ldu_io.lsq.forward.forwardMask(i),
|
||||
io.ldu_io.lsq.forward.forwardData(i),
|
||||
Mux(
|
||||
io.ldu_io.vec_forward.forwardMask(i),
|
||||
io.ldu_io.vec_forward.forwardData(i),
|
||||
io.ldu_io.sbuffer.forwardData(i)
|
||||
)
|
||||
)
|
||||
s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i) || io.ldu_io.ubuffer.forwardMask(i)
|
||||
s2_fwd_data(i) :=
|
||||
Mux(io.ldu_io.lsq.forward.forwardMask(i), io.ldu_io.lsq.forward.forwardData(i),
|
||||
Mux(io.ldu_io.vec_forward.forwardMask(i), io.ldu_io.vec_forward.forwardData(i),
|
||||
Mux(io.ldu_io.ubuffer.forwardMask(i), io.ldu_io.ubuffer.forwardData(i),
|
||||
io.ldu_io.sbuffer.forwardData(i))))
|
||||
}
|
||||
|
||||
XSDebug(s2_fire && s2_ld_flow, "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
|
||||
|
|
@ -1159,7 +1164,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
|
|||
io.ldu_io.fast_rep_out.bits.delayedLoadError := s3_dly_ld_err
|
||||
io.ldu_io.lsq.ldin.bits.dcacheRequireReplay := s3_dcache_rep
|
||||
|
||||
val s3_vp_match_fail = RegNext(io.ldu_io.lsq.forward.matchInvalid || io.ldu_io.sbuffer.matchInvalid) && s3_troublem
|
||||
val s3_vp_match_fail = RegNext(io.ldu_io.lsq.forward.matchInvalid || io.ldu_io.sbuffer.matchInvalid || io.ldu_io.ubuffer.matchInvalid) && s3_troublem
|
||||
val s3_ldld_rep_inst =
|
||||
io.ldu_io.lsq.ldld_nuke_query.resp.valid &&
|
||||
io.ldu_io.lsq.ldld_nuke_query.resp.bits.rep_frm_fetch &&
|
||||
|
|
|
|||
|
|
@ -132,6 +132,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule
|
|||
val pmp = Flipped(new PMPRespBundle()) // arrive same to tlb now
|
||||
val dcache = new DCacheLoadIO
|
||||
val sbuffer = new LoadForwardQueryIO
|
||||
val ubuffer = new LoadForwardQueryIO
|
||||
val lsq = new LoadToLsqIO
|
||||
val tl_d_channel = Input(new DcacheToLduForwardIO)
|
||||
val forward_mshr = Flipped(new LduToMissqueueForwardIO)
|
||||
|
|
@ -926,6 +927,14 @@ class LoadUnit(implicit p: Parameters) extends XSModule
|
|||
io.sbuffer.mask := s1_in.mask
|
||||
io.sbuffer.pc := s1_in.uop.pc // FIXME: remove it
|
||||
|
||||
io.ubuffer.valid := s1_valid && s1_nc_with_data && !(s1_exception || s1_tlb_miss || s1_kill || s1_dly_err || s1_prf)
|
||||
io.ubuffer.vaddr := s1_vaddr
|
||||
io.ubuffer.paddr := s1_paddr_dup_lsu
|
||||
io.ubuffer.uop := s1_in.uop
|
||||
io.ubuffer.sqIdx := s1_in.uop.sqIdx
|
||||
io.ubuffer.mask := s1_in.mask
|
||||
io.ubuffer.pc := s1_in.uop.pc // FIXME: remove it
|
||||
|
||||
io.lsq.forward.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_dly_err || s1_prf)
|
||||
io.lsq.forward.vaddr := s1_vaddr
|
||||
io.lsq.forward.paddr := s1_paddr_dup_lsu
|
||||
|
|
@ -1244,7 +1253,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule
|
|||
|
||||
val s2_data_fwded = s2_dcache_miss && s2_full_fwd
|
||||
|
||||
val s2_vp_match_fail = (io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid) && s2_troublem
|
||||
val s2_fwd_vp_match_invalid = io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid || io.ubuffer.matchInvalid
|
||||
val s2_vp_match_fail = s2_fwd_vp_match_invalid && s2_troublem
|
||||
val s2_safe_wakeup = !s2_out.rep_info.need_rep && !s2_mmio && (!s2_in.nc || s2_nc_with_data) && !s2_mis_align && !s2_exception // don't need to replay and is not a mmio\misalign no data
|
||||
val s2_safe_writeback = s2_exception || s2_safe_wakeup || s2_vp_match_fail
|
||||
|
||||
|
|
@ -1271,8 +1281,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule
|
|||
s2_full_fwd := ((~s2_fwd_mask.asUInt).asUInt & s2_in.mask) === 0.U && !io.lsq.forward.dataInvalid
|
||||
// generate XLEN/8 Muxs
|
||||
for (i <- 0 until VLEN / 8) {
|
||||
s2_fwd_mask(i) := io.lsq.forward.forwardMask(i) || io.sbuffer.forwardMask(i)
|
||||
s2_fwd_data(i) := Mux(io.lsq.forward.forwardMask(i), io.lsq.forward.forwardData(i), io.sbuffer.forwardData(i))
|
||||
s2_fwd_mask(i) := io.lsq.forward.forwardMask(i) || io.sbuffer.forwardMask(i) || io.ubuffer.forwardMask(i)
|
||||
s2_fwd_data(i) :=
|
||||
Mux(io.lsq.forward.forwardMask(i), io.lsq.forward.forwardData(i),
|
||||
Mux(s2_nc_with_data, io.ubuffer.forwardData(i),
|
||||
io.sbuffer.forwardData(i)))
|
||||
}
|
||||
|
||||
XSDebug(s2_fire, "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
|
||||
|
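On the load-unit side, each byte of the result is now picked with a fixed priority: store-queue forward first, then (for nc loads with data) the uncache buffer, then the sbuffer. A per-byte sketch of that selection, with hypothetical inputs:

```scala
import chisel3._

class FwdByteSelect(bytes: Int = 16) extends Module {
  val io = IO(new Bundle {
    val lsqMask  = Input(Vec(bytes, Bool()))
    val lsqData  = Input(Vec(bytes, UInt(8.W)))
    val ubufMask = Input(Vec(bytes, Bool()))
    val ubufData = Input(Vec(bytes, UInt(8.W)))
    val sbufMask = Input(Vec(bytes, Bool()))
    val sbufData = Input(Vec(bytes, UInt(8.W)))
    val ncLoad   = Input(Bool())          // nc-with-data loads take the ubuffer path
    val outMask  = Output(Vec(bytes, Bool()))
    val outData  = Output(Vec(bytes, UInt(8.W)))
  })
  for (i <- 0 until bytes) {
    io.outMask(i) := io.lsqMask(i) || io.sbufMask(i) || io.ubufMask(i)
    io.outData(i) :=
      Mux(io.lsqMask(i), io.lsqData(i),
      Mux(io.ncLoad,     io.ubufData(i),
                         io.sbufData(i)))
  }
}
```

The hybrid unit's version keys the middle level on `ubuffer.forwardMask(i)` instead of an nc flag, but the priority order is the same.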
|
@ -1458,7 +1471,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule
|
|||
io.lsq.ldin.bits.dcacheRequireReplay := s3_dcache_rep
|
||||
io.fast_rep_out.bits.delayedLoadError := s3_dly_ld_err
|
||||
|
||||
val s3_vp_match_fail = GatedValidRegNext(io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid) && s3_troublem
|
||||
val s3_vp_match_fail = GatedValidRegNext(s2_fwd_vp_match_invalid) && s3_troublem
|
||||
val s3_rep_frm_fetch = s3_vp_match_fail
|
||||
val s3_ldld_rep_inst =
|
||||
io.lsq.ldld_nuke_query.resp.valid &&
|
||||
|
|
|
|||
|
|
@ -303,7 +303,7 @@ class Sbuffer(implicit p: Parameters)
|
|||
// sbuffer_in_s1:
|
||||
// * read data and meta from fifo queue
|
||||
// * update sbuffer meta (vtag, ptag, flag)
|
||||
// * prevert that line from being sent to dcache (add a block condition)
|
||||
// * prevent that line from being sent to dcache (add a block condition)
|
||||
// * prepare cacheline level write enable signal, RegNext() data and mask
|
||||
|
||||
// sbuffer_in_s2:
|
||||
|
|
|
|||