feat(outstanding): support nc outstanding and remove mmio st outstanding

This commit is contained in:
Yanqin Li 2024-11-19 17:35:13 +08:00 committed by zhanglinjuan
parent cfdd605feb
commit e04c5f647e
12 changed files with 582 additions and 433 deletions

View File

@ -205,6 +205,11 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.redirect := backend.io.mem.redirect
memBlock.io.ooo_to_mem.csrCtrl := backend.io.mem.csrCtrl
// XXX lyq: remove this before PR
val tmp_debug_uncache_otsd = Constantin.createRecord("uncache_outstanding_enable", 0)
memBlock.io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable := tmp_debug_uncache_otsd
memBlock.io.ooo_to_mem.tlbCsr := backend.io.mem.tlbCsr
memBlock.io.ooo_to_mem.lsqio.lcommit := backend.io.mem.robLsqIO.lcommit
memBlock.io.ooo_to_mem.lsqio.scommit := backend.io.mem.robLsqIO.scommit

View File

@ -820,6 +820,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
// forward
loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
loadUnits(i).io.ubuffer <> uncache.io.forward(i)
loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
// ld-ld violation check
@ -963,6 +964,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
// forward
hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
// hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
hybridUnits(i).io.ldu_io.vec_forward := DontCare
hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
@ -1332,8 +1334,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
is (s_idle) {
when (uncacheReq.fire) {
when (lsq.io.uncache.req.valid) {
val isStore = lsq.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
when (!isStore || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
uncacheState := s_scalar_uncache
}
}.otherwise {

View File

@ -515,6 +515,7 @@ class UncacheWordReq(implicit p: Parameters) extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val vaddr = UInt(VAddrBits.W) // for uncache buffer forwarding
val data = UInt(XLEN.W)
val mask = UInt((XLEN/8).W)
val id = UInt(uncacheIdxBits.W)
@ -534,8 +535,9 @@ class UncacheWordResp(implicit p: Parameters) extends DCacheBundle
{
val data = UInt(XLEN.W)
val data_delayed = UInt(XLEN.W)
val id = UInt(uncacheIdxBits.W)
val nc = Bool()
val id = UInt(uncacheIdxBits.W) // response identification signal
val nc = Bool() // response identification signal
val is2lq = Bool() // response identification signal
val miss = Bool()
val replay = Bool()
val tag_error = Bool()

View File

@ -22,6 +22,7 @@ import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
@ -33,8 +34,9 @@ class UncacheFlushBundle extends Bundle {
class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val vaddr = UInt(VAddrBits.W)
val data = UInt(XLEN.W)
val mask = UInt((XLEN/8).W)
val mask = UInt(DataBytes.W)
val id = UInt(uncacheIdxBits.W)
val nc = Bool()
val atomic = Bool()
@ -43,9 +45,14 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
val resp_data = UInt(XLEN.W)
val resp_nderr = Bool()
// FIXME lyq: Confirm the forward logic. If forwarding turns out to be unnecessary, these fields can be removed.
val fwd_data = UInt(XLEN.W)
val fwd_mask = UInt(DataBytes.W)
def set(x: UncacheWordReq): Unit = {
cmd := x.cmd
addr := x.addr
vaddr := x.vaddr
data := x.data
mask := x.mask
id := x.id
@ -53,6 +60,8 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
atomic := x.atomic
resp_nderr := false.B
resp_data := 0.U
fwd_data := 0.U
fwd_mask := 0.U
}
def update(x: TLBundleD): Unit = {
@ -60,10 +69,18 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
resp_nderr := x.denied
}
def update(forwardData: UInt, forwardMask: UInt): Unit = {
fwd_data := forwardData
fwd_mask := forwardMask
}
def toUncacheWordResp(): UncacheWordResp = {
val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), resp_data(8*(j+1)-1, 8*j))
)).asUInt
val r = Wire(new UncacheWordResp)
r := DontCare
r.data := resp_data
r.data := resp_fwd_data
r.id := id
r.nderr := resp_nderr
r.nc := nc
@ -121,6 +138,7 @@ class UncacheIO(implicit p: Parameters) extends DCacheBundle {
val enableOutstanding = Input(Bool())
val flush = Flipped(new UncacheFlushBundle)
val lsq = Flipped(new UncacheWordIO)
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}
// convert DCacheIO to TileLink
@ -182,10 +200,19 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
val uState = RegInit(s_idle)
def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
// drain buffer
val empty = Wire(Bool())
val f0_needDrain = Wire(Bool())
val do_uarch_drain = RegNext(f0_needDrain)
val q0_entry = Wire(new UncacheEntry)
val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
val q0_canSent = Wire(Bool())
/******************************************************************
* uState for non-outstanding
******************************************************************/
@ -234,18 +261,27 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
2. how to merge store and response precisely
*/
val e0_invalids = sizeMap(i => !states(i).isValid())
val e0_invalid_oh = VecInit(PriorityEncoderOH(e0_invalids)).asUInt
val e0_fire = req.fire
val e0_req = req.bits
/**
TODO lyq: prohibit or wait or forward?
NOW: strictly block requests to the same address; otherwise, an exhaustive case analysis would be needed.
- ld->ld wait
- ld->st forward
- st->ld forward
- st->st block
*/
val e0_existSameVec = sizeMap(j =>
e0_req.addr === entries(j).addr && states(j).isValid()
)
val e0_invalidVec = sizeMap(i => !states(i).isValid() && !e0_existSameVec(i))
val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
val e0_alloc = e0_canAlloc && e0_fire
req_ready := e0_invalidVec.asUInt.orR && !do_uarch_drain
req_ready := e0_invalid_oh.orR
for (i <- 0 until UncacheBufferSize) {
val alloc = e0_fire && e0_invalid_oh(i)
when(alloc){
entries(i).set(e0_req)
states(i).setValid(true.B)
when (e0_alloc) {
entries(e0_allocIdx).set(e0_req)
states(e0_allocIdx).setValid(true.B)
// judge whether wait same block: e0 & q0
val waitSameVec = sizeMap(j =>
@ -253,9 +289,9 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
)
val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
when (waitSameVec.reduce(_ || _) || waitQ0) {
states(i).setWaitSame(true.B)
}
states(e0_allocIdx).setWaitSame(true.B)
}
}
@ -272,7 +308,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
******************************************************************/
val q0_canSentVec = sizeMap(i =>
// (io.enableOutstanding || uState === s_refill_req) && // FIXME lyq: comment for debug
(io.enableOutstanding || uState === s_refill_req) &&
states(i).can2Uncache()
)
val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
@ -360,9 +396,75 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
* 1. when io.flush.valid is true
* 2. when io.lsq.req.bits.atomic is true
******************************************************************/
empty := !VecInit(states.map(_.isValid())).asUInt.orR
io.flush.empty := empty
val invalid_entries = PopCount(states.map(!_.isValid()))
io.flush.empty := invalid_entries === UncacheBufferSize.U
/******************************************************************
* Load Data Forward
*
* 0. ld in ldu pipeline
* f0: tag match, fast resp
* f1: data resp
*
* 1. ld in buffer (in "Enter Buffer")
* ld(en) -> st(in): ld entry.update, state.updateUncacheResp
* st(en) -> ld(in): ld entry.update, state.updateUncacheResp
* NOW: strict block by same address; there is no such forward.
*
******************************************************************/
val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty
for ((forward, i) <- io.forward.zipWithIndex) {
val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr)
f0_tagMismatchVec(i) := forward.valid && sizeMap(w =>
f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w)
).asUInt.orR
when (f0_tagMismatchVec(i)) {
XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
RegNext(f0_ptagMatches.asUInt),
RegNext(f0_vtagMatches.asUInt),
RegNext(forward.vaddr),
RegNext(forward.paddr)
)
}
val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid)
val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
val f0_fwdMask = shiftMaskToHigh(
forward.paddr,
Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
).asTypeOf(Vec(VDataBytes, Bool()))
val f0_fwdData = shiftDataToHigh(
forward.paddr,
Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
).asTypeOf(Vec(VDataBytes, UInt(8.W)))
val f1_fwdValid = RegNext(forward.valid)
val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid)
val f1_fwdData = RegEnable(f0_fwdData, forward.valid)
forward.addrInvalid := false.B // addr in ubuffer is always ready
forward.dataInvalid := false.B // data in ubuffer is always ready
forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match
for (j <- 0 until VDataBytes) {
forward.forwardMaskFast(j) := f0_fwdMask(j)
forward.forwardMask(j) := false.B
forward.forwardData(j) := DontCare
when(f1_fwdMask(j) && f1_fwdValid) {
forward.forwardMask(j) := true.B
forward.forwardData(j) := f1_fwdData(j)
}
}
}
/******************************************************************
@ -386,18 +488,18 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
}
/* Performance Counters */
def isStore: Bool = io.lsq.req.bits.cmd === MemoryOpConstants.M_XWR
XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
XSPerfAccumulate("vaddr_match_failed", PopCount(f0_tagMismatchVec))
val perfEvents = Seq(
("uncache_mmio_store", io.lsq.req.fire && isStore && !io.lsq.req.bits.nc),
("uncache_mmio_load", io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc),
("uncache_nc_store", io.lsq.req.fire && isStore && io.lsq.req.bits.nc),
("uncache_nc_load", io.lsq.req.fire && !isStore && io.lsq.req.bits.nc),
("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
)

View File

@ -75,6 +75,16 @@ object shiftMaskToLow {
Mux(addr(3),(mask >> 8).asUInt,mask)
}
}
object shiftDataToHigh {
def apply(addr: UInt,data : UInt): UInt = {
Mux(addr(3), (data << 64).asUInt, data)
}
}
object shiftMaskToHigh {
def apply(addr: UInt,mask: UInt): UInt = {
Mux(addr(3), (mask << 8).asUInt, mask)
}
}
class LsPipelineBundle(implicit p: Parameters) extends XSBundle
with HasDCacheParameters

View File

@ -247,8 +247,10 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
switch(pendingstate){
is(s_idle){
when(io.uncache.req.fire){
pendingstate := Mux(loadQueue.io.uncache.req.valid, s_load,
Mux(io.uncacheOutstanding, s_idle, s_store))
pendingstate :=
Mux(io.uncacheOutstanding && io.uncache.req.bits.nc, s_idle,
Mux(loadQueue.io.uncache.req.valid, s_load,
s_store))
}
}
is(s_load){
@ -279,15 +281,11 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
io.uncache.req.valid := false.B
io.uncache.req.bits := DontCare
}
when (io.uncacheOutstanding) {
when (io.uncache.resp.bits.is2lq) {
io.uncache.resp <> loadQueue.io.uncache.resp
} .otherwise {
when(pendingstate === s_load){
io.uncache.resp <> loadQueue.io.uncache.resp
}.otherwise{
io.uncache.resp <> storeQueue.io.uncache.resp
}
}
loadQueue.io.debugTopDown <> io.debugTopDown

View File

@ -304,6 +304,13 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val mmioReq = Wire(chiselTypeOf(io.uncache.req))
val ncReq = Wire(chiselTypeOf(io.uncache.req))
val ncResp = Wire(chiselTypeOf(io.uncache.resp))
val ncDoReq = Wire(Bool())
val ncDoResp = Wire(Bool())
val ncReadNextTrigger = Mux(io.uncacheOutstanding, ncDoReq, ncDoResp)
// ncDoReq is double RegNexted, as ubuffer data write takes 3 cycles.
// TODO lyq: to eliminate coupling by passing signals through ubuffer
val ncDeqTrigger = Mux(io.uncacheOutstanding, RegNext(RegNext(ncDoReq)), ncDoResp)
val ncPtr = Mux(io.uncacheOutstanding, RegNext(RegNext(io.uncache.req.bits.id)), io.uncache.resp.bits.id)
// store miss align info
io.maControl.storeInfo.data := dataModule.io.rdata(0).data
@ -320,7 +327,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val rdataPtrExtNext = Wire(Vec(EnsbufferWidth, new SqPtr))
rdataPtrExtNext := rdataPtrExt.map(i => i +
PopCount(dataBuffer.io.enq.map(_.fire)) +
PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire)
PopCount(ncReadNextTrigger || io.mmioStout.fire || io.vecmmioStout.fire)
)
// deqPtrExtNext traces which inst is about to leave store queue
@ -334,12 +341,12 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val deqPtrExtNext = Wire(Vec(EnsbufferWidth, new SqPtr))
deqPtrExtNext := deqPtrExt.map(i => i +
RegNext(PopCount(VecInit(io.sbuffer.map(_.fire)))) +
PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire)
PopCount(ncDeqTrigger || io.mmioStout.fire || io.vecmmioStout.fire)
)
io.sqDeq := RegNext(
RegNext(PopCount(VecInit(io.sbuffer.map(_.fire && !misalignBlock)))) +
PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire || finishMisalignSt)
PopCount(ncDeqTrigger || io.mmioStout.fire || io.vecmmioStout.fire || finishMisalignSt)
)
assert(!RegNext(RegNext(io.sbuffer(0).fire) && (io.mmioStout.fire || io.vecmmioStout.fire)))
@ -804,13 +811,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule
}
is(s_req) {
when (mmioDoReq) {
when (io.uncacheOutstanding) {
mmioState := s_wb
} .otherwise {
mmioState := s_resp
}
}
}
is(s_resp) {
when(io.uncache.resp.fire && !io.uncache.resp.bits.nc) {
mmioState := s_wb
@ -841,6 +844,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
mmioReq.bits := DontCare
mmioReq.bits.cmd := MemoryOpConstants.M_XWR
mmioReq.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0)
mmioReq.bits.vaddr:= vaddrModule.io.rdata(0)
mmioReq.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data)
mmioReq.bits.mask := shiftMaskToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).mask)
mmioReq.bits.atomic := atomic(GatedRegNext(rdataPtrExtNext(0)).value)
@ -855,7 +859,6 @@ class StoreQueue(implicit p: Parameters) extends XSModule
// TODO: CAN NOT deal with vector nc now!
val nc_idle :: nc_req :: nc_resp :: Nil = Enum(3)
val ncState = RegInit(nc_idle)
val ncDoReq = io.uncache.req.fire && io.uncache.req.bits.nc
val rptr0 = rdataPtrExt(0).value
switch(ncState){
is(nc_idle) {
@ -865,32 +868,40 @@ class StoreQueue(implicit p: Parameters) extends XSModule
}
is(nc_req) {
when(ncDoReq) {
when(io.uncacheOutstanding) {
ncState := nc_idle
}.otherwise{
ncState := nc_resp
}
}
}
is(nc_resp) {
when(ncResp.fire) {
ncState := nc_idle
}
}
}
ncDoReq := io.uncache.req.fire && io.uncache.req.bits.nc
ncDoResp := ncResp.fire
ncReq.valid := ncState === nc_req
ncReq.bits := DontCare
ncReq.bits.cmd := MemoryOpConstants.M_XWR
ncReq.bits.addr := paddrModule.io.rdata(0)
ncReq.bits.vaddr:= vaddrModule.io.rdata(0)
ncReq.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data)
ncReq.bits.mask := shiftMaskToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).mask)
ncReq.bits.atomic := atomic(GatedRegNext(rdataPtrExtNext(0)).value)
ncReq.bits.nc := true.B
ncReq.bits.id := rdataPtrExt(0).value
ncReq.bits.id := rptr0
ncResp.ready := io.uncache.resp.ready
ncResp.valid := io.uncache.resp.fire && io.uncache.resp.bits.nc
ncResp.bits <> io.uncache.resp.bits
when (ncResp.fire) {
val ptr = io.uncache.resp.bits.id
allocated(ptr) := false.B
XSDebug("nc fire: ptr %d\n", ptr)
when (ncDeqTrigger) {
allocated(ncPtr) := false.B
XSDebug("nc fire: ptr %d\n", ncPtr)
}
mmioReq.ready := io.uncache.req.ready

View File

@ -129,6 +129,7 @@ class IOBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.data := DontCare
io.uncache.req.bits.addr := req.paddr
io.uncache.req.bits.vaddr:= req.vaddr
io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
io.uncache.req.bits.id := io.id
io.uncache.req.bits.instrtype := DontCare
@ -241,7 +242,7 @@ class IOBuffer(implicit p: Parameters) extends XSModule
allocWidth = LoadPipelineWidth,
freeWidth = 4,
enablePreAlloc = true,
moduleName = "UncacheBuffer freelist"
moduleName = "IOBuffer freelist"
))
freeList.io := DontCare

View File

@ -1,4 +1,4 @@
/***************************************************************************************
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
@ -13,25 +13,25 @@
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem
package xiangshan.mem
import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobPtr, RobLsqIO}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobPtr, RobLsqIO}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
class NCBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
class NCBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper
with HasLoadHelper
{
{
val io = IO(new Bundle() {
val id = Input(UInt())
@ -112,6 +112,7 @@
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.data := DontCare
io.uncache.req.bits.addr := req.paddr
io.uncache.req.bits.vaddr:= req.vaddr
io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0))
io.uncache.req.bits.id := io.id
io.uncache.req.bits.instrtype := DontCare
@ -186,9 +187,9 @@
}
// end
}
}
class NCBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
class NCBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
// control
val redirect = Flipped(Valid(new Redirect))
@ -290,7 +291,7 @@
enqIndexVec(w) := freeList.io.allocateSlot(offset)
}
// TODO lyq: It's best to choose in robIdx order
// TODO lyq: It's best to choose in robIdx order / the order in which they enter
val uncacheReqArb = Module(new RRArbiterInit(io.uncache.req.bits.cloneType, LoadNCBufferSize))
val ncOutArb = Module(new RRArbiterInit(io.ncOut(0).bits.cloneType, LoadNCBufferSize))
@ -429,4 +430,4 @@
("uncache_full_rollback", io.rollback.valid)
)
// end
}
}

View File

@ -62,6 +62,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
// data path
val sbuffer = new LoadForwardQueryIO
val ubuffer = new LoadForwardQueryIO
val vec_forward = new LoadForwardQueryIO
val lsq = new LoadToLsqIO
val tl_d_channel = Input(new DcacheToLduForwardIO)
@ -608,6 +609,14 @@ class HybridUnit(implicit p: Parameters) extends XSModule
io.ldu_io.sbuffer.mask := s1_in.mask
io.ldu_io.sbuffer.pc := s1_in.uop.pc // FIXME: remove it
io.ldu_io.ubuffer.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_fast_rep_kill || s1_prf || !s1_ld_flow)
io.ldu_io.ubuffer.vaddr := s1_vaddr
io.ldu_io.ubuffer.paddr := s1_paddr_dup_lsu
io.ldu_io.ubuffer.uop := s1_in.uop
io.ldu_io.ubuffer.sqIdx := s1_in.uop.sqIdx
io.ldu_io.ubuffer.mask := s1_in.mask
io.ldu_io.ubuffer.pc := s1_in.uop.pc // FIXME: remove it
io.ldu_io.vec_forward.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_fast_rep_kill || s1_prf || !s1_ld_flow)
io.ldu_io.vec_forward.vaddr := s1_vaddr
io.ldu_io.vec_forward.paddr := s1_paddr_dup_lsu
@ -970,16 +979,12 @@ class HybridUnit(implicit p: Parameters) extends XSModule
s2_full_fwd := ((~s2_fwd_mask.asUInt).asUInt & s2_in.mask) === 0.U && !io.ldu_io.lsq.forward.dataInvalid && !io.ldu_io.vec_forward.dataInvalid
// generate XLEN/8 Muxs
for (i <- 0 until VLEN / 8) {
s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i)
s2_fwd_data(i) := Mux(
io.ldu_io.lsq.forward.forwardMask(i),
io.ldu_io.lsq.forward.forwardData(i),
Mux(
io.ldu_io.vec_forward.forwardMask(i),
io.ldu_io.vec_forward.forwardData(i),
io.ldu_io.sbuffer.forwardData(i)
)
)
s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i) || io.ldu_io.ubuffer.forwardMask(i)
s2_fwd_data(i) :=
Mux(io.ldu_io.lsq.forward.forwardMask(i), io.ldu_io.lsq.forward.forwardData(i),
Mux(io.ldu_io.vec_forward.forwardMask(i), io.ldu_io.vec_forward.forwardData(i),
Mux(io.ldu_io.ubuffer.forwardMask(i), io.ldu_io.ubuffer.forwardData(i),
io.ldu_io.sbuffer.forwardData(i))))
}
XSDebug(s2_fire && s2_ld_flow, "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
@ -1159,7 +1164,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
io.ldu_io.fast_rep_out.bits.delayedLoadError := s3_dly_ld_err
io.ldu_io.lsq.ldin.bits.dcacheRequireReplay := s3_dcache_rep
val s3_vp_match_fail = RegNext(io.ldu_io.lsq.forward.matchInvalid || io.ldu_io.sbuffer.matchInvalid) && s3_troublem
val s3_vp_match_fail = RegNext(io.ldu_io.lsq.forward.matchInvalid || io.ldu_io.sbuffer.matchInvalid || io.ldu_io.ubuffer.matchInvalid) && s3_troublem
val s3_ldld_rep_inst =
io.ldu_io.lsq.ldld_nuke_query.resp.valid &&
io.ldu_io.lsq.ldld_nuke_query.resp.bits.rep_frm_fetch &&

View File

@ -132,6 +132,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule
val pmp = Flipped(new PMPRespBundle()) // arrive same to tlb now
val dcache = new DCacheLoadIO
val sbuffer = new LoadForwardQueryIO
val ubuffer = new LoadForwardQueryIO
val lsq = new LoadToLsqIO
val tl_d_channel = Input(new DcacheToLduForwardIO)
val forward_mshr = Flipped(new LduToMissqueueForwardIO)
@ -926,6 +927,14 @@ class LoadUnit(implicit p: Parameters) extends XSModule
io.sbuffer.mask := s1_in.mask
io.sbuffer.pc := s1_in.uop.pc // FIXME: remove it
io.ubuffer.valid := s1_valid && s1_nc_with_data && !(s1_exception || s1_tlb_miss || s1_kill || s1_dly_err || s1_prf)
io.ubuffer.vaddr := s1_vaddr
io.ubuffer.paddr := s1_paddr_dup_lsu
io.ubuffer.uop := s1_in.uop
io.ubuffer.sqIdx := s1_in.uop.sqIdx
io.ubuffer.mask := s1_in.mask
io.ubuffer.pc := s1_in.uop.pc // FIXME: remove it
io.lsq.forward.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_dly_err || s1_prf)
io.lsq.forward.vaddr := s1_vaddr
io.lsq.forward.paddr := s1_paddr_dup_lsu
@ -1244,7 +1253,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule
val s2_data_fwded = s2_dcache_miss && s2_full_fwd
val s2_vp_match_fail = (io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid) && s2_troublem
val s2_fwd_vp_match_invalid = io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid || io.ubuffer.matchInvalid
val s2_vp_match_fail = s2_fwd_vp_match_invalid && s2_troublem
val s2_safe_wakeup = !s2_out.rep_info.need_rep && !s2_mmio && (!s2_in.nc || s2_nc_with_data) && !s2_mis_align && !s2_exception // don't need to replay and is not a mmio\misalign no data
val s2_safe_writeback = s2_exception || s2_safe_wakeup || s2_vp_match_fail
@ -1271,8 +1281,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule
s2_full_fwd := ((~s2_fwd_mask.asUInt).asUInt & s2_in.mask) === 0.U && !io.lsq.forward.dataInvalid
// generate XLEN/8 Muxs
for (i <- 0 until VLEN / 8) {
s2_fwd_mask(i) := io.lsq.forward.forwardMask(i) || io.sbuffer.forwardMask(i)
s2_fwd_data(i) := Mux(io.lsq.forward.forwardMask(i), io.lsq.forward.forwardData(i), io.sbuffer.forwardData(i))
s2_fwd_mask(i) := io.lsq.forward.forwardMask(i) || io.sbuffer.forwardMask(i) || io.ubuffer.forwardMask(i)
s2_fwd_data(i) :=
Mux(io.lsq.forward.forwardMask(i), io.lsq.forward.forwardData(i),
Mux(s2_nc_with_data, io.ubuffer.forwardData(i),
io.sbuffer.forwardData(i)))
}
XSDebug(s2_fire, "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
@ -1458,7 +1471,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule
io.lsq.ldin.bits.dcacheRequireReplay := s3_dcache_rep
io.fast_rep_out.bits.delayedLoadError := s3_dly_ld_err
val s3_vp_match_fail = GatedValidRegNext(io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid) && s3_troublem
val s3_vp_match_fail = GatedValidRegNext(s2_fwd_vp_match_invalid) && s3_troublem
val s3_rep_frm_fetch = s3_vp_match_fail
val s3_ldld_rep_inst =
io.lsq.ldld_nuke_query.resp.valid &&

View File

@ -303,7 +303,7 @@ class Sbuffer(implicit p: Parameters)
// sbuffer_in_s1:
// * read data and meta from fifo queue
// * update sbuffer meta (vtag, ptag, flag)
// * prevert that line from being sent to dcache (add a block condition)
// * prevent that line from being sent to dcache (add a block condition)
// * prepare cacheline level write enable signal, RegNext() data and mask
// sbuffer_in_s2: