feat(outstanding): support nc outstanding and remove mmio st outstanding
This commit is contained in:
		
							parent
							
								
									cfdd605feb
								
							
						
					
					
						commit
						e04c5f647e
					
				|  | @ -205,6 +205,11 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer) | |||
| 
 | ||||
|   memBlock.io.redirect := backend.io.mem.redirect | ||||
|   memBlock.io.ooo_to_mem.csrCtrl := backend.io.mem.csrCtrl | ||||
|    | ||||
|   // XXX lyq: remove this before PR | ||||
|   val tmp_debug_uncache_otsd = Constantin.createRecord("uncache_outstanding_enable", 0) | ||||
|   memBlock.io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable := tmp_debug_uncache_otsd | ||||
| 
 | ||||
|   memBlock.io.ooo_to_mem.tlbCsr := backend.io.mem.tlbCsr | ||||
|   memBlock.io.ooo_to_mem.lsqio.lcommit          := backend.io.mem.robLsqIO.lcommit | ||||
|   memBlock.io.ooo_to_mem.lsqio.scommit          := backend.io.mem.robLsqIO.scommit | ||||
|  |  | |||
|  | @ -820,6 +820,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) | |||
|     // forward | ||||
|     loadUnits(i).io.lsq.forward <> lsq.io.forward(i) | ||||
|     loadUnits(i).io.sbuffer <> sbuffer.io.forward(i) | ||||
|     loadUnits(i).io.ubuffer <> uncache.io.forward(i) | ||||
|     loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i) | ||||
|     loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i) | ||||
|     // ld-ld violation check | ||||
|  | @ -963,6 +964,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) | |||
|     hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i) | ||||
|     // forward | ||||
|     hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i) | ||||
|     hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i) | ||||
|     // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i) | ||||
|     hybridUnits(i).io.ldu_io.vec_forward := DontCare | ||||
|     hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i) | ||||
|  | @ -1332,8 +1334,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) | |||
|     is (s_idle) { | ||||
|       when (uncacheReq.fire) { | ||||
|         when (lsq.io.uncache.req.valid) { | ||||
|           val isStore = lsq.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR | ||||
|           when (!isStore || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { | ||||
|           when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { | ||||
|             uncacheState := s_scalar_uncache | ||||
|           } | ||||
|         }.otherwise { | ||||
|  |  | |||
|  | @ -515,6 +515,7 @@ class UncacheWordReq(implicit p: Parameters) extends DCacheBundle | |||
| { | ||||
|   val cmd  = UInt(M_SZ.W) | ||||
|   val addr = UInt(PAddrBits.W) | ||||
|   val vaddr = UInt(VAddrBits.W) // for uncache buffer forwarding | ||||
|   val data = UInt(XLEN.W) | ||||
|   val mask = UInt((XLEN/8).W) | ||||
|   val id   = UInt(uncacheIdxBits.W) | ||||
|  | @ -534,8 +535,9 @@ class UncacheWordResp(implicit p: Parameters) extends DCacheBundle | |||
| { | ||||
|   val data      = UInt(XLEN.W) | ||||
|   val data_delayed = UInt(XLEN.W) | ||||
|   val id        = UInt(uncacheIdxBits.W) | ||||
|   val nc        = Bool() | ||||
|   val id        = UInt(uncacheIdxBits.W) // resp identified signals | ||||
|   val nc        = Bool() // resp identified signals | ||||
|   val is2lq     = Bool() // resp identified signals | ||||
|   val miss      = Bool() | ||||
|   val replay    = Bool() | ||||
|   val tag_error = Bool() | ||||
|  |  | |||
|  | @ -22,6 +22,7 @@ import org.chipsalliance.cde.config.Parameters | |||
| import utils._ | ||||
| import utility._ | ||||
| import xiangshan._ | ||||
| import xiangshan.mem._ | ||||
| import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes} | ||||
| import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters} | ||||
| 
 | ||||
|  | @ -33,8 +34,9 @@ class UncacheFlushBundle extends Bundle { | |||
| class UncacheEntry(implicit p: Parameters) extends DCacheBundle { | ||||
|   val cmd = UInt(M_SZ.W) | ||||
|   val addr = UInt(PAddrBits.W) | ||||
|   val vaddr = UInt(VAddrBits.W) | ||||
|   val data = UInt(XLEN.W) | ||||
|   val mask = UInt((XLEN/8).W) | ||||
|   val mask = UInt(DataBytes.W) | ||||
|   val id = UInt(uncacheIdxBits.W) | ||||
|   val nc = Bool() | ||||
|   val atomic = Bool() | ||||
|  | @ -43,9 +45,14 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle { | |||
|   val resp_data = UInt(XLEN.W) | ||||
|   val resp_nderr = Bool() | ||||
| 
 | ||||
|   // FIXME lyq: Confirm the forward logic. If no forwarding is needed, the fwd_* fields below can be removed. | ||||
|   val fwd_data = UInt(XLEN.W) | ||||
|   val fwd_mask = UInt(DataBytes.W) | ||||
| 
 | ||||
|   def set(x: UncacheWordReq): Unit = { | ||||
|     cmd := x.cmd | ||||
|     addr := x.addr | ||||
|     vaddr := x.vaddr | ||||
|     data := x.data | ||||
|     mask := x.mask | ||||
|     id := x.id | ||||
|  | @ -53,6 +60,8 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle { | |||
|     atomic := x.atomic | ||||
|     resp_nderr := false.B | ||||
|     resp_data := 0.U | ||||
|     fwd_data := 0.U | ||||
|     fwd_mask := 0.U | ||||
|   } | ||||
| 
 | ||||
|   def update(x: TLBundleD): Unit = { | ||||
|  | @ -60,10 +69,18 @@ class UncacheEntry(implicit p: Parameters) extends DCacheBundle { | |||
|     resp_nderr := x.denied | ||||
|   } | ||||
| 
 | ||||
//   Forward-data overload of update: latches forwardData into fwd_data and
//   forwardMask into fwd_mask. toUncacheWordResp then selects, per byte,
//   fwd_data where the matching fwd_mask bit is set and resp_data otherwise.
|   def update(forwardData: UInt, forwardMask: UInt): Unit = { | ||||
|     fwd_data := forwardData | ||||
|     fwd_mask := forwardMask | ||||
|   } | ||||
| 
 | ||||
|   def toUncacheWordResp(): UncacheWordResp = { | ||||
|     val resp_fwd_data = VecInit((0 until DataBytes).map(j => | ||||
|       Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), resp_data(8*(j+1)-1, 8*j)) | ||||
|     )).asUInt | ||||
|     val r = Wire(new UncacheWordResp) | ||||
|     r := DontCare | ||||
|     r.data := resp_data | ||||
|     r.data := resp_fwd_data | ||||
|     r.id := id | ||||
|     r.nderr := resp_nderr | ||||
|     r.nc := nc | ||||
|  | @ -121,6 +138,7 @@ class UncacheIO(implicit p: Parameters) extends DCacheBundle { | |||
|   val enableOutstanding = Input(Bool()) | ||||
|   val flush = Flipped(new UncacheFlushBundle) | ||||
|   val lsq = Flipped(new UncacheWordIO) | ||||
|   val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) | ||||
| } | ||||
| 
 | ||||
| // convert DCacheIO to TileLink | ||||
|  | @ -182,10 +200,19 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) | |||
|   val uState = RegInit(s_idle) | ||||
|    | ||||
//   sizeMap: builds a Vec by applying f to every index in 0 until UncacheBufferSize.
|   def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f)) | ||||
//   isStore (entry form): true when the entry's cmd equals MemoryOpConstants.M_XWR.
|   def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR | ||||
//   isStore (raw cmd form): true when the given cmd value equals MemoryOpConstants.M_XWR.
|   def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR | ||||
| 
 | ||||
|   // drain buffer | ||||
|   val empty = Wire(Bool()) | ||||
|   val f0_needDrain = Wire(Bool()) | ||||
|   val do_uarch_drain = RegNext(f0_needDrain) | ||||
| 
 | ||||
|   val q0_entry = Wire(new UncacheEntry) | ||||
|   val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W)) | ||||
|   val q0_canSent = Wire(Bool()) | ||||
| 
 | ||||
| 
 | ||||
|   /****************************************************************** | ||||
|    * uState for non-outstanding | ||||
|    ******************************************************************/ | ||||
|  | @ -234,18 +261,27 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) | |||
|     2. how to merge store and response precisely | ||||
|   */ | ||||
| 
 | ||||
|   val e0_invalids = sizeMap(i => !states(i).isValid()) | ||||
|   val e0_invalid_oh = VecInit(PriorityEncoderOH(e0_invalids)).asUInt | ||||
|   val e0_fire = req.fire | ||||
|   val e0_req = req.bits | ||||
|   /** | ||||
|     TODO lyq: should a request to an address already in the buffer be prohibited, made to wait, or forwarded? | ||||
|     NOW: strictly block on the same address; any other policy needs exhaustive case analysis: | ||||
|       - ld->ld wait | ||||
|       - ld->st forward | ||||
|       - st->ld forward | ||||
|       - st->st block | ||||
|   */ | ||||
|   val e0_existSameVec = sizeMap(j =>  | ||||
|     e0_req.addr === entries(j).addr && states(j).isValid() | ||||
|   ) | ||||
|   val e0_invalidVec = sizeMap(i => !states(i).isValid() && !e0_existSameVec(i)) | ||||
|   val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec) | ||||
|   val e0_alloc = e0_canAlloc && e0_fire | ||||
|   req_ready := e0_invalidVec.asUInt.orR && !do_uarch_drain | ||||
|    | ||||
|   req_ready := e0_invalid_oh.orR | ||||
|    | ||||
|   for (i <- 0 until UncacheBufferSize) { | ||||
|     val alloc = e0_fire && e0_invalid_oh(i) | ||||
|     when(alloc){ | ||||
|       entries(i).set(e0_req) | ||||
|       states(i).setValid(true.B) | ||||
|   when (e0_alloc) { | ||||
|     entries(e0_allocIdx).set(e0_req) | ||||
|     states(e0_allocIdx).setValid(true.B) | ||||
|      | ||||
|     // judge whether wait same block: e0 & q0 | ||||
|     val waitSameVec = sizeMap(j =>  | ||||
|  | @ -253,9 +289,9 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) | |||
|     ) | ||||
|     val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent | ||||
|     when (waitSameVec.reduce(_ || _) || waitQ0) { | ||||
|         states(i).setWaitSame(true.B) | ||||
|       } | ||||
|       states(e0_allocIdx).setWaitSame(true.B) | ||||
|     } | ||||
|      | ||||
|   } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -272,7 +308,7 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) | |||
|    ******************************************************************/ | ||||
| 
 | ||||
|   val q0_canSentVec = sizeMap(i =>  | ||||
|     // (io.enableOutstanding || uState === s_refill_req) && // FIXME lyq: comment for debug | ||||
|     (io.enableOutstanding || uState === s_refill_req) && | ||||
|     states(i).can2Uncache() | ||||
|   ) | ||||
|   val q0_res = PriorityEncoderWithFlag(q0_canSentVec) | ||||
|  | @ -360,9 +396,75 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) | |||
|    * 1. when io.flush.valid is true | ||||
|    * 2. when io.lsq.req.bits.atomic is true | ||||
|    ******************************************************************/ | ||||
|   empty := !VecInit(states.map(_.isValid())).asUInt.orR | ||||
|   io.flush.empty := empty | ||||
| 
 | ||||
|   val invalid_entries = PopCount(states.map(!_.isValid())) | ||||
|   io.flush.empty := invalid_entries === UncacheBufferSize.U | ||||
| 
 | ||||
|   /****************************************************************** | ||||
|    * Load Data Forward | ||||
|    *  | ||||
|    * 0. ld in ldu pipeline | ||||
|    *    f0: tag match, fast resp | ||||
|    *    f1: data resp | ||||
|    * | ||||
|    * 1. ld in buffer (in "Enter Buffer") | ||||
|    *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp | ||||
|    *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp | ||||
|    *    NOW: strict block by same address; there is no such forward. | ||||
|    * | ||||
|    ******************************************************************/ | ||||
|    | ||||
|   val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid()) | ||||
|   val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool())) | ||||
|   f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty | ||||
| 
 | ||||
|   for ((forward, i) <- io.forward.zipWithIndex) { | ||||
|     val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr) | ||||
|     val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr) | ||||
|     f0_tagMismatchVec(i) := forward.valid && sizeMap(w => | ||||
|       f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w) | ||||
|     ).asUInt.orR | ||||
|     when (f0_tagMismatchVec(i)) { | ||||
|       XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n", | ||||
|         RegNext(f0_ptagMatches.asUInt), | ||||
|         RegNext(f0_vtagMatches.asUInt), | ||||
|         RegNext(forward.vaddr), | ||||
|         RegNext(forward.paddr) | ||||
|       ) | ||||
|     } | ||||
| 
 | ||||
|     val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid) | ||||
| 
 | ||||
|     val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask)) | ||||
|     val f0_fwdDataCandidates = VecInit(entries.map(e => e.data)) | ||||
|     val f0_fwdMask = shiftMaskToHigh( | ||||
|       forward.paddr, | ||||
|       Mux1H(f0_validTagMatches, f0_fwdMaskCandidates) | ||||
|     ).asTypeOf(Vec(VDataBytes, Bool())) | ||||
|     val f0_fwdData = shiftDataToHigh( | ||||
|       forward.paddr, | ||||
|       Mux1H(f0_validTagMatches, f0_fwdDataCandidates) | ||||
|     ).asTypeOf(Vec(VDataBytes, UInt(8.W))) | ||||
| 
 | ||||
|     val f1_fwdValid = RegNext(forward.valid) | ||||
|     val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid) | ||||
|     val f1_fwdData = RegEnable(f0_fwdData, forward.valid) | ||||
| 
 | ||||
|     forward.addrInvalid := false.B // addr in ubuffer is always ready | ||||
|     forward.dataInvalid := false.B // data in ubuffer is always ready | ||||
|     forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match | ||||
|     for (j <- 0 until VDataBytes) { | ||||
|       forward.forwardMaskFast(j) := f0_fwdMask(j) | ||||
| 
 | ||||
|       forward.forwardMask(j) := false.B | ||||
|       forward.forwardData(j) := DontCare | ||||
|       when(f1_fwdMask(j) && f1_fwdValid) { | ||||
|         forward.forwardMask(j) := true.B | ||||
|         forward.forwardData(j) := f1_fwdData(j) | ||||
|       } | ||||
|     } | ||||
| 
 | ||||
|   } | ||||
| 
 | ||||
| 
 | ||||
|   /****************************************************************** | ||||
|  | @ -386,18 +488,18 @@ class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) | |||
|   } | ||||
| 
 | ||||
|   /* Performance Counters */ | ||||
|   def isStore: Bool = io.lsq.req.bits.cmd === MemoryOpConstants.M_XWR | ||||
|   XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore && !io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore && io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore && io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc) | ||||
|   XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire) | ||||
|   XSPerfAccumulate("vaddr_match_failed", PopCount(f0_tagMismatchVec)) | ||||
|    | ||||
|   val perfEvents = Seq( | ||||
|     ("uncache_mmio_store", io.lsq.req.fire && isStore && !io.lsq.req.bits.nc), | ||||
|     ("uncache_mmio_load", io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc), | ||||
|     ("uncache_nc_store", io.lsq.req.fire && isStore && io.lsq.req.bits.nc), | ||||
|     ("uncache_nc_load", io.lsq.req.fire && !isStore && io.lsq.req.bits.nc), | ||||
|     ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc), | ||||
|     ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc), | ||||
|     ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc), | ||||
|     ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc), | ||||
|     ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire) | ||||
|   ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -75,6 +75,16 @@ object shiftMaskToLow { | |||
|     Mux(addr(3),(mask >> 8).asUInt,mask) | ||||
|   } | ||||
| } | ||||
// shiftDataToHigh: when addr(3) is set, returns data shifted left by 64 bits;
// otherwise returns data unchanged.
// NOTE(review): presumably this moves a 64-bit word into the upper half of a
// 128-bit value selected by paddr bit 3 -- confirm against the callers.
| object shiftDataToHigh { | ||||
|   def apply(addr: UInt,data : UInt): UInt = { | ||||
|     Mux(addr(3), (data << 64).asUInt, data) | ||||
|   } | ||||
| } | ||||
// shiftMaskToHigh: when addr(3) is set, returns mask shifted left by 8 bits;
// otherwise returns mask unchanged. This is the opposite direction of the
// `mask >> 8` selection done by the shift-to-low helper just above.
// NOTE(review): presumably one mask bit per byte, so 8 bits matches a 64-bit
// data shift -- confirm against the callers.
| object shiftMaskToHigh { | ||||
|   def apply(addr: UInt,mask: UInt): UInt = { | ||||
|     Mux(addr(3), (mask << 8).asUInt, mask) | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| class LsPipelineBundle(implicit p: Parameters) extends XSBundle | ||||
|   with HasDCacheParameters | ||||
|  |  | |||
|  | @ -247,8 +247,10 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete | |||
|   switch(pendingstate){ | ||||
|     is(s_idle){ | ||||
|       when(io.uncache.req.fire){ | ||||
|         pendingstate := Mux(loadQueue.io.uncache.req.valid, s_load, | ||||
|                           Mux(io.uncacheOutstanding, s_idle, s_store)) | ||||
|         pendingstate :=  | ||||
|           Mux(io.uncacheOutstanding && io.uncache.req.bits.nc, s_idle, | ||||
|           Mux(loadQueue.io.uncache.req.valid, s_load, | ||||
|           s_store)) | ||||
|       } | ||||
|     } | ||||
|     is(s_load){ | ||||
|  | @ -279,15 +281,11 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete | |||
|     io.uncache.req.valid := false.B | ||||
|     io.uncache.req.bits := DontCare | ||||
|   } | ||||
|   when (io.uncacheOutstanding) { | ||||
|   when (io.uncache.resp.bits.is2lq) { | ||||
|     io.uncache.resp <> loadQueue.io.uncache.resp | ||||
|   } .otherwise { | ||||
|     when(pendingstate === s_load){ | ||||
|       io.uncache.resp <> loadQueue.io.uncache.resp | ||||
|     }.otherwise{ | ||||
|     io.uncache.resp <> storeQueue.io.uncache.resp | ||||
|   } | ||||
|   } | ||||
| 
 | ||||
|   loadQueue.io.debugTopDown <> io.debugTopDown | ||||
| 
 | ||||
|  |  | |||
|  | @ -304,6 +304,13 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|   val mmioReq = Wire(chiselTypeOf(io.uncache.req)) | ||||
|   val ncReq = Wire(chiselTypeOf(io.uncache.req)) | ||||
|   val ncResp = Wire(chiselTypeOf(io.uncache.resp)) | ||||
|   val ncDoReq = Wire(Bool()) | ||||
|   val ncDoResp = Wire(Bool()) | ||||
|   val ncReadNextTrigger = Mux(io.uncacheOutstanding, ncDoReq, ncDoResp) | ||||
|   // ncDoReq is double RegNexted, as ubuffer data write takes 3 cycles. | ||||
|   // TODO lyq: to eliminate coupling by passing signals through ubuffer | ||||
|   val ncDeqTrigger = Mux(io.uncacheOutstanding, RegNext(RegNext(ncDoReq)), ncDoResp) | ||||
|   val ncPtr = Mux(io.uncacheOutstanding, RegNext(RegNext(io.uncache.req.bits.id)), io.uncache.resp.bits.id) | ||||
|    | ||||
|   // store miss align info | ||||
|   io.maControl.storeInfo.data := dataModule.io.rdata(0).data | ||||
|  | @ -320,7 +327,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|   val rdataPtrExtNext = Wire(Vec(EnsbufferWidth, new SqPtr)) | ||||
|   rdataPtrExtNext := rdataPtrExt.map(i => i + | ||||
|     PopCount(dataBuffer.io.enq.map(_.fire)) + | ||||
|     PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire) | ||||
|     PopCount(ncReadNextTrigger || io.mmioStout.fire || io.vecmmioStout.fire) | ||||
|   ) | ||||
| 
 | ||||
|   // deqPtrExtNext traces which inst is about to leave store queue | ||||
|  | @ -334,12 +341,12 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|   val deqPtrExtNext = Wire(Vec(EnsbufferWidth, new SqPtr)) | ||||
|   deqPtrExtNext := deqPtrExt.map(i =>  i + | ||||
|     RegNext(PopCount(VecInit(io.sbuffer.map(_.fire)))) + | ||||
|     PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire) | ||||
|     PopCount(ncDeqTrigger || io.mmioStout.fire || io.vecmmioStout.fire) | ||||
|   ) | ||||
| 
 | ||||
|   io.sqDeq := RegNext( | ||||
|     RegNext(PopCount(VecInit(io.sbuffer.map(_.fire && !misalignBlock)))) + | ||||
|     PopCount(ncResp.fire || io.mmioStout.fire || io.vecmmioStout.fire || finishMisalignSt) | ||||
|     PopCount(ncDeqTrigger || io.mmioStout.fire || io.vecmmioStout.fire || finishMisalignSt) | ||||
|   ) | ||||
| 
 | ||||
|   assert(!RegNext(RegNext(io.sbuffer(0).fire) && (io.mmioStout.fire || io.vecmmioStout.fire))) | ||||
|  | @ -804,13 +811,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|     } | ||||
|     is(s_req) { | ||||
|       when (mmioDoReq) { | ||||
|         when (io.uncacheOutstanding) { | ||||
|           mmioState := s_wb | ||||
|         } .otherwise { | ||||
|         mmioState := s_resp | ||||
|       } | ||||
|     } | ||||
|     } | ||||
|     is(s_resp) { | ||||
|       when(io.uncache.resp.fire && !io.uncache.resp.bits.nc) { | ||||
|         mmioState := s_wb | ||||
|  | @ -841,6 +844,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|   mmioReq.bits := DontCare | ||||
|   mmioReq.bits.cmd  := MemoryOpConstants.M_XWR | ||||
|   mmioReq.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0) | ||||
|   mmioReq.bits.vaddr:= vaddrModule.io.rdata(0) | ||||
|   mmioReq.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data) | ||||
|   mmioReq.bits.mask := shiftMaskToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).mask) | ||||
|   mmioReq.bits.atomic := atomic(GatedRegNext(rdataPtrExtNext(0)).value) | ||||
|  | @ -855,7 +859,6 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|   // TODO: CAN NOT deal with vector nc now! | ||||
|   val nc_idle :: nc_req :: nc_resp :: Nil = Enum(3) | ||||
|   val ncState = RegInit(nc_idle) | ||||
|   val ncDoReq = io.uncache.req.fire && io.uncache.req.bits.nc | ||||
|   val rptr0 = rdataPtrExt(0).value | ||||
|   switch(ncState){ | ||||
|     is(nc_idle) { | ||||
|  | @ -865,32 +868,40 @@ class StoreQueue(implicit p: Parameters) extends XSModule | |||
|     } | ||||
|     is(nc_req) { | ||||
|       when(ncDoReq) { | ||||
|         when(io.uncacheOutstanding) { | ||||
|           ncState := nc_idle | ||||
|         }.otherwise{ | ||||
|           ncState := nc_resp | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     is(nc_resp) { | ||||
|       when(ncResp.fire) { | ||||
|         ncState := nc_idle | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   ncDoReq := io.uncache.req.fire && io.uncache.req.bits.nc | ||||
|   ncDoResp := ncResp.fire | ||||
| 
 | ||||
|   ncReq.valid := ncState === nc_req | ||||
|   ncReq.bits := DontCare | ||||
|   ncReq.bits.cmd  := MemoryOpConstants.M_XWR | ||||
|   ncReq.bits.addr := paddrModule.io.rdata(0) | ||||
|   ncReq.bits.vaddr:= vaddrModule.io.rdata(0) | ||||
|   ncReq.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data) | ||||
|   ncReq.bits.mask := shiftMaskToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).mask) | ||||
|   ncReq.bits.atomic := atomic(GatedRegNext(rdataPtrExtNext(0)).value) | ||||
|   ncReq.bits.nc := true.B | ||||
|   ncReq.bits.id := rdataPtrExt(0).value | ||||
|   ncReq.bits.id := rptr0 | ||||
|    | ||||
|   ncResp.ready := io.uncache.resp.ready | ||||
|   ncResp.valid := io.uncache.resp.fire && io.uncache.resp.bits.nc | ||||
|   ncResp.bits <> io.uncache.resp.bits | ||||
|   when (ncResp.fire) { | ||||
|     val ptr = io.uncache.resp.bits.id | ||||
|     allocated(ptr) := false.B | ||||
|     XSDebug("nc fire: ptr %d\n", ptr) | ||||
|   when (ncDeqTrigger) { | ||||
|     allocated(ncPtr) := false.B | ||||
|     XSDebug("nc fire: ptr %d\n", ncPtr) | ||||
|   } | ||||
|    | ||||
|   mmioReq.ready := io.uncache.req.ready | ||||
|  |  | |||
|  | @ -129,6 +129,7 @@ class IOBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule | |||
|   io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD | ||||
|   io.uncache.req.bits.data := DontCare | ||||
|   io.uncache.req.bits.addr := req.paddr | ||||
|   io.uncache.req.bits.vaddr:= req.vaddr | ||||
|   io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0)) | ||||
|   io.uncache.req.bits.id   := io.id | ||||
|   io.uncache.req.bits.instrtype := DontCare | ||||
|  | @ -241,7 +242,7 @@ class IOBuffer(implicit p: Parameters) extends XSModule | |||
|     allocWidth = LoadPipelineWidth, | ||||
|     freeWidth = 4, | ||||
|     enablePreAlloc = true, | ||||
|     moduleName = "UncacheBuffer freelist" | ||||
|     moduleName = "IOBuffer freelist" | ||||
|   )) | ||||
|   freeList.io := DontCare | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
|  /*************************************************************************************** | ||||
| /*************************************************************************************** | ||||
|  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences | ||||
|  * Copyright (c) 2020-2021 Peng Cheng Laboratory | ||||
|  * | ||||
|  | @ -13,25 +13,25 @@ | |||
|  * | ||||
|  * See the Mulan PSL v2 for more details. | ||||
|  ***************************************************************************************/ | ||||
|  package xiangshan.mem | ||||
| package xiangshan.mem | ||||
| 
 | ||||
|  import chisel3._ | ||||
|  import chisel3.util._ | ||||
|  import org.chipsalliance.cde.config._ | ||||
|  import xiangshan._ | ||||
|  import xiangshan.backend.rob.{RobPtr, RobLsqIO} | ||||
|  import xiangshan.ExceptionNO._ | ||||
|  import xiangshan.cache._ | ||||
|  import utils._ | ||||
|  import utility._ | ||||
|  import xiangshan.backend.Bundles | ||||
|  import xiangshan.backend.Bundles.{DynInst, MemExuOutput} | ||||
|  import xiangshan.backend.fu.FuConfig.LduCfg | ||||
| import chisel3._ | ||||
| import chisel3.util._ | ||||
| import org.chipsalliance.cde.config._ | ||||
| import xiangshan._ | ||||
| import xiangshan.backend.rob.{RobPtr, RobLsqIO} | ||||
| import xiangshan.ExceptionNO._ | ||||
| import xiangshan.cache._ | ||||
| import utils._ | ||||
| import utility._ | ||||
| import xiangshan.backend.Bundles | ||||
| import xiangshan.backend.Bundles.{DynInst, MemExuOutput} | ||||
| import xiangshan.backend.fu.FuConfig.LduCfg | ||||
| 
 | ||||
|  class NCBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule | ||||
| class NCBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModule | ||||
|   with HasCircularQueuePtrHelper | ||||
|   with HasLoadHelper | ||||
|  { | ||||
| { | ||||
|   val io = IO(new Bundle() { | ||||
|     val id = Input(UInt()) | ||||
| 
 | ||||
|  | @ -112,6 +112,7 @@ | |||
|   io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD | ||||
|   io.uncache.req.bits.data := DontCare | ||||
|   io.uncache.req.bits.addr := req.paddr | ||||
|   io.uncache.req.bits.vaddr:= req.vaddr | ||||
|   io.uncache.req.bits.mask := Mux(req.paddr(3), req.mask(15, 8), req.mask(7, 0)) | ||||
|   io.uncache.req.bits.id   := io.id | ||||
|   io.uncache.req.bits.instrtype := DontCare | ||||
|  | @ -186,9 +187,9 @@ | |||
|   } | ||||
| 
 | ||||
|   // end | ||||
|  } | ||||
| } | ||||
| 
 | ||||
|  class NCBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper { | ||||
| class NCBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper { | ||||
|   val io = IO(new Bundle() { | ||||
|     // control | ||||
|     val redirect = Flipped(Valid(new Redirect)) | ||||
|  | @ -290,7 +291,7 @@ | |||
|     enqIndexVec(w) := freeList.io.allocateSlot(offset) | ||||
|   } | ||||
| 
 | ||||
|    // TODO lyq: It's best to choose in robIdx order | ||||
|   // TODO lyq: It's best to choose in robIdx order / the order in which they enter  | ||||
|   val uncacheReqArb = Module(new RRArbiterInit(io.uncache.req.bits.cloneType, LoadNCBufferSize)) | ||||
|   val ncOutArb = Module(new RRArbiterInit(io.ncOut(0).bits.cloneType, LoadNCBufferSize)) | ||||
| 
 | ||||
|  | @ -429,4 +430,4 @@ | |||
|     ("uncache_full_rollback",  io.rollback.valid) | ||||
|   ) | ||||
|   // end | ||||
|  } | ||||
| } | ||||
|  |  | |||
|  | @ -62,6 +62,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule | |||
| 
 | ||||
|       // data path | ||||
|       val sbuffer       = new LoadForwardQueryIO | ||||
|       val ubuffer       = new LoadForwardQueryIO | ||||
|       val vec_forward   = new LoadForwardQueryIO | ||||
|       val lsq           = new LoadToLsqIO | ||||
|       val tl_d_channel  = Input(new DcacheToLduForwardIO) | ||||
|  | @ -608,6 +609,14 @@ class HybridUnit(implicit p: Parameters) extends XSModule | |||
|   io.ldu_io.sbuffer.mask  := s1_in.mask | ||||
|   io.ldu_io.sbuffer.pc    := s1_in.uop.pc // FIXME: remove it | ||||
| 
 | ||||
|   io.ldu_io.ubuffer.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_fast_rep_kill || s1_prf || !s1_ld_flow) | ||||
|   io.ldu_io.ubuffer.vaddr := s1_vaddr | ||||
|   io.ldu_io.ubuffer.paddr := s1_paddr_dup_lsu | ||||
|   io.ldu_io.ubuffer.uop   := s1_in.uop | ||||
|   io.ldu_io.ubuffer.sqIdx := s1_in.uop.sqIdx | ||||
|   io.ldu_io.ubuffer.mask  := s1_in.mask | ||||
|   io.ldu_io.ubuffer.pc    := s1_in.uop.pc // FIXME: remove it | ||||
| 
 | ||||
|   io.ldu_io.vec_forward.valid := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_fast_rep_kill || s1_prf || !s1_ld_flow) | ||||
|   io.ldu_io.vec_forward.vaddr := s1_vaddr | ||||
|   io.ldu_io.vec_forward.paddr := s1_paddr_dup_lsu | ||||
|  | @ -970,16 +979,12 @@ class HybridUnit(implicit p: Parameters) extends XSModule | |||
|   s2_full_fwd := ((~s2_fwd_mask.asUInt).asUInt & s2_in.mask) === 0.U && !io.ldu_io.lsq.forward.dataInvalid && !io.ldu_io.vec_forward.dataInvalid | ||||
|   // generate VLEN/8 Muxes (one per byte of the forwarded data) | ||||
|   for (i <- 0 until VLEN / 8) { | ||||
|     s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i) | ||||
|     s2_fwd_data(i) := Mux( | ||||
|       io.ldu_io.lsq.forward.forwardMask(i), | ||||
|       io.ldu_io.lsq.forward.forwardData(i), | ||||
|       Mux( | ||||
|         io.ldu_io.vec_forward.forwardMask(i), | ||||
|         io.ldu_io.vec_forward.forwardData(i), | ||||
|         io.ldu_io.sbuffer.forwardData(i) | ||||
|       ) | ||||
|     ) | ||||
|     s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i) || io.ldu_io.ubuffer.forwardMask(i) | ||||
|     s2_fwd_data(i) :=  | ||||
|       Mux(io.ldu_io.lsq.forward.forwardMask(i), io.ldu_io.lsq.forward.forwardData(i), | ||||
|       Mux(io.ldu_io.vec_forward.forwardMask(i), io.ldu_io.vec_forward.forwardData(i), | ||||
|       Mux(io.ldu_io.ubuffer.forwardMask(i), io.ldu_io.ubuffer.forwardData(i), | ||||
|       io.ldu_io.sbuffer.forwardData(i)))) | ||||
|   } | ||||
| 
 | ||||
|   XSDebug(s2_fire && s2_ld_flow, "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n", | ||||
|  | @ -1159,7 +1164,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule | |||
|   io.ldu_io.fast_rep_out.bits.delayedLoadError := s3_dly_ld_err | ||||
|   io.ldu_io.lsq.ldin.bits.dcacheRequireReplay  := s3_dcache_rep | ||||
| 
 | ||||
|   val s3_vp_match_fail = RegNext(io.ldu_io.lsq.forward.matchInvalid || io.ldu_io.sbuffer.matchInvalid) && s3_troublem | ||||
|   val s3_vp_match_fail = RegNext(io.ldu_io.lsq.forward.matchInvalid || io.ldu_io.sbuffer.matchInvalid || io.ldu_io.ubuffer.matchInvalid) && s3_troublem | ||||
|   val s3_ldld_rep_inst = | ||||
|       io.ldu_io.lsq.ldld_nuke_query.resp.valid && | ||||
|       io.ldu_io.lsq.ldld_nuke_query.resp.bits.rep_frm_fetch && | ||||
|  |  | |||
|  | @ -132,6 +132,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule | |||
|     val pmp           = Flipped(new PMPRespBundle()) // arrive same to tlb now | ||||
|     val dcache        = new DCacheLoadIO | ||||
|     val sbuffer       = new LoadForwardQueryIO | ||||
|     val ubuffer       = new LoadForwardQueryIO | ||||
|     val lsq           = new LoadToLsqIO | ||||
|     val tl_d_channel  = Input(new DcacheToLduForwardIO) | ||||
|     val forward_mshr  = Flipped(new LduToMissqueueForwardIO) | ||||
|  | @ -926,6 +927,14 @@ class LoadUnit(implicit p: Parameters) extends XSModule | |||
|   io.sbuffer.mask  := s1_in.mask | ||||
|   io.sbuffer.pc    := s1_in.uop.pc // FIXME: remove it | ||||
| 
 | ||||
|   io.ubuffer.valid := s1_valid && s1_nc_with_data && !(s1_exception || s1_tlb_miss || s1_kill || s1_dly_err || s1_prf) | ||||
|   io.ubuffer.vaddr := s1_vaddr | ||||
|   io.ubuffer.paddr := s1_paddr_dup_lsu | ||||
|   io.ubuffer.uop   := s1_in.uop | ||||
|   io.ubuffer.sqIdx := s1_in.uop.sqIdx | ||||
|   io.ubuffer.mask  := s1_in.mask | ||||
|   io.ubuffer.pc    := s1_in.uop.pc // FIXME: remove it | ||||
| 
 | ||||
|   io.lsq.forward.valid     := s1_valid && !(s1_exception || s1_tlb_miss || s1_kill || s1_dly_err || s1_prf) | ||||
|   io.lsq.forward.vaddr     := s1_vaddr | ||||
|   io.lsq.forward.paddr     := s1_paddr_dup_lsu | ||||
|  | @ -1244,7 +1253,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule | |||
| 
 | ||||
|   val s2_data_fwded = s2_dcache_miss && s2_full_fwd | ||||
| 
 | ||||
|   val s2_vp_match_fail = (io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid) && s2_troublem | ||||
|   val s2_fwd_vp_match_invalid = io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid || io.ubuffer.matchInvalid | ||||
|   val s2_vp_match_fail = s2_fwd_vp_match_invalid && s2_troublem | ||||
|   val s2_safe_wakeup = !s2_out.rep_info.need_rep && !s2_mmio && (!s2_in.nc || s2_nc_with_data) && !s2_mis_align && !s2_exception // don't need to replay and is not a mmio\misalign no data | ||||
|   val s2_safe_writeback = s2_exception || s2_safe_wakeup || s2_vp_match_fail | ||||
| 
 | ||||
|  | @ -1271,8 +1281,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule | |||
|   s2_full_fwd := ((~s2_fwd_mask.asUInt).asUInt & s2_in.mask) === 0.U && !io.lsq.forward.dataInvalid | ||||
|   // generate VLEN/8 Muxes (one per byte of the forwarded data) | ||||
|   for (i <- 0 until VLEN / 8) { | ||||
|     s2_fwd_mask(i) := io.lsq.forward.forwardMask(i) || io.sbuffer.forwardMask(i) | ||||
|     s2_fwd_data(i) := Mux(io.lsq.forward.forwardMask(i), io.lsq.forward.forwardData(i), io.sbuffer.forwardData(i)) | ||||
|     s2_fwd_mask(i) := io.lsq.forward.forwardMask(i) || io.sbuffer.forwardMask(i) || io.ubuffer.forwardMask(i) | ||||
|     s2_fwd_data(i) :=  | ||||
|       Mux(io.lsq.forward.forwardMask(i), io.lsq.forward.forwardData(i),  | ||||
|       Mux(s2_nc_with_data, io.ubuffer.forwardData(i), | ||||
|       io.sbuffer.forwardData(i))) | ||||
|   } | ||||
| 
 | ||||
|   XSDebug(s2_fire, "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n", | ||||
|  | @ -1458,7 +1471,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule | |||
|   io.lsq.ldin.bits.dcacheRequireReplay  := s3_dcache_rep | ||||
|   io.fast_rep_out.bits.delayedLoadError := s3_dly_ld_err | ||||
| 
 | ||||
|   val s3_vp_match_fail = GatedValidRegNext(io.lsq.forward.matchInvalid || io.sbuffer.matchInvalid) && s3_troublem | ||||
|   val s3_vp_match_fail = GatedValidRegNext(s2_fwd_vp_match_invalid) && s3_troublem | ||||
|   val s3_rep_frm_fetch = s3_vp_match_fail | ||||
|   val s3_ldld_rep_inst = | ||||
|       io.lsq.ldld_nuke_query.resp.valid && | ||||
|  |  | |||
|  | @ -303,7 +303,7 @@ class Sbuffer(implicit p: Parameters) | |||
|   // sbuffer_in_s1: | ||||
|   // * read data and meta from fifo queue | ||||
|   // * update sbuffer meta (vtag, ptag, flag) | ||||
|   // * prevert that line from being sent to dcache (add a block condition) | ||||
|   // * prevent that line from being sent to dcache (add a block condition) | ||||
|   // * prepare cacheline level write enable signal, RegNext() data and mask | ||||
| 
 | ||||
|   // sbuffer_in_s2: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue