sbuffer: add an extra cycle for sbuffer write
In previous design, sbuffer valid entry select and sbuffer data write are in the same cycle, which caused huge fanout. An extra write stage is added to solve this problem. Now sbuffer enq logic is divided into 3 stages: sbuffer_in_s0: * read data and meta from store queue * store them in 2 entry fifo queue sbuffer_in_s1: * read data and meta from fifo queue * update sbuffer meta (vtag, ptag, flag) * prevent that line from being sent to dcache (add a block condition) * prepare cacheline level write enable signal, RegNext() data and mask sbuffer_in_s2: * use cacheline level buffer to update sbuffer data and mask * remove dcache write block (if there is)
This commit is contained in:
parent
b909b713d4
commit
3d3419b91b
|
|
@ -145,17 +145,36 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
||||||
val commitCount = RegNext(io.rob.scommit)
|
val commitCount = RegNext(io.rob.scommit)
|
||||||
|
|
||||||
// Read dataModule
|
// Read dataModule
|
||||||
// rdataPtrExtNext to rdataPtrExtNext+StorePipelineWidth entries will be read from dataModule
|
assert(EnsbufferWidth <= 2)
|
||||||
val rdataPtrExtNext = PriorityMuxDefault(Seq.tabulate(EnsbufferWidth)(i =>
|
// rdataPtrExtNext and rdataPtrExtNext+1 entry will be read from dataModule
|
||||||
dataBuffer.io.enq(i).fire -> VecInit(rdataPtrExt.map(_ + (i + 1).U))
|
val rdataPtrExtNext = WireInit(Mux(dataBuffer.io.enq(1).fire(),
|
||||||
).reverse :+ (io.mmioStout.fire -> VecInit(deqPtrExt.map(_ + 1.U))), rdataPtrExt)
|
VecInit(rdataPtrExt.map(_ + 2.U)),
|
||||||
|
Mux(dataBuffer.io.enq(0).fire() || io.mmioStout.fire(),
|
||||||
|
VecInit(rdataPtrExt.map(_ + 1.U)),
|
||||||
|
rdataPtrExt
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
// deqPtrExtNext traces which inst is about to leave store queue
|
// deqPtrExtNext traces which inst is about to leave store queue
|
||||||
val deqPtrExtNext = PriorityMuxDefault(Seq.tabulate(EnsbufferWidth)(i =>
|
//
|
||||||
io.sbuffer(i).fire -> VecInit(deqPtrExt.map(_ + (i + 1).U))
|
// io.sbuffer(i).fire() is RegNexted, as sbuffer data write takes 2 cycles.
|
||||||
).reverse :+ (io.mmioStout.fire -> VecInit(deqPtrExt.map(_ + 1.U))), deqPtrExt)
|
// Before data write finishes, sbuffer is unable to provide store to load
|
||||||
io.sqDeq := RegNext(PriorityMuxDefault(Seq.tabulate(EnsbufferWidth)(i =>
|
// forward data. As a workaround, deqPtrExt and allocated flag update
|
||||||
io.sbuffer(i).fire -> (i + 1).U
|
// is delayed so that load can get the right data from store queue.
|
||||||
).reverse :+ (io.mmioStout.fire -> 1.U), 0.U))
|
//
|
||||||
|
// Modify deqPtrExtNext and io.sqDeq with care!
|
||||||
|
val deqPtrExtNext = Mux(RegNext(io.sbuffer(1).fire()),
|
||||||
|
VecInit(deqPtrExt.map(_ + 2.U)),
|
||||||
|
Mux(RegNext(io.sbuffer(0).fire()) || io.mmioStout.fire(),
|
||||||
|
VecInit(deqPtrExt.map(_ + 1.U)),
|
||||||
|
deqPtrExt
|
||||||
|
)
|
||||||
|
)
|
||||||
|
io.sqDeq := RegNext(Mux(RegNext(io.sbuffer(1).fire()), 2.U,
|
||||||
|
Mux(RegNext(io.sbuffer(0).fire()) || io.mmioStout.fire(), 1.U, 0.U)
|
||||||
|
))
|
||||||
|
assert(!RegNext(RegNext(io.sbuffer(0).fire()) && io.mmioStout.fire()))
|
||||||
|
|
||||||
for (i <- 0 until EnsbufferWidth) {
|
for (i <- 0 until EnsbufferWidth) {
|
||||||
dataModule.io.raddr(i) := rdataPtrExtNext(i).value
|
dataModule.io.raddr(i) := rdataPtrExtNext(i).value
|
||||||
paddrModule.io.raddr(i) := rdataPtrExtNext(i).value
|
paddrModule.io.raddr(i) := rdataPtrExtNext(i).value
|
||||||
|
|
@ -537,9 +556,13 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
||||||
io.sbuffer(i).bits.id := DontCare
|
io.sbuffer(i).bits.id := DontCare
|
||||||
io.sbuffer(i).bits.instrtype := DontCare
|
io.sbuffer(i).bits.instrtype := DontCare
|
||||||
|
|
||||||
|
// io.sbuffer(i).fire() is RegNexted, as sbuffer data write takes 2 cycles.
|
||||||
|
// Before data write finishes, sbuffer is unable to provide store to load
|
||||||
|
// forward data. As a workaround, deqPtrExt and allocated flag update
|
||||||
|
// is delayed so that load can get the right data from store queue.
|
||||||
val ptr = dataBuffer.io.deq(i).bits.sqPtr.value
|
val ptr = dataBuffer.io.deq(i).bits.sqPtr.value
|
||||||
when (io.sbuffer(i).fire()) {
|
when (RegNext(io.sbuffer(i).fire())) {
|
||||||
allocated(ptr) := false.B
|
allocated(RegEnable(ptr, io.sbuffer(i).fire())) := false.B
|
||||||
XSDebug("sbuffer "+i+" fire: ptr %d\n", ptr)
|
XSDebug("sbuffer "+i+" fire: ptr %d\n", ptr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -606,7 +629,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
|
||||||
deqPtrExt := deqPtrExtNext
|
deqPtrExt := deqPtrExtNext
|
||||||
rdataPtrExt := rdataPtrExtNext
|
rdataPtrExt := rdataPtrExtNext
|
||||||
|
|
||||||
val dequeueCount = PriorityMuxDefault(Seq.tabulate(EnsbufferWidth)(i => io.sbuffer(i).fire -> (i + 1).U).reverse :+ (io.mmioStout.fire -> 1.U), 0.U)
|
// val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U))
|
||||||
|
|
||||||
// If redirect at T0, sqCancelCnt is at T2
|
// If redirect at T0, sqCancelCnt is at T2
|
||||||
io.sqCancelCnt := RegNext(lastCycleCancelCount + lastEnqCancel)
|
io.sqCancelCnt := RegNext(lastCycleCancelCount + lastEnqCancel)
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,6 @@ class SbufferEntryState (implicit p: Parameters) extends SbufferBundle {
|
||||||
val state_inflight = Bool() // sbuffer is trying to write this entry to dcache
|
val state_inflight = Bool() // sbuffer is trying to write this entry to dcache
|
||||||
val w_timeout = Bool() // with timeout resp, waiting for resend store pipeline req timeout
|
val w_timeout = Bool() // with timeout resp, waiting for resend store pipeline req timeout
|
||||||
val w_sameblock_inflight = Bool() // same cache block dcache req is inflight
|
val w_sameblock_inflight = Bool() // same cache block dcache req is inflight
|
||||||
val s_recheck_inflight = Bool() // recheck if same cache block dcache req is inflight
|
|
||||||
|
|
||||||
def isInvalid(): Bool = !state_valid
|
def isInvalid(): Bool = !state_valid
|
||||||
def isValid(): Bool = state_valid
|
def isValid(): Bool = state_valid
|
||||||
|
|
@ -65,36 +64,75 @@ class SbufferEntryState (implicit p: Parameters) extends SbufferBundle {
|
||||||
class SbufferBundle(implicit p: Parameters) extends XSBundle with HasSbufferConst
|
class SbufferBundle(implicit p: Parameters) extends XSBundle with HasSbufferConst
|
||||||
|
|
||||||
class DataWriteReq(implicit p: Parameters) extends SbufferBundle {
|
class DataWriteReq(implicit p: Parameters) extends SbufferBundle {
|
||||||
// val idx = UInt(SbufferIndexWidth.W)
|
// univerisal writemask
|
||||||
val wvec = UInt(StoreBufferSize.W)
|
val wvec = UInt(StoreBufferSize.W)
|
||||||
|
// 2 cycle update
|
||||||
val mask = UInt((DataBits/8).W)
|
val mask = UInt((DataBits/8).W)
|
||||||
val data = UInt(DataBits.W)
|
val data = UInt(DataBits.W)
|
||||||
val wordOffset = UInt(WordOffsetWidth.W)
|
val wordOffset = UInt(WordOffsetWidth.W)
|
||||||
val wline = Bool()
|
val wline = Bool() // write whold cacheline
|
||||||
|
// 1 cycle update
|
||||||
|
val cleanMask = Bool() // set whole line's mask to 0
|
||||||
}
|
}
|
||||||
|
|
||||||
class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst {
|
class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst {
|
||||||
val io = IO(new Bundle(){
|
val io = IO(new Bundle(){
|
||||||
val writeReq = Vec(EnsbufferWidth, Flipped(ValidIO(new DataWriteReq)))
|
val writeReq = Vec(EnsbufferWidth, Flipped(ValidIO(new DataWriteReq)))
|
||||||
val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
|
val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
|
||||||
|
val maskOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
|
||||||
})
|
})
|
||||||
|
|
||||||
val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
|
val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
|
||||||
|
val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
|
||||||
|
|
||||||
val req = io.writeReq
|
// 2 cycle data / mask update
|
||||||
|
|
||||||
for(i <- 0 until EnsbufferWidth) {
|
for(i <- 0 until EnsbufferWidth) {
|
||||||
when(req(i).valid){
|
val req = io.writeReq(i)
|
||||||
for(line <- 0 until StoreBufferSize){
|
for(line <- 0 until StoreBufferSize){
|
||||||
|
val sbuffer_in_s1_line_wen = req.valid && req.bits.wvec(line)
|
||||||
|
val sbuffer_in_s2_line_wen = RegNext(sbuffer_in_s1_line_wen)
|
||||||
|
val line_write_buffer_data = RegEnable(req.bits.data, sbuffer_in_s1_line_wen)
|
||||||
|
val line_write_buffer_wline = RegEnable(req.bits.wline, sbuffer_in_s1_line_wen)
|
||||||
|
val line_write_buffer_mask = RegEnable(req.bits.mask, sbuffer_in_s1_line_wen)
|
||||||
|
val line_write_buffer_offset = RegEnable(req.bits.wordOffset(WordsWidth-1, 0), sbuffer_in_s1_line_wen)
|
||||||
|
sbuffer_in_s1_line_wen.suggestName("sbuffer_in_s1_line_wen_"+line)
|
||||||
|
sbuffer_in_s2_line_wen.suggestName("sbuffer_in_s2_line_wen_"+line)
|
||||||
|
line_write_buffer_data.suggestName("line_write_buffer_data_"+line)
|
||||||
|
line_write_buffer_wline.suggestName("line_write_buffer_wline_"+line)
|
||||||
|
line_write_buffer_mask.suggestName("line_write_buffer_mask_"+line)
|
||||||
|
line_write_buffer_offset.suggestName("line_write_buffer_offset_"+line)
|
||||||
for(word <- 0 until CacheLineWords){
|
for(word <- 0 until CacheLineWords){
|
||||||
for(byte <- 0 until DataBytes){
|
for(byte <- 0 until DataBytes){
|
||||||
when(
|
val write_byte = sbuffer_in_s2_line_wen && (
|
||||||
req(i).bits.wvec(line) && (
|
line_write_buffer_mask(byte) && (line_write_buffer_offset === word.U) ||
|
||||||
req(i).bits.mask(byte) && (req(i).bits.wordOffset(WordsWidth-1, 0) === word.U) ||
|
line_write_buffer_wline
|
||||||
req(i).bits.wline
|
|
||||||
)
|
)
|
||||||
|
when(write_byte){
|
||||||
|
data(line)(word)(byte) := line_write_buffer_data(byte*8+7, byte*8)
|
||||||
|
mask(line)(word)(byte) := true.B
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1 cycle line mask clean
|
||||||
|
for(i <- 0 until EnsbufferWidth) {
|
||||||
|
val req = io.writeReq(i)
|
||||||
|
when(req.valid){
|
||||||
|
for(line <- 0 until StoreBufferSize){
|
||||||
|
when(
|
||||||
|
req.bits.wvec(line) &&
|
||||||
|
req.bits.cleanMask
|
||||||
){
|
){
|
||||||
data(line)(word)(byte) := req(i).bits.data(byte*8+7, byte*8)
|
for(word <- 0 until CacheLineWords){
|
||||||
|
for(byte <- 0 until DataBytes){
|
||||||
|
mask(line)(word)(byte) := false.B
|
||||||
|
val debug_last_cycle_write_byte = RegNext(req.valid && req.bits.wvec(line) && (
|
||||||
|
req.bits.mask(byte) && (req.bits.wordOffset(WordsWidth-1, 0) === word.U) ||
|
||||||
|
req.bits.wline
|
||||||
|
))
|
||||||
|
assert(!debug_last_cycle_write_byte)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -103,6 +141,7 @@ class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst
|
||||||
}
|
}
|
||||||
|
|
||||||
io.dataOut := data
|
io.dataOut := data
|
||||||
|
io.maskOut := mask
|
||||||
}
|
}
|
||||||
|
|
||||||
class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst with HasPerfEvents {
|
class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst with HasPerfEvents {
|
||||||
|
|
@ -122,9 +161,10 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
|
|
||||||
val ptag = Reg(Vec(StoreBufferSize, UInt(PTagWidth.W)))
|
val ptag = Reg(Vec(StoreBufferSize, UInt(PTagWidth.W)))
|
||||||
val vtag = Reg(Vec(StoreBufferSize, UInt(VTagWidth.W)))
|
val vtag = Reg(Vec(StoreBufferSize, UInt(VTagWidth.W)))
|
||||||
val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
|
val debug_mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
|
||||||
val waitInflightMask = Reg(Vec(StoreBufferSize, UInt(StoreBufferSize.W)))
|
val waitInflightMask = Reg(Vec(StoreBufferSize, UInt(StoreBufferSize.W)))
|
||||||
val data = dataModule.io.dataOut
|
val data = dataModule.io.dataOut
|
||||||
|
val mask = dataModule.io.maskOut
|
||||||
val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U.asTypeOf(new SbufferEntryState))))
|
val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U.asTypeOf(new SbufferEntryState))))
|
||||||
val cohCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(EvictCountBits.W))))
|
val cohCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(EvictCountBits.W))))
|
||||||
val missqReplayCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(MissqReplayCountBits.W))))
|
val missqReplayCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(MissqReplayCountBits.W))))
|
||||||
|
|
@ -186,6 +226,24 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
val missqReplayHasTimeOut = RegNext(missqReplayMayHasTimeOut) && !RegNext(sbuffer_out_s0_fire)
|
val missqReplayHasTimeOut = RegNext(missqReplayMayHasTimeOut) && !RegNext(sbuffer_out_s0_fire)
|
||||||
val missqReplayTimeOutIdxReg = RegEnable(missqReplayTimeOutIdx, missqReplayMayHasTimeOut)
|
val missqReplayTimeOutIdxReg = RegEnable(missqReplayTimeOutIdx, missqReplayMayHasTimeOut)
|
||||||
|
|
||||||
|
//-------------------------sbuffer enqueue-----------------------------
|
||||||
|
|
||||||
|
// Now sbuffer enq logic is divided into 3 stages:
|
||||||
|
|
||||||
|
// sbuffer_in_s0:
|
||||||
|
// * read data and meta from store queue
|
||||||
|
// * store them in 2 entry fifo queue
|
||||||
|
|
||||||
|
// sbuffer_in_s1:
|
||||||
|
// * read data and meta from fifo queue
|
||||||
|
// * update sbuffer meta (vtag, ptag, flag)
|
||||||
|
// * prevent that line from being sent to dcache (add a block condition)
|
||||||
|
// * prepare cacheline level write enable signal, RegNext() data and mask
|
||||||
|
|
||||||
|
// sbuffer_in_s2:
|
||||||
|
// * use cacheline level buffer to update sbuffer data and mask
|
||||||
|
// * remove dcache write block (if there is)
|
||||||
|
|
||||||
val activeMask = VecInit(stateVec.map(s => s.isActive()))
|
val activeMask = VecInit(stateVec.map(s => s.isActive()))
|
||||||
val drainIdx = PriorityEncoder(activeMask)
|
val drainIdx = PriorityEncoder(activeMask)
|
||||||
|
|
||||||
|
|
@ -258,7 +316,15 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
io.in(i).ready := canInserts(i) && (if (i == 0) 1.B else !sameWord(0)(i) && io.in(i - 1).ready)
|
io.in(i).ready := canInserts(i) && (if (i == 0) 1.B else !sameWord(0)(i) && io.in(i - 1).ready)
|
||||||
)
|
)
|
||||||
|
|
||||||
def wordReqToBufLine(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, insertIdx: UInt, insertVec: UInt, wordOffset: UInt, flushMask: Bool): Unit = {
|
def wordReqToBufLine( // allocate a new line in sbuffer
|
||||||
|
req: DCacheWordReq,
|
||||||
|
reqptag: UInt,
|
||||||
|
reqvtag: UInt,
|
||||||
|
insertIdx: UInt,
|
||||||
|
insertVec: UInt,
|
||||||
|
wordOffset: UInt,
|
||||||
|
flushMask: Bool
|
||||||
|
): Unit = {
|
||||||
assert(UIntToOH(insertIdx) === insertVec)
|
assert(UIntToOH(insertIdx) === insertVec)
|
||||||
val sameBlockInflightMask = genSameBlockInflightMask(reqptag)
|
val sameBlockInflightMask = genSameBlockInflightMask(reqptag)
|
||||||
(0 until StoreBufferSize).map(entryIdx => {
|
(0 until StoreBufferSize).map(entryIdx => {
|
||||||
|
|
@ -272,34 +338,23 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
// missqReplayCount(insertIdx) := 0.U
|
// missqReplayCount(insertIdx) := 0.U
|
||||||
ptag(entryIdx) := reqptag
|
ptag(entryIdx) := reqptag
|
||||||
vtag(entryIdx) := reqvtag // update vtag iff a new sbuffer line is allocated
|
vtag(entryIdx) := reqvtag // update vtag iff a new sbuffer line is allocated
|
||||||
when(flushMask){
|
|
||||||
for(j <- 0 until CacheLineWords){
|
|
||||||
for(i <- 0 until DataBytes){
|
|
||||||
mask(entryIdx)(j)(i) := false.B
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i <- 0 until DataBytes){
|
|
||||||
when(req.mask(i)){
|
|
||||||
mask(entryIdx)(wordOffset)(i) := true.B
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
def mergeWordReq(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, mergeIdx: UInt, mergeVec: UInt, wordOffset: UInt): Unit = {
|
def mergeWordReq( // merge write req into an existing line
|
||||||
|
req: DCacheWordReq,
|
||||||
|
reqptag: UInt,
|
||||||
|
reqvtag: UInt,
|
||||||
|
mergeIdx: UInt,
|
||||||
|
mergeVec: UInt,
|
||||||
|
wordOffset: UInt
|
||||||
|
): Unit = {
|
||||||
assert(UIntToOH(mergeIdx) === mergeVec)
|
assert(UIntToOH(mergeIdx) === mergeVec)
|
||||||
(0 until StoreBufferSize).map(entryIdx => {
|
(0 until StoreBufferSize).map(entryIdx => {
|
||||||
when(mergeVec(entryIdx)) {
|
when(mergeVec(entryIdx)) {
|
||||||
cohCount(entryIdx) := 0.U
|
cohCount(entryIdx) := 0.U
|
||||||
// missqReplayCount(entryIdx) := 0.U
|
// missqReplayCount(entryIdx) := 0.U
|
||||||
for(i <- 0 until DataBytes){
|
|
||||||
when(req.mask(i)){
|
|
||||||
mask(entryIdx)(wordOffset)(i) := true.B
|
|
||||||
// data(entryIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// check if vtag is the same, if not, trigger sbuffer flush
|
// check if vtag is the same, if not, trigger sbuffer flush
|
||||||
when(reqvtag =/= vtag(entryIdx)) {
|
when(reqvtag =/= vtag(entryIdx)) {
|
||||||
XSDebug("reqvtag =/= sbufvtag req(vtag %x ptag %x) sbuffer(vtag %x ptag %x)\n",
|
XSDebug("reqvtag =/= sbufvtag req(vtag %x ptag %x) sbuffer(vtag %x ptag %x)\n",
|
||||||
|
|
@ -320,8 +375,9 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
writeReq(i).bits.mask := in.bits.mask
|
writeReq(i).bits.mask := in.bits.mask
|
||||||
writeReq(i).bits.data := in.bits.data
|
writeReq(i).bits.data := in.bits.data
|
||||||
writeReq(i).bits.wline := in.bits.wline
|
writeReq(i).bits.wline := in.bits.wline
|
||||||
val debug_insertIdx = insertIdxs(i)
|
writeReq(i).bits.cleanMask := false.B
|
||||||
val insertVec = insertVecs(i)
|
val debug_insertIdx = if(i == 0) firstInsertIdx else secondInsertIdx
|
||||||
|
val insertVec = if(i == 0) firstInsertVec else secondInsertVec
|
||||||
assert(!((PopCount(insertVec) > 1.U) && in.fire()))
|
assert(!((PopCount(insertVec) > 1.U) && in.fire()))
|
||||||
val insertIdx = OHToUInt(insertVec)
|
val insertIdx = OHToUInt(insertVec)
|
||||||
val flushMask = if(i == 0) true.B else (0 until i).map(j => !sameTag(i)(j)).reduce(_ && _)
|
val flushMask = if(i == 0) true.B else (0 until i).map(j => !sameTag(i)(j)).reduce(_ && _)
|
||||||
|
|
@ -330,13 +386,12 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx))
|
accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx))
|
||||||
when(in.fire()){
|
when(in.fire()){
|
||||||
when(canMerge(i)){
|
when(canMerge(i)){
|
||||||
// writeReq(i).bits.idx := mergeIdx(i)
|
|
||||||
writeReq(i).bits.wvec := mergeVec(i)
|
writeReq(i).bits.wvec := mergeVec(i)
|
||||||
mergeWordReq(in.bits, inptags(i), invtags(i), mergeIdx(i), mergeVec(i), wordOffset)
|
mergeWordReq(in.bits, inptags(i), invtags(i), mergeIdx(i), mergeVec(i), wordOffset)
|
||||||
XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n")
|
XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n")
|
||||||
}.otherwise({
|
}.otherwise({
|
||||||
// writeReq(i).bits.idx := insertIdx
|
|
||||||
writeReq(i).bits.wvec := insertVec
|
writeReq(i).bits.wvec := insertVec
|
||||||
|
writeReq(i).bits.cleanMask := flushMask
|
||||||
wordReqToBufLine(in.bits, inptags(i), invtags(i), insertIdx, insertVec, wordOffset, flushMask)
|
wordReqToBufLine(in.bits, inptags(i), invtags(i), insertIdx, insertVec, wordOffset, flushMask)
|
||||||
XSDebug(p"insert req $i to line[$insertIdx]\n")
|
XSDebug(p"insert req $i to line[$insertIdx]\n")
|
||||||
assert(debug_insertIdx === insertIdx)
|
assert(debug_insertIdx === insertIdx)
|
||||||
|
|
@ -430,6 +485,20 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
// sbuffer to dcache pipeline
|
// sbuffer to dcache pipeline
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Now sbuffer deq logic is divided into 2 stages:
|
||||||
|
|
||||||
|
// sbuffer_out_s0:
|
||||||
|
// * read data and meta from sbuffer
|
||||||
|
// * RegNext() them
|
||||||
|
// * set line state to inflight
|
||||||
|
|
||||||
|
// sbuffer_out_s1:
|
||||||
|
// * send write req to dcache
|
||||||
|
|
||||||
|
// sbuffer_out_extra:
|
||||||
|
// * receive write result from dcache
|
||||||
|
// * update line state
|
||||||
|
|
||||||
val sbuffer_out_s1_ready = Wire(Bool())
|
val sbuffer_out_s1_ready = Wire(Bool())
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
@ -462,8 +531,16 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
// sbuffer_out_s1
|
// sbuffer_out_s1
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// TODO: use EnsbufferWidth
|
||||||
|
val shouldWaitWriteFinish = VecInit((0 until StorePipelineWidth).map{i =>
|
||||||
|
(RegNext(writeReq(i).bits.wvec).asUInt & UIntToOH(RegNext(sbuffer_out_s0_evictionIdx))).asUInt.orR &&
|
||||||
|
RegNext(writeReq(i).valid)
|
||||||
|
}).asUInt.orR
|
||||||
|
// block dcache write if read / write hazard
|
||||||
|
val blockDcacheWrite = shouldWaitWriteFinish
|
||||||
|
|
||||||
val sbuffer_out_s1_valid = RegInit(false.B)
|
val sbuffer_out_s1_valid = RegInit(false.B)
|
||||||
sbuffer_out_s1_ready := io.dcache.req.ready || !sbuffer_out_s1_valid
|
sbuffer_out_s1_ready := io.dcache.req.ready && !blockDcacheWrite || !sbuffer_out_s1_valid
|
||||||
val sbuffer_out_s1_fire = io.dcache.req.fire()
|
val sbuffer_out_s1_fire = io.dcache.req.fire()
|
||||||
|
|
||||||
// when sbuffer_out_s1_fire, send dcache req stored in pipeline reg to dcache
|
// when sbuffer_out_s1_fire, send dcache req stored in pipeline reg to dcache
|
||||||
|
|
@ -494,7 +571,7 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
|
||||||
val sbuffer_out_s1_evictionPTag = RegEnable(ptag(sbuffer_out_s0_evictionIdx), enable = sbuffer_out_s0_fire)
|
val sbuffer_out_s1_evictionPTag = RegEnable(ptag(sbuffer_out_s0_evictionIdx), enable = sbuffer_out_s0_fire)
|
||||||
val sbuffer_out_s1_evictionVTag = RegEnable(vtag(sbuffer_out_s0_evictionIdx), enable = sbuffer_out_s0_fire)
|
val sbuffer_out_s1_evictionVTag = RegEnable(vtag(sbuffer_out_s0_evictionIdx), enable = sbuffer_out_s0_fire)
|
||||||
|
|
||||||
io.dcache.req.valid := sbuffer_out_s1_valid
|
io.dcache.req.valid := sbuffer_out_s1_valid && !blockDcacheWrite
|
||||||
io.dcache.req.bits := DontCare
|
io.dcache.req.bits := DontCare
|
||||||
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
|
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
|
||||||
io.dcache.req.bits.addr := getAddr(sbuffer_out_s1_evictionPTag)
|
io.dcache.req.bits.addr := getAddr(sbuffer_out_s1_evictionPTag)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue