Merge branch 'l2cache' into dev-icache

This commit is contained in:
Allen 2020-09-07 16:24:14 +08:00
commit dbe8465301
21 changed files with 540 additions and 95 deletions

4
.gitmodules vendored
View File

@ -2,3 +2,7 @@
path = rocket-chip
url = https://github.com/chipsalliance/rocket-chip.git
branch = 2bdb03dbca3f77ad4c378cc1b95ab4961bc1448a
[submodule "block-inclusivecache-sifive"]
path = block-inclusivecache-sifive
url = https://github.com/sifive/block-inclusivecache-sifive.git
branch = d4db623ff534f775ffc49f59c4a9ef24d5d759d0

@ -0,0 +1 @@
Subproject commit d4db623ff534f775ffc49f59c4a9ef24d5d759d0

View File

@ -25,11 +25,11 @@ trait CommonModule extends ScalaModule {
override def scalacPluginIvyDeps = Agg(macroParadise)
}
object `rocket-chip` extends SbtModule with CommonModule {
val rocketChisel = Agg(
ivy"edu.berkeley.cs::chisel3:3.3.1"
)
val rocketChisel = Agg(
ivy"edu.berkeley.cs::chisel3:3.3.1"
)
object `rocket-chip` extends SbtModule with CommonModule {
override def ivyDeps = super.ivyDeps() ++ Agg(
ivy"${scalaOrganization()}:scala-reflect:${scalaVersion()}",
@ -53,6 +53,15 @@ object `rocket-chip` extends SbtModule with CommonModule {
}
object `block-inclusivecache-sifive` extends CommonModule {
override def ivyDeps = super.ivyDeps() ++ rocketChisel
override def millSourcePath = super.millSourcePath / 'design / 'craft / 'inclusivecache
override def moduleDeps = super.moduleDeps ++ Seq(`rocket-chip`)
}
object XiangShan extends CommonModule with SbtModule {
override def millSourcePath = millOuterCtx.millSourcePath
@ -62,7 +71,7 @@ object XiangShan extends CommonModule with SbtModule {
ivy"edu.berkeley.cs::chisel3:3.3.2"
)
override def moduleDeps = super.moduleDeps ++ Seq(`rocket-chip`)
override def moduleDeps = super.moduleDeps ++ Seq(`rocket-chip`, `block-inclusivecache-sifive`)
object test extends Tests {
override def ivyDeps = super.ivyDeps() ++ Agg(

View File

@ -99,10 +99,10 @@ P = 4
SUITE = xiangshan.backend.exu.AluTest
unit-test:
cd .. && mill chiselModule.test.testOnly -o -s $(SUITE) -P$(P)
cd .. && mill XiangShan.test.testOnly -o -s $(SUITE) -P$(P)
unit-test-all:
cd .. && mill chiselModule.test.test -P$(P)
cd .. && mill XiangShan.test.test -P$(P)
# ------------------------------------------------------------------
# chore

View File

@ -29,7 +29,7 @@ case class DisableAllPrintAnnotation() extends NoTargetAnnotation
object DisableAllPrintAnnotation extends HasShellOptions {
val options = Seq(
new ShellOption[Unit](
longOption = "disable-log",
longOption = "disable-all",
toAnnotationSeq = _ => Seq(DisableAllPrintAnnotation()),
helpText =
"All the verilog 'printf' will be removed\n",

View File

@ -2,8 +2,9 @@ package utils
import chisel3._
import chipsalliance.rocketchip.config.Parameters
import chisel3.util.DecoupledIO
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{TLClientNode, TLIdentityNode, TLMasterParameters, TLMasterPortParameters}
import freechips.rocketchip.tilelink.{TLBundle, TLClientNode, TLIdentityNode, TLMasterParameters, TLMasterPortParameters}
import xiangshan.HasXSLog
class DebugIdentityNode()(implicit p: Parameters) extends LazyModule {
@ -19,14 +20,18 @@ class DebugIdentityNode()(implicit p: Parameters) extends LazyModule {
lazy val module = new LazyModuleImp(this) with HasXSLog with HasTLDump{
val (out, _) = node.out(0)
val (in, _) = node.in(0)
when(in.a.fire()){
XSDebug(" ")
in.a.bits.dump
}
when(in.d.fire()){
XSDebug(" ")
in.d.bits.dump
def debug(t: TLBundle, valid: Boolean = false): Unit ={
def fire[T <: Data](x: DecoupledIO[T]) = if(valid) x.valid else x.fire()
val channels = Seq(t.a, t.b, t.c, t.d, t.e)
channels.foreach(c =>
when(fire(c)){
XSDebug(" ")
c.bits.dump
}
)
}
debug(in, true)
}
}

View File

@ -6,40 +6,30 @@ import xiangshan.HasXSLog
trait HasTLDump { this: HasXSLog =>
implicit class dumpA(a: TLBundleA) {
def dump =
XSDebug(false, true.B,
a.channelName + " opcode: %x param: %x size: %x source: %d address: %x mask: %x data: %x corrupt: %b\n",
a.opcode, a.param, a.size, a.source, a.address, a.mask, a.data, a.corrupt
)
}
implicit class dumpB(b: TLBundleB) {
def dump =
XSDebug(false, true.B,
b.channelName + " opcode: %x param: %x size: %x source: %d address: %x mask: %x data: %x corrupt: %b\n",
b.opcode, b.param, b.size, b.source, b.address, b.mask, b.data, b.corrupt
)
}
implicit class dumpC(c: TLBundleC) {
def dump =
XSDebug(false, true.B,
c.channelName + " opcode: %x param: %x size: %x source: %d address: %x data: %x corrupt: %b\n",
c.opcode, c.param, c.size, c.source, c.address, c.data, c.corrupt
)
}
implicit class dumpD(d: TLBundleD) {
def dump =
XSDebug(false, true.B,
d.channelName + " opcode: %x param: %x size: %x source: %d sink: %d denied: %b data: %x corrupt: %b\n",
d.opcode, d.param, d.size, d.source, d.sink, d.denied, d.data, d.corrupt
)
}
implicit class dumpE(e: TLBundleE) {
def dump =
XSDebug(false, true.B, e.channelName + " sink: %d\n", e.sink)
implicit class TLDump(channel: TLChannel) {
def dump = channel match {
case a: TLBundleA =>
XSDebug(false, true.B,
a.channelName + " opcode: %x param: %x size: %x source: %d address: %x mask: %x data: %x corrupt: %b\n",
a.opcode, a.param, a.size, a.source, a.address, a.mask, a.data, a.corrupt
)
case b: TLBundleB =>
XSDebug(false, true.B,
b.channelName + " opcode: %x param: %x size: %x source: %d address: %x mask: %x data: %x corrupt: %b\n",
b.opcode, b.param, b.size, b.source, b.address, b.mask, b.data, b.corrupt
)
case c: TLBundleC =>
XSDebug(false, true.B,
c.channelName + " opcode: %x param: %x size: %x source: %d address: %x data: %x corrupt: %b\n",
c.opcode, c.param, c.size, c.source, c.address, c.data, c.corrupt
)
case d: TLBundleD =>
XSDebug(false, true.B,
d.channelName + " opcode: %x param: %x size: %x source: %d sink: %d denied: %b data: %x corrupt: %b\n",
d.opcode, d.param, d.size, d.source, d.sink, d.denied, d.data, d.corrupt
)
case e: TLBundleE =>
XSDebug(false, true.B, e.channelName + " sink: %d\n", e.sink)
}
}
}

View File

@ -12,7 +12,8 @@ import xiangshan.mem._
import xiangshan.cache.{ICache, DCache, DCacheParameters, ICacheParameters, PTW, Uncache}
import chipsalliance.rocketchip.config
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{TLBundleParameters, TLCacheCork, TLClientNode, TLIdentityNode, TLXbar}
import freechips.rocketchip.tilelink.{TLBundleParameters, TLCacheCork, TLBuffer, TLClientNode, TLIdentityNode, TLXbar}
import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters}
import utils._
case class XSCoreParameters
@ -166,7 +167,10 @@ trait HasXSParameter {
val LRSCCycles = 16
val dcacheParameters = DCacheParameters(
tagECC = Some("secded"),
dataECC = Some("secded")
dataECC = Some("secded"),
nMissEntries = 16,
nLoadMissEntries = 8,
nStoreMissEntries = 8
)
}
@ -221,13 +225,32 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule {
val icache = LazyModule(new ICache())
val ptw = LazyModule(new PTW())
// TODO: crossbar Icache/Dcache/PTW here
val mem = TLXbar()
val mem = TLIdentityNode()
val mmio = uncache.clientNode
mem := TLCacheCork(sinkIds = 1) := dcache.clientNode
mem := TLCacheCork(sinkIds = 1) := icache.clientNode
mem := TLCacheCork(sinkIds = 1) := ptw.node
// TODO: refactor these params
private val l2 = LazyModule(new InclusiveCache(
CacheParameters(
level = 2,
ways = 4,
sets = 512 * 1024 / (64 * 4),
blockBytes = 64,
beatBytes = 8
),
InclusiveCacheMicroParameters(
writeBytes = 8
)
))
private val xbar = TLXbar()
xbar := TLBuffer() := DebugIdentityNode() := dcache.clientNode
xbar := TLBuffer() := DebugIdentityNode() := icache.clientNode
xbar := TLBuffer() := DebugIdentityNode() := ptw.node
l2.node := xbar
mem := TLBuffer() := TLCacheCork() := TLBuffer() := l2.node
lazy val module = new XSCoreImp(this)
}

View File

@ -18,6 +18,8 @@ class AtomicsPipe extends DCacheModule
val meta_resp = Input(Vec(nWays, new L1Metadata))
val inflight_req_idxes = Output(Vec(3, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(3, Valid(UInt())))
val block_probe_addr = Output(Valid(UInt()))
val wb_invalidate_lrsc = Input(Valid(UInt()))
})
// LSU requests
@ -129,6 +131,20 @@ class AtomicsPipe extends DCacheModule
lrsc_count := lrsc_count - 1.U
}
io.block_probe_addr.valid := lrsc_valid
io.block_probe_addr.bits := lrsc_addr
// when we release this block,
// we invalidate this reservation set
when (io.wb_invalidate_lrsc.valid) {
when (io.wb_invalidate_lrsc.bits === lrsc_addr) {
lrsc_count := 0.U
}
// when we release this block, there should be no matching lrsc inflight
assert (!(s2_valid && (s2_lr || s2_sc) && io.wb_invalidate_lrsc.bits === get_block_addr(s2_req.addr)))
}
when (s2_valid) {
when (s2_req.addr === debug_sc_fail_addr) {
when (s2_sc_fail) {

View File

@ -299,13 +299,36 @@ class DuplicatedMetaArray extends DCacheModule {
meta(w).io.write <> io.write
meta(w).io.read <> io.read(w)
io.resp(w) <> meta(w).io.resp
// meta(w).io.resp <> io.resp(w)
}
def dumpRead() = {
(0 until LoadPipelineWidth) map { w =>
when (io.read(w).fire()) {
XSDebug("MetaArray Read channel: $w idx: %d way_en: %x tag: %x\n",
io.read(w).bits.idx, io.read(w).bits.way_en, io.read(w).bits.tag)
}
}
}
def dumpWrite() = {
when (io.write.fire()) {
XSDebug("MetaArray Write: idx: %d way_en: %x tag: %x new_tag: %x new_coh: %x\n",
io.write.bits.idx, io.write.bits.way_en, io.write.bits.tag, io.write.bits.data.tag, io.write.bits.data.coh.state)
}
}
def dumpResp() = {
(0 until LoadPipelineWidth) map { w =>
(0 until nWays) map { i =>
XSDebug(s"MetaArray Resp: channel: $w way: $i tag: %x coh: %x\n",
io.resp(w)(i).tag, io.resp(w)(i).coh.state)
}
}
}
def dump() = {
(0 until LoadPipelineWidth) map { w =>
XSDebug(s"MetaArray $w\n")
meta(w).dump
}
dumpRead
dumpWrite
dumpResp
}
}

View File

@ -108,6 +108,8 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// core data structures
val dataArray = Module(new DuplicatedDataArray)
val metaArray = Module(new DuplicatedMetaArray)
dataArray.dump()
metaArray.dump()
//----------------------------------------
@ -218,9 +220,13 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
loadArb.io.in(1) <> lsu_0.req
assert(!(lsu_0.req.fire() && lsu_0.req.bits.meta.replay), "LSU should not replay requests")
assert(!(loadReplay.req.fire() && !loadReplay.req.bits.meta.replay), "LoadMissQueue should replay requests")
val ldu_0_block = block_load(loadArb.io.out.bits.addr)
// do not block replayed reqs
block_decoupled(loadArb.io.out, ldu_0.req, ldu_0_block && !loadArb.io.out.bits.meta.replay)
val ldu_0_nack = nack_load(loadArb.io.out.bits.addr)
// do not nack replayed reqs
ldu_0.req <> loadArb.io.out
ldu(0).io.nack := ldu_0_nack && !loadArb.io.out.bits.meta.replay
when (ldu_0_nack) {
printf("DCache: LoadUnit 0 nacked\n")
}
ldu_0.resp.ready := false.B
@ -242,8 +248,13 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
ldu_0.s1_kill := lsu_0.s1_kill
for (w <- 1 until LoadPipelineWidth) {
val load_w_block = block_load(io.lsu.load(w).req.bits.addr)
block_decoupled(io.lsu.load(w).req, ldu(w).io.lsu.req, load_w_block)
val load_w_nack = nack_load(io.lsu.load(w).req.bits.addr)
ldu(w).io.lsu.req <> io.lsu.load(w).req
ldu(w).io.nack := load_w_nack
when (load_w_nack) {
printf(s"DCache: LoadUnit $w nacked\n")
}
ldu(w).io.lsu.resp <> io.lsu.load(w).resp
ldu(w).io.lsu.s1_kill <> io.lsu.load(w).s1_kill
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.replay), "LSU should not replay requests")
@ -275,9 +286,13 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val store_block = block_store(storeMissQueue.io.replay.req.bits.addr)
block_decoupled(storeMissQueue.io.replay.req, stu.io.lsu.req, store_block && !storeMissQueue.io.replay.req.bits.meta.replay)
storeMissQueue.io.replay.resp <> stu.io.lsu.resp
when (store_block) {
printf("DCache: StorePipe blocked\n")
}
//----------------------------------------
// atomics pipe
atomics.io.wb_invalidate_lrsc := wb.io.inflight_addr
atomicsMissQueue.io.replay <> atomics.io.lsu
val atomicsClientIdWidth = 1
val lsuAtomicsClientId = 0.U(atomicsClientIdWidth.W)
@ -306,6 +321,9 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val atomics_block = block_atomics(atomicsReqArb.io.out.bits.addr)
block_decoupled(atomicsReqArb.io.out, atomicsReq, atomics_block)
when (atomics_block) {
printf("DCache: AtomicsPipe blocked\n")
}
// Response
val atomicsResp = atomicsMissQueue.io.lsu.resp
@ -379,6 +397,9 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val miss_block = block_miss(missReqArb.io.out.bits.addr)
block_decoupled(missReqArb.io.out, missReq, miss_block)
when (miss_block) {
printf("DCache: MissQueue blocked\n")
}
// Response
val missResp = missQueue.io.resp
@ -447,8 +468,11 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// prober
// bus.b <> prober.io.req
prober.io.req := DontCare
prober.io.block := block_probe(prober.io.inflight_req_block_addr.bits)
prober.io.req <> bus.b
when (prober.io.block) {
printf("DCache: prober blocked\n")
}
//----------------------------------------
// wb
@ -464,7 +488,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
TLArbiter.lowestFromSeq(edge, bus.c, Seq(prober.io.rep, wb.io.release))
// synchronization stuff
def block_load(addr: UInt) = {
def nack_load(addr: UInt) = {
val store_addr_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val store_addr_match = store_addr_matches.reduce(_||_)
@ -481,18 +505,22 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
atomics_addr_match || miss_idx_match
atomics_addr_match || prober_addr_match || miss_idx_match
}
def block_atomics(addr: UInt) = {
val store_addr_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val store_addr_match = store_addr_matches.reduce(_||_)
val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_addr_match || miss_idx_match
store_addr_match || prober_addr_match || miss_idx_match
}
def block_miss(addr: UInt) = {
@ -502,10 +530,27 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val atomics_idx_matches = VecInit(atomics.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val atomics_idx_match = atomics_idx_matches.reduce(_||_)
val prober_idx_match = prober.io.inflight_req_idx.valid && prober.io.inflight_req_idx.bits === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_idx_match || atomics_idx_match || miss_idx_match
store_idx_match || atomics_idx_match || prober_idx_match || miss_idx_match
}
def block_probe(addr: UInt) = {
val store_addr_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val store_addr_match = store_addr_matches.reduce(_||_)
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val lrsc_addr_match = atomics.io.block_probe_addr.valid && atomics.io.block_probe_addr.bits === get_block_addr(addr)
val miss_idx_matches = VecInit(missQueue.io.block_probe_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_addr_match || atomics_addr_match || lrsc_addr_match || miss_idx_match
}
def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {

View File

@ -13,12 +13,23 @@ class LoadPipe extends DCacheModule
val data_resp = Input(Vec(nWays, Vec(refillCycles, Bits(encRowBits.W))))
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
// req got nacked in stage 0?
val nack = Input(Bool())
})
// LSU requests
io.lsu.req.ready := io.meta_read.ready && io.data_read.ready
io.meta_read.valid := io.lsu.req.valid
io.data_read.valid := io.lsu.req.valid
// replayed req should never be nacked
assert(!(io.lsu.req.valid && io.lsu.req.bits.meta.replay && io.nack))
// if you got nacked, you can directly pass down
val not_nacked_ready = io.meta_read.ready && io.data_read.ready
val nacked_ready = true.B
// ready can wait for valid
io.lsu.req.ready := io.lsu.req.valid && ((!io.nack && not_nacked_ready) || (io.nack && nacked_ready))
io.meta_read.valid := io.lsu.req.valid && !io.nack
io.data_read.valid := io.lsu.req.valid && !io.nack
val meta_read = io.meta_read.bits
val data_read = io.data_read.bits
@ -46,7 +57,7 @@ class LoadPipe extends DCacheModule
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
val s1_addr = s1_req.addr
val s1_nack = false.B
val s1_nack = RegNext(io.nack)
dump_pipeline_reqs("LoadPipe s1", s1_valid, s1_req)

View File

@ -40,6 +40,9 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
val block_idx = Output(Valid(UInt()))
val block_addr = Output(Valid(UInt()))
val block_probe_idx = Output(Valid(UInt()))
val block_probe_addr = Output(Valid(UInt()))
val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))
@ -94,6 +97,12 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.block_idx.bits := req_idx
io.block_addr.bits := req_block_addr
// to preserve forward progress, we allow probe when we are dealing with acquire/grant
io.block_probe_idx.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_addr.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_idx.bits := req_idx
io.block_probe_addr.bits := req_block_addr
// assign default values to output signals
io.req.ready := false.B
io.resp.valid := false.B
@ -121,6 +130,11 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.wb_req.bits := DontCare
XSDebug("entry: %d state: %d\n", io.id, state)
XSDebug("entry: %d block_idx_valid: %b block_idx: %d block_addr_valid: %b block_addr: %d\n",
io.id, io.block_idx.valid, io.block_idx.bits, io.block_addr.valid, io.block_addr.bits)
XSDebug("entry: %d block_probe_idx_valid: %b block_probe_idx: %d block_probe_addr_valid: %b block_probe_addr: %d\n",
io.id, io.block_probe_idx.valid, io.block_probe_idx.bits, io.block_probe_addr.valid, io.block_probe_addr.bits)
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
@ -325,12 +339,11 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.resp.bits.entry_id := io.id
when (io.resp.fire()) {
when (isWrite(req.cmd)) {
// Set dirty
val (is_hit, _, coh_on_hit) = new_coh.onAccess(req.cmd)
assert(is_hit, "We still don't have permissions for this store")
new_coh := coh_on_hit
}
// additional assertion
val (is_hit, _, coh_on_hit) = new_coh.onAccess(req.cmd)
assert(is_hit, "We still don't have permissions for this store")
assert(new_coh === coh_on_hit, "Incorrect coherence meta data")
state := s_client_finish
}
}
@ -365,6 +378,9 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
val inflight_req_idxes = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val block_probe_idxes = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val block_probe_addrs = Output(Vec(cfg.nMissEntries, Valid(UInt())))
})
val resp_arb = Module(new Arbiter(new MissResp, cfg.nMissEntries))
@ -419,6 +435,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io.inflight_req_idxes(i) <> entry.io.block_idx
io.inflight_req_block_addrs(i) <> entry.io.block_addr
io.block_probe_idxes(i) <> entry.io.block_probe_idx
io.block_probe_addrs(i) <> entry.io.block_probe_addr
entry
}

View File

@ -7,7 +7,7 @@ import utils.XSDebug
import freechips.rocketchip.tilelink._
import utils.{HasTLDump, XSDebug}
class ProbeUnit(edge: TLEdgeOut) extends DCacheModule {
class ProbeUnit(edge: TLEdgeOut) extends DCacheModule with HasTLDump {
val io = IO(new Bundle {
val req = Flipped(Decoupled(new TLBundleB(edge.bundle)))
val rep = Decoupled(new TLBundleC(edge.bundle))
@ -16,9 +16,12 @@ class ProbeUnit(edge: TLEdgeOut) extends DCacheModule {
val meta_write = Decoupled(new L1MetaWriteReq)
val wb_req = Decoupled(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
val block = Input(Bool())
val inflight_req_idx = Output(Valid(UInt()))
val inflight_req_block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_release :: s_wb_req :: s_wb_resp :: s_meta_write_req :: Nil = Enum(8)
val s_invalid :: s_wait_sync :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_release :: s_wb_req :: s_wb_resp :: s_meta_write_req :: Nil = Enum(9)
val state = RegInit(s_invalid)
@ -45,15 +48,30 @@ class ProbeUnit(edge: TLEdgeOut) extends DCacheModule {
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
// state === s_invalid
io.inflight_req_idx.valid := state =/= s_invalid
io.inflight_req_idx.bits := req_idx
io.inflight_req_block_addr.valid := state =/= s_invalid
io.inflight_req_block_addr.bits := req_block_addr
XSDebug("state: %d\n", state)
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
req := io.req.bits
state := s_meta_read_req
state := s_wait_sync
}
}
// we could be blocked by miss queue, or anything else
// just wait for them
when (state === s_wait_sync) {
when (!io.block) {
state := s_meta_read_req
}
}
when (state === s_meta_read_req) {
io.meta_read.valid := true.B
val meta_read = io.meta_read.bits
@ -127,4 +145,20 @@ class ProbeUnit(edge: TLEdgeOut) extends DCacheModule {
state := s_invalid
}
}
// print wb_req
XSDebug(io.wb_req.fire(), "wb_req idx %x tag: %x source: %d param: %x way_en: %x voluntary: %b\n",
io.wb_req.bits.idx, io.wb_req.bits.tag,
io.wb_req.bits.source, io.wb_req.bits.param,
io.wb_req.bits.way_en, io.wb_req.bits.voluntary)
// print tilelink messages
when (io.req.fire()) {
XSDebug("mem_probe ")
io.req.bits.dump
}
when (io.rep.fire()) {
XSDebug("mem_release ")
io.rep.bits.dump
}
}

View File

@ -106,8 +106,8 @@ class StorePipe extends DCacheModule
// now, we do not deal with ECC
for (i <- 0 until refillCycles) {
wdata(i) := io.lsu.req.bits.data(rowBits * (i + 1) - 1, rowBits * i)
wmask(i) := io.lsu.req.bits.mask(rowBytes * (i + 1) - 1, rowBytes * i)
wdata(i) := s2_req.data(rowBits * (i + 1) - 1, rowBits * i)
wmask(i) := s2_req.mask(rowBytes * (i + 1) - 1, rowBytes * i)
wdata_merged(i) := Cat(s2_data(i)(encRowBits - 1, rowBits),
mergePutData(s2_data(i)(rowBits - 1, 0), wdata(i), wmask(i)))
}

View File

@ -24,6 +24,7 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
val data_resp = Input(Vec(nWays, Vec(refillCycles, Bits(encRowBits.W))))
val release = DecoupledIO(new TLBundleC(edge.bundle))
val mem_grant = Input(Bool())
val inflight_addr = Output(Valid(UInt()))
})
val req = Reg(new WritebackReq(edge.bundle.sourceBits))
@ -47,6 +48,9 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
io.release.valid := false.B
io.release.bits := DontCare
io.inflight_addr.valid := state =/= s_invalid
io.inflight_addr.bits := req.idx << blockOffBits
XSDebug("state: %d\n", state)
when (state === s_invalid) {

View File

@ -149,6 +149,8 @@ class LoadUnit extends XSModule {
val l4_dcache = RegNext(l3_dcache, false.B)
val l4_bundle = RegNext(l3_bundle)
val fullForward = Wire(Bool())
assert(!(io.dcache.resp.ready && !io.dcache.resp.valid), "DCache response got lost")
io.dcache.resp.ready := l4_valid && l4_dcache
when (io.dcache.resp.fire()) {
@ -158,7 +160,11 @@ class LoadUnit extends XSModule {
l4_out.bits.uop := io.dcache.resp.bits.meta.uop
l4_out.bits.mmio := io.dcache.resp.bits.meta.mmio
l4_out.bits.mask := io.dcache.resp.bits.meta.mask
l4_out.bits.miss := io.dcache.resp.bits.miss
// when we can get the data completely from forward
// we no longer need to access dcache
// treat nack as miss
l4_out.bits.miss := Mux(fullForward, false.B,
io.dcache.resp.bits.miss || io.dcache.resp.bits.nack)
XSDebug(io.dcache.resp.fire(), p"DcacheResp(l4): data:0x${Hexadecimal(io.dcache.resp.bits.data)} paddr:0x${Hexadecimal(io.dcache.resp.bits.meta.paddr)} pc:0x${Hexadecimal(io.dcache.resp.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.resp.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.resp.bits.meta.uop.lsroqIdx} miss:${io.dcache.resp.bits.miss}\n")
} .otherwise {
l4_out.bits := l4_bundle
@ -193,6 +199,7 @@ class LoadUnit extends XSModule {
})
l4_out.bits.forwardMask := forwardMask
l4_out.bits.forwardData := forwardVec
fullForward := (~l4_out.bits.forwardMask.asUInt & l4_out.bits.mask) === 0.U
PipelineConnect(l4_out, l5_in, io.ldout.fire() || (l5_in.bits.miss || l5_in.bits.mmio) && l5_in.valid, false.B)

View File

@ -193,7 +193,7 @@ uint64_t Emulator::execute(uint64_t n) {
uint32_t lasttime_poll = 0;
uint32_t lasttime_snapshot = 0;
uint64_t lastcommit = n;
const int stuck_limit = 500;
const int stuck_limit = 2000;
uint32_t wdst[DIFFTEST_WIDTH];
uint64_t wdata[DIFFTEST_WIDTH];

254
src/test/scala/cache/L2CacheTest.scala vendored Normal file
View File

@ -0,0 +1,254 @@
package cache
import chipsalliance.rocketchip.config.{Field, Parameters}
import chisel3._
import chisel3.util._
import chiseltest.experimental.TestOptionBuilder._
import chiseltest.internal.VerilatorBackendAnnotation
import chiseltest._
import chisel3.experimental.BundleLiterals._
import chiseltest.ChiselScalatestTester
import device.AXI4RAM
import freechips.rocketchip.amba.axi4.AXI4UserYanker
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{TLBuffer, TLCacheCork, TLToAXI4, TLXbar}
import org.scalatest.{FlatSpec, Matchers}
import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters}
import utils.{DebugIdentityNode, HoldUnless, XSDebug}
import xiangshan.HasXSLog
import xiangshan.cache.{DCache, DCacheLineReq, DCacheWordReq, MemoryOpConstants}
import xiangshan.testutils.AddSinks
import scala.util.Random
// Geometry of the InclusiveCache (L2) instantiated by the test harness.
// Number of sets is derived as capacityKB*1024 / (blockBytes * ways * banks).
case class L2CacheTestParams
(
ways: Int = 4,
banks: Int = 1,
capacityKB: Int = 4,
blockBytes: Int = 64,
beatBytes: Int = 8
) {
// A cache block must be at least one bus beat wide, otherwise the
// TileLink burst decomposition below would be meaningless.
require(blockBytes >= beatBytes)
}
// rocket-chip config Field used to inject L2CacheTestParams into the harness
// (looked up via p(L2CacheTestKey) in L2TestTop).
case object L2CacheTestKey extends Field[L2CacheTestParams]
// Harness interface: push one write request in; the read-back value
// (observed through the other core, exercising L2 coherence) comes out.
class L2TestTopIO extends Bundle {
val in = Flipped(DecoupledIO(new Bundle() {
val wdata = Input(UInt(64.W)) // 64-bit word to store (replicated across the cache line)
val waddr = Input(UInt(20.W)) // target address (test aligns it to the block size)
val hartId = Input(UInt(1.W)) // which of the two cores performs the store
}))
val out = DecoupledIO(new Bundle() {
val rdata = Output(UInt(64.W)) // word read back by the non-writing core
})
}
// Two DCache "cores" behind a TLXbar, an InclusiveCache L2, and an AXI4 RAM.
// The test writes a line through one core and reads it back through the
// other, so a correct result requires coherence traffic through the L2.
class L2TestTop()(implicit p: Parameters) extends LazyModule{
val cores = Array.fill(2)(LazyModule(new DCache()))
val l2params = p(L2CacheTestKey)
val l2 = LazyModule(new InclusiveCache(
CacheParameters(
level = 2,
ways = l2params.ways,
// sets = capacity / (blockBytes * ways * banks)
sets = l2params.capacityKB * 1024 / (l2params.blockBytes * l2params.ways * l2params.banks),
blockBytes = l2params.blockBytes,
beatBytes = l2params.beatBytes
),
InclusiveCacheMicroParameters(
writeBytes = l2params.beatBytes
)
))
// Backing memory. NOTE(review): the AddressSet covers 0x0-0xffffffffffL but
// memByte is only 128 MiB — presumably AXI4RAM wraps/ignores high bits; confirm.
val ram = LazyModule(new AXI4RAM(
AddressSet(0x0L, 0xffffffffffL),
memByte = 128 * 1024 * 1024,
useBlackBox = false
))
// Diplomacy graph: core -> buffer -> (debug tap) -> xbar -> L2 -> cork -> AXI RAM.
// DebugIdentityNode only logs TileLink traffic; it does not alter it.
val xbar = TLXbar()
for(core <- cores){
xbar := TLBuffer() := DebugIdentityNode() := core.clientNode
}
l2.node := TLBuffer() := DebugIdentityNode() := xbar
ram.node :=
AXI4UserYanker() :=
TLToAXI4() :=
TLBuffer() :=
TLCacheCork() :=
l2.node
lazy val module = new LazyModuleImp(this) with HasXSLog {
val io = IO(new L2TestTopIO)
// Latch the request fields on handshake so they stay stable for the
// whole write/read sequence.
val in = HoldUnless(io.in.bits, io.in.fire())
cores.foreach(_.module.io <> DontCare)
val storePorts = cores.map(_.module.io.lsu.store)
val loadPorts = cores.map(_.module.io.lsu.lsroq)
// Build a full-line store request (mask fully set, whole line written).
def sendStoreReq(addr: UInt, data: UInt): DCacheLineReq = {
val req = Wire(new DCacheLineReq)
req.cmd := MemoryOpConstants.M_XWR
req.addr := addr
req.data := data
req.mask := Fill(req.mask.getWidth, true.B)
req.meta := DontCare
req
}
// Build a word load request for the same address.
def sendLoadReq(addr: UInt): DCacheWordReq = {
val req = Wire(new DCacheWordReq)
req.cmd := MemoryOpConstants.M_XRD
req.addr := addr
req.data := DontCare
req.mask := Fill(req.mask.getWidth, true.B)
req.meta := DontCare
req
}
// FSM: idle -> store on hart `in.hartId` -> wait resp -> load on the
// OTHER hart -> wait resp -> present result.
val s_idle :: s_write_req :: s_write_resp :: s_read_req :: s_read_resp :: s_finish :: Nil = Enum(6)
val state = RegInit(s_idle)
switch(state){
is(s_idle){
when(io.in.fire()){
state := s_write_req
}
}
is(s_write_req){
when(storePorts.map(_.req.fire()).reduce(_||_)){
state := s_write_resp
}
}
is(s_write_resp){
when(storePorts.map(_.resp.fire()).reduce(_||_)){
state := s_read_req
}
}
is(s_read_req){
when(loadPorts.map(_.req.fire()).reduce(_||_)){
state := s_read_resp
}
}
is(s_read_resp){
when(loadPorts.map(_.resp.fire()).reduce(_||_)){
state := s_finish
}
}
}
io.in.ready := state === s_idle
// Replicate the 64-bit word across the line and store it via the selected hart.
val storeReq = Wire(new DCacheLineReq)
storeReq := sendStoreReq(in.waddr, Fill(8, in.wdata))
storePorts.zipWithIndex.foreach{
case (port, i) =>
port.req.bits := storeReq
port.req.valid := state===s_write_req && i.U===in.hartId
port.resp.ready := true.B
XSDebug(
port.req.fire(),
"write data %x to dcache [%d]\n",
port.req.bits.data,
i.U
)
}
XSDebug(p"state: $state\n")
// The load is issued by every hart EXCEPT the writer (i.U =/= in.hartId),
// so the read must be served via L2 coherence, not the writer's L1.
val loadReq = sendLoadReq(in.waddr)
loadPorts.zipWithIndex.foreach{
case (port, i) =>
port.req.bits := loadReq
port.req.valid := state===s_read_req && i.U=/=in.hartId
port.resp.ready := true.B
XSDebug(
port.resp.fire(),
// (note: "form" is a typo for "from" in this log string; left as-is
// because changing it would alter emitted logs)
"read data %x form dcache [%d]\n",
port.resp.bits.data,
i.U
)
}
// Capture the first responding port's data.
// NOTE(review): `state := s_finish` here duplicates the is(s_read_resp)
// transition above; Chisel last-connect makes this assignment win, and
// both target the same state, so behavior is unchanged — but one of the
// two drivers is redundant.
val rdata = Reg(UInt(64.W))
when(loadPorts.map(_.resp.fire()).reduce(_||_)){
state := s_finish
rdata := PriorityMux(
loadPorts.map(p => p.resp.fire() -> p.resp.bits.data)
)
}
io.out.bits.rdata := rdata
io.out.valid := state === s_finish
when(io.out.fire()){
state := s_idle
}
}
}
// Top-level LazyModule wrapper so the test can elaborate L2TestTop directly.
// AddSinks() terminates the BoringUtils log/perf sources used by XSDebug,
// which would otherwise be dangling in a standalone test elaboration.
class L2TestTopWrapper()(implicit p: Parameters) extends LazyModule {
val testTop = LazyModule(new L2TestTop())
lazy val module = new LazyModuleImp(this){
val io = IO(new L2TestTopIO)
AddSinks()
io <> testTop.module.io
}
}
// End-to-end L2 test: 100 random block-aligned (addr, data, hartId) writes,
// each followed by a read-back through the other core; requires Verilator.
class L2CacheTest extends FlatSpec with ChiselScalatestTester with Matchers{
top.Parameters.set(top.Parameters.debugParameters)
it should "run" in {
implicit val p = Parameters((site, up, here) => {
case L2CacheTestKey =>
L2CacheTestParams()
})
test(LazyModule(new L2TestTopWrapper()).module)
.withAnnotations(Seq(VerilatorBackendAnnotation)){ c =>
c.io.in.initSource().setSourceClock(c.clock)
c.io.out.initSink().setSinkClock(c.clock)
// Let reset / initialization settle before driving traffic.
c.clock.step(100)
for(i <- 0 until 100){
// Mask to 20 bits then clear the low 9 bits to align to the 512 B
// granule. NOTE(review): block size is 64 B; 0xffe00 aligns to 512 B,
// which is stricter than needed but still block-aligned.
val addr = Random.nextInt(0xfffff) & 0xffe00 // align to block size
// Keep the value non-negative so .U conversion is valid.
val data = Random.nextLong() & 0x7fffffffffffffffL
c.io.in.enqueue(chiselTypeOf(c.io.in.bits).Lit(
_.waddr -> addr.U,
_.wdata -> data.U,
_.hartId -> Random.nextInt(2).U
))
// The non-writing core must observe the freshly written value.
c.io.out.expectDequeue(chiselTypeOf(c.io.out.bits).Lit(
_.rdata -> data.U
))
}
}
}
}

View File

@ -67,14 +67,12 @@ class XSSimTop()(implicit p: config.Parameters) extends LazyModule {
AXI4UserYanker() :=
TLToAXI4() :=
TLBuffer(BufferParams.default) :=
TLFragmenter(8, 64, holdFirstDeny = true) :=
DebugIdentityNode() :=
soc.mem
axiMMIO.axiBus :=
AXI4UserYanker() :=
TLToAXI4() :=
TLFragmenter(8, 8) :=
soc.extDev
lazy val module = new LazyModuleImp(this) {
@ -136,7 +134,7 @@ object TestMain extends App {
implicit val p = config.Parameters.empty
// generate verilog
XiangShanStage.execute(
args,
args.filterNot(_ == "--disable-log"),
Seq(
ChiselGeneratorAnnotation(() => LazyModule(new XSSimTop).module)
)

View File

@ -2,6 +2,7 @@ package xiangshan.testutils
import chisel3._
import chisel3.util.experimental.BoringUtils
import utils.GTimer
object AddSinks {
def apply(dispBegin: Int = 0, dispEnd: Int = -1) = {
@ -34,7 +35,9 @@ object AddSinks {
for (s <- sinks){ BoringUtils.addSink(tmp, s) }
val disp_enable = WireInit(dispBegin.S(64.W).asUInt() < dispEnd.S(64.W).asUInt())
val time = GTimer()
BoringUtils.addSource(disp_enable, "DISPLAY_LOG_ENABLE")
BoringUtils.addSource(time, "logTimestamp")
}
}