fix(LLPTW): dup_wait_resp should not send last_hptw_req when excp (#4596)
EMU Test / Changes Detection (push) Waiting to run Details
EMU Test / Generate Verilog (push) Blocked by required conditions Details
EMU Test / EMU - Basics (push) Blocked by required conditions Details
EMU Test / EMU - CHI (push) Blocked by required conditions Details
EMU Test / EMU - Performance (push) Blocked by required conditions Details
EMU Test / EMU - MC (push) Blocked by required conditions Details
EMU Test / SIMV - Basics (push) Blocked by required conditions Details
EMU Test / Upload Artifacts (push) Blocked by required conditions Details
EMU Test / Check Submodules (push) Blocked by required conditions Details
EMU Test / Check Format (push) Blocked by required conditions Details

In the original design, the condition for `to_last_hptw_req` was:
`dup_wait_resp && entries(io.mem.resp.bits.id).req_info.s2xlate ===
allStage`. As a result, when a newly entered LLPTW request dups with an
entry currently returning from memory, and the new request is marked as
allStage, the `to_last_hptw_req` signal would be true. This causes the
state machine to transition to the `state_last_hptw_req` state and send
a request to HPTW.

However, if the page table returned from memory contains a `vsStagePf`
or `gStagePf`, it should directly go to `mem_out` or `bitmap_check`
without performing a final HPTW translation. Therefore, this commit
fixes the bug by adding a restriction to the original `to_last_hptw_req`
condition to ensure that no exceptions are present; otherwise, the state
machine will transition to either `mem_out` or `bitmap_check`.

Additionally, this PR also fixes a bug where `last_hptw_req_ppn` did not
account for the napot case.
This commit is contained in:
Haoyuan Feng 2025-04-21 00:47:49 +08:00 committed by GitHub
parent 96b05afa13
commit 57a8ca5e38
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 20 additions and 8 deletions

View File

@ -781,17 +781,28 @@ class LLPTW(implicit p: Parameters) extends XSModule with HasPtwConst with HasPe
val wait_id = Mux(dup_req_fire, mem_arb.io.chosen, ParallelMux(dup_vec_wait zip entries.map(_.wait_id)))
val dup_wait_resp = io.mem.resp.fire && VecInit(dup_vec_wait)(io.mem.resp.bits.id) && !io.mem.flush_latch(io.mem.resp.bits.id) // dup with the entry that data coming next cycle
val to_wait = Cat(dup_vec_wait).orR || dup_req_fire
val to_mem_out = dup_wait_resp && ((entries(io.mem.resp.bits.id).req_info.s2xlate === noS2xlate) || (entries(io.mem.resp.bits.id).req_info.s2xlate === onlyStage1)) && !bitmap_enable
val to_bitmap_req = (if (HasBitmapCheck) true.B else false.B) && dup_wait_resp && ((entries(io.mem.resp.bits.id).req_info.s2xlate === noS2xlate) || (entries(io.mem.resp.bits.id).req_info.s2xlate === onlyStage1)) && bitmap_enable
val to_cache = if (HasBitmapCheck) Cat(dup_vec_bitmap).orR || Cat(dup_vec_having).orR || Cat(dup_vec_last_hptw).orR
else Cat(dup_vec_having).orR || Cat(dup_vec_last_hptw).orR
val to_hptw_req = io.in.bits.req_info.s2xlate === allStage
val to_last_hptw_req = dup_wait_resp && entries(io.mem.resp.bits.id).req_info.s2xlate === allStage
val last_hptw_req_id = io.mem.resp.bits.id
val req_paddr = MakeAddr(io.in.bits.ppn(ppnLen-1, 0), getVpnn(io.in.bits.req_info.vpn, 0))
val req_hpaddr = MakeAddr(entries(last_hptw_req_id).hptw_resp.genPPNS2(get_pn(req_paddr)), getVpnn(io.in.bits.req_info.vpn, 0))
val index = Mux(entries(last_hptw_req_id).req_info.s2xlate === allStage, req_hpaddr, req_paddr)(log2Up(l2tlbParams.blockBytes)-1, log2Up(XLEN/8))
val last_hptw_req_ppn = io.mem.resp.bits.value.asTypeOf(Vec(blockBits / XLEN, new PteBundle()))(index).getPPN()
val last_hptw_req_pte = io.mem.resp.bits.value.asTypeOf(Vec(blockBits / XLEN, new PteBundle()))(index)
val last_hptw_req_ppn = Mux(last_hptw_req_pte.n === 0.U, last_hptw_req_pte.getPPN(), Cat(last_hptw_req_pte.getPPN()(ptePPNLen - 1, pteNapotBits), io.in.bits.req_info.vpn(pteNapotBits - 1, 0)))
// in `to_last_hptw_req`, we have already judged whether s2xlate === allStage
val last_hptw_vsStagePf = last_hptw_req_pte.isPf(0.U, io.csr.hPBMTE) || !last_hptw_req_pte.isLeaf()
val last_hptw_gStagePf = last_hptw_req_pte.isStage1Gpf(io.csr.hgatp.mode) && !last_hptw_vsStagePf
// noS2xlate || onlyStage1 || allStage but exception; do not need Stage2 translate
val noStage2 = ((entries(io.mem.resp.bits.id).req_info.s2xlate === noS2xlate) || (entries(io.mem.resp.bits.id).req_info.s2xlate === onlyStage1)) ||
(entries(io.mem.resp.bits.id).req_info.s2xlate === allStage && (last_hptw_vsStagePf || last_hptw_gStagePf))
val to_mem_out = dup_wait_resp && noStage2 && !bitmap_enable
val to_bitmap_req = (if (HasBitmapCheck) true.B else false.B) && dup_wait_resp && noStage2 && bitmap_enable
val to_cache = if (HasBitmapCheck) Cat(dup_vec_bitmap).orR || Cat(dup_vec_having).orR || Cat(dup_vec_last_hptw).orR
else Cat(dup_vec_having).orR || Cat(dup_vec_last_hptw).orR
val to_hptw_req = io.in.bits.req_info.s2xlate === allStage
val to_last_hptw_req = dup_wait_resp && entries(io.mem.resp.bits.id).req_info.s2xlate === allStage && !(last_hptw_vsStagePf || last_hptw_gStagePf)
val last_hptw_excp = dup_wait_resp && entries(io.mem.resp.bits.id).req_info.s2xlate === allStage && (last_hptw_vsStagePf || last_hptw_gStagePf)
XSError(RegNext(dup_req_fire && Cat(dup_vec_wait).orR, init = false.B), "mem req but some entries already waiting, should not happed")
XSError(io.in.fire && ((to_mem_out && to_cache) || (to_wait && to_cache)), "llptw enq, to cache conflict with to mem")
@ -811,7 +822,7 @@ class LLPTW(implicit p: Parameters) extends XSModule with HasPtwConst with HasPe
// so 2 + FilterSize is enough to avoid dead-lock
state(enq_ptr) := enq_state
entries(enq_ptr).req_info := io.in.bits.req_info
entries(enq_ptr).ppn := Mux(to_last_hptw_req, last_hptw_req_ppn, io.in.bits.ppn)
entries(enq_ptr).ppn := Mux(to_last_hptw_req || last_hptw_excp, last_hptw_req_ppn, io.in.bits.ppn)
entries(enq_ptr).wait_id := Mux(to_wait, wait_id, enq_ptr)
entries(enq_ptr).af := false.B
if (HasBitmapCheck) {
@ -825,6 +836,7 @@ class LLPTW(implicit p: Parameters) extends XSModule with HasPtwConst with HasPe
entries(enq_ptr).cfs := io.in.bits.bitmapCheck.get.cfs
}
entries(enq_ptr).hptw_resp := Mux(to_last_hptw_req, entries(last_hptw_req_id).hptw_resp, Mux(to_wait, entries(wait_id).hptw_resp, entries(enq_ptr).hptw_resp))
entries(enq_ptr).hptw_resp.gpf := Mux(last_hptw_excp, last_hptw_gStagePf, false.B)
entries(enq_ptr).first_s2xlate_fault := false.B
mem_resp_hit(enq_ptr) := to_bitmap_req || to_mem_out || to_last_hptw_req
}