//! Performance regression tests.
//!
//! Detects performance degradation so that new code does not significantly reduce performance.

use vm_cross_arch::UnifiedExecutor;
|
||
use vm_core::{GuestArch, MMU};
|
||
use std::time::Instant;
|
||
use std::collections::HashMap;
|
||
|
||
/// Limits a measured run has to respect in order to count as regression-free.
///
/// Every duration is expressed in microseconds; the TLB hit rate is a percentage.
#[derive(Debug, Clone)]
pub struct PerformanceThresholds {
    /// Upper bound for the average execution time, in microseconds.
    pub avg_execution_time_us: u64,
    /// Upper bound for the slowest single execution, in microseconds.
    pub max_execution_time_us: u64,
    /// Upper bound for the JIT compilation time, in microseconds.
    pub jit_compile_time_us: u64,
    /// Upper bound for the GC pause time, in microseconds.
    pub gc_pause_time_us: u64,
    /// Lower bound for the TLB hit rate, in percent.
    pub tlb_hit_rate_percent: f64,
}

impl Default for PerformanceThresholds {
    /// Returns the stock limits: 1 ms average, 5 ms maximum, 10 ms JIT compilation,
    /// 1 ms GC pause and a 90% TLB hit rate.
    fn default() -> Self {
        // All time limits are stored in microseconds; spell them out in milliseconds.
        const MICROS_PER_MILLI: u64 = 1_000;

        Self {
            avg_execution_time_us: MICROS_PER_MILLI,
            max_execution_time_us: 5 * MICROS_PER_MILLI,
            jit_compile_time_us: 10 * MICROS_PER_MILLI,
            gc_pause_time_us: MICROS_PER_MILLI,
            tlb_hit_rate_percent: 90.0,
        }
    }
}

/// 性能基准线
|
||
#[derive(Debug, Clone)]
|
||
pub struct PerformanceBaseline {
|
||
/// 基准名称
|
||
pub name: String,
|
||
/// 阈值配置
|
||
pub thresholds: PerformanceThresholds,
|
||
/// 历史性能数据
|
||
pub historical_data: Vec<PerformanceSnapshot>,
|
||
}
|
||
|
||
/// 性能快照
|
||
#[derive(Debug, Clone)]
|
||
pub struct PerformanceSnapshot {
|
||
/// 时间戳
|
||
pub timestamp: u64,
|
||
/// 平均执行时间(微秒)
|
||
pub avg_execution_time_us: u64,
|
||
/// 最大执行时间(微秒)
|
||
pub max_execution_time_us: u64,
|
||
/// JIT编译时间(微秒)
|
||
pub jit_compile_time_us: u64,
|
||
/// GC暂停时间(微秒)
|
||
pub gc_pause_time_us: u64,
|
||
/// TLB命中率(百分比)
|
||
pub tlb_hit_rate_percent: f64,
|
||
}
|
||
|
||
impl PerformanceBaseline {
|
||
/// 创建新的性能基准线
|
||
pub fn new(name: String, thresholds: PerformanceThresholds) -> Self {
|
||
Self {
|
||
name,
|
||
thresholds,
|
||
historical_data: Vec::new(),
|
||
}
|
||
}
|
||
|
||
/// 记录性能快照
|
||
pub fn record_snapshot(&mut self, snapshot: PerformanceSnapshot) {
|
||
self.historical_data.push(snapshot);
|
||
// 保留最近1000个快照
|
||
if self.historical_data.len() > 1000 {
|
||
self.historical_data.remove(0);
|
||
}
|
||
}
|
||
|
||
/// 检查性能是否退化
|
||
pub fn check_regression(&self, snapshot: &PerformanceSnapshot) -> Vec<String> {
|
||
let mut regressions = Vec::new();
|
||
|
||
if snapshot.avg_execution_time_us > self.thresholds.avg_execution_time_us {
|
||
regressions.push(format!(
|
||
"平均执行时间 {}us 超过阈值 {}us",
|
||
snapshot.avg_execution_time_us,
|
||
self.thresholds.avg_execution_time_us
|
||
));
|
||
}
|
||
|
||
if snapshot.max_execution_time_us > self.thresholds.max_execution_time_us {
|
||
regressions.push(format!(
|
||
"最大执行时间 {}us 超过阈值 {}us",
|
||
snapshot.max_execution_time_us,
|
||
self.thresholds.max_execution_time_us
|
||
));
|
||
}
|
||
|
||
if snapshot.jit_compile_time_us > self.thresholds.jit_compile_time_us {
|
||
regressions.push(format!(
|
||
"JIT编译时间 {}us 超过阈值 {}us",
|
||
snapshot.jit_compile_time_us,
|
||
self.thresholds.jit_compile_time_us
|
||
));
|
||
}
|
||
|
||
if snapshot.gc_pause_time_us > self.thresholds.gc_pause_time_us {
|
||
regressions.push(format!(
|
||
"GC暂停时间 {}us 超过阈值 {}us",
|
||
snapshot.gc_pause_time_us,
|
||
self.thresholds.gc_pause_time_us
|
||
));
|
||
}
|
||
|
||
if snapshot.tlb_hit_rate_percent < self.thresholds.tlb_hit_rate_percent {
|
||
regressions.push(format!(
|
||
"TLB命中率 {:.2}% 低于阈值 {:.2}%",
|
||
snapshot.tlb_hit_rate_percent,
|
||
self.thresholds.tlb_hit_rate_percent
|
||
));
|
||
}
|
||
|
||
regressions
|
||
}
|
||
}
|
||
|
||
/// Measures how long the executor takes to run a small x86-64 snippet and checks the
/// result against the default thresholds.
///
/// The snippet is written byte by byte at guest address `0x1000`, executed 100 times as
/// warm-up and then 1000 times under the timer. JIT, GC and TLB figures are not measured
/// here; they are filled with values that satisfy the default thresholds.
#[test]
fn test_execution_performance() {
    const WARMUP_RUNS: u32 = 100;
    const MEASURED_RUNS: u32 = 1000;

    let mut baseline =
        PerformanceBaseline::new("execution".to_string(), PerformanceThresholds::default());

    // NOTE(review): the second argument is presumably the guest memory size in bytes
    // (128 MiB) — confirm against `UnifiedExecutor::auto_create`.
    let mut executor = UnifiedExecutor::auto_create(GuestArch::X86_64, 128 * 1024 * 1024)
        .expect("创建执行器失败");

    let code_base: u64 = 0x1000;
    let test_code = create_performance_test_code();

    // Load the code, one byte per MMU write.
    for (addr, &byte) in (code_base..).zip(test_code.iter()) {
        executor
            .mmu_mut()
            .write(addr, u64::from(byte), 1)
            .expect("写入内存失败");
    }

    // Warm-up runs are executed but not timed.
    for _ in 0..WARMUP_RUNS {
        executor.execute(code_base).expect("执行失败");
    }

    // Accumulate full-resolution durations. Truncating every sample to whole microseconds
    // before summing would bias the average downwards by up to 1us per run.
    let mut total = std::time::Duration::ZERO;
    let mut slowest = std::time::Duration::ZERO;
    for _ in 0..MEASURED_RUNS {
        let started = Instant::now();
        executor.execute(code_base).expect("执行失败");
        let elapsed = started.elapsed();

        total += elapsed;
        slowest = slowest.max(elapsed);
    }

    let avg_time = (total / MEASURED_RUNS).as_micros() as u64;
    let max_time = slowest.as_micros() as u64;

    let snapshot = PerformanceSnapshot {
        timestamp: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
        avg_execution_time_us: avg_time,
        max_execution_time_us: max_time,
        jit_compile_time_us: 0,
        gc_pause_time_us: 0,
        tlb_hit_rate_percent: 95.0,
    };

    // Check first, then hand the snapshot over to the history; this avoids cloning it.
    let regressions = baseline.check_regression(&snapshot);
    baseline.record_snapshot(snapshot);

    assert!(regressions.is_empty(), "性能回归: {:?}", regressions);
}

/// Times 150 consecutive `Jit::run` calls on a five-operation IR block and checks the
/// elapsed time against the default JIT compilation threshold.
///
/// The timed span covers all 150 calls, not only the compilation step. Execution, GC and
/// TLB figures are not measured here; they are filled with values that satisfy the
/// default thresholds.
#[test]
fn test_jit_compile_performance() {
    use vm_engine_jit::Jit;
    use vm_ir::{IRBlock, IROp, Terminator};
    use vm_mem::SoftMmu;

    let mut baseline =
        PerformanceBaseline::new("jit_compile".to_string(), PerformanceThresholds::default());

    let mut jit = Jit::new();
    let mut mmu = SoftMmu::new(1024 * 1024, false);

    // IR block under test: two immediates followed by add, sub and mul.
    let ops = vec![
        IROp::MovImm { dst: 1, imm: 10 },
        IROp::MovImm { dst: 2, imm: 20 },
        IROp::Add { dst: 3, src1: 1, src2: 2 },
        IROp::Sub { dst: 4, src1: 3, src2: 1 },
        IROp::Mul { dst: 5, src1: 2, src2: 2 },
    ];
    let block = IRBlock {
        start_pc: 0x1000,
        ops,
        term: Terminator::Ret,
    };

    // Run the block often enough to trigger compilation. The original note states that
    // 150 exceeds HOT_THRESHOLD (100); that constant is defined outside this file.
    let timer = Instant::now();
    for _ in 0..150 {
        let _ = jit.run(&mut mmu, &block);
    }
    let compile_time_us = timer.elapsed().as_micros() as u64;

    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_secs();

    let snapshot = PerformanceSnapshot {
        timestamp: now_secs,
        avg_execution_time_us: 0,
        max_execution_time_us: 0,
        jit_compile_time_us: compile_time_us,
        gc_pause_time_us: 0,
        tlb_hit_rate_percent: 95.0,
    };

    let regressions = baseline.check_regression(&snapshot);
    baseline.record_snapshot(snapshot);

    assert!(regressions.is_empty(), "性能回归: {:?}", regressions);
}

/// Runs ten complete GC cycles over 1000 synthetic root addresses and checks the average
/// reported pause against the default GC pause threshold.
///
/// Each cycle is: start, incremental marking until complete, terminate marking,
/// incremental sweeping until complete, finish. After every cycle the last pause reported
/// by the GC statistics is collected. The average and the worst pause are printed; only
/// the average takes part in the regression check. Execution, JIT and TLB figures are not
/// measured here; they are filled with values that satisfy the default thresholds.
#[test]
fn test_gc_performance() {
    use vm_engine_jit::{UnifiedGC, UnifiedGcConfig};

    const GC_CYCLES: usize = 10;

    let mut baseline =
        PerformanceBaseline::new("gc".to_string(), PerformanceThresholds::default());

    let config = UnifiedGcConfig {
        heap_size_limit: 10 * 1024 * 1024, // 10MB
        mark_quota_us: 1000,
        sweep_quota_us: 500,
        adaptive_quota: true,
        ..Default::default()
    };

    let gc = UnifiedGC::new(config);

    // Synthetic roots: 1000 addresses spaced 1 KiB apart.
    let roots: Vec<u64> = (0..1000).map(|i| i as u64 * 1024).collect();

    let mut pause_times: Vec<u64> = Vec::with_capacity(GC_CYCLES);

    for _ in 0..GC_CYCLES {
        let cycle_start = gc.start_gc(&roots);

        // Incremental marking: keep stepping until the GC reports completion.
        while !gc.incremental_mark().0 {}

        gc.terminate_marking();

        // Incremental sweeping: keep stepping until the GC reports completion.
        while !gc.incremental_sweep().0 {}

        gc.finish_gc(cycle_start);

        pause_times.push(gc.stats().get_last_pause_us());
    }

    let avg_pause = pause_times.iter().sum::<u64>() / pause_times.len() as u64;
    let max_pause = pause_times.iter().copied().max().unwrap_or(0);

    // Report the worst pause too, so an outlier hidden by the average stays visible.
    println!(" GC暂停: 平均={}us, 最大={}us", avg_pause, max_pause);

    let snapshot = PerformanceSnapshot {
        timestamp: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
        avg_execution_time_us: 0,
        max_execution_time_us: 0,
        jit_compile_time_us: 0,
        gc_pause_time_us: avg_pause,
        tlb_hit_rate_percent: 95.0,
    };

    let regressions = baseline.check_regression(&snapshot);
    baseline.record_snapshot(snapshot);

    assert!(regressions.is_empty(), "性能回归: {:?}", regressions);
}

/// Times the linear-scan and graph-colouring register allocators on the same IR block.
///
/// The block holds 50 `MovImm` operations interleaved with 49 `Add` operations and ends
/// with `Terminator::Ret`. For each allocator the timed span covers construction,
/// lifetime analysis and allocation. The limits are fixed in this test: under 10 ms for
/// linear scan and under 50 ms for graph colouring.
#[test]
fn test_register_allocator_performance() {
    use vm_engine_jit::register_allocator::{
        GraphColoringAllocator, LinearScanAllocator, RegisterAllocatorTrait,
    };
    use vm_ir::{IRBuilder, IROp, Terminator};

    // Build the IR block under test.
    let mut builder = IRBuilder::new(0x1000);
    for i in 0..50u32 {
        builder.push(IROp::MovImm {
            dst: i,
            imm: u64::from(i),
        });
        if i > 0 {
            // Sum the previous and the current register into a register of the upper bank.
            builder.push(IROp::Add {
                dst: i + 50,
                src1: i - 1,
                src2: i,
            });
        }
    }
    builder.set_term(Terminator::Ret);
    let block = builder.build();

    // Linear-scan allocator.
    let linear_timer = Instant::now();
    let mut linear_allocator = LinearScanAllocator::new();
    linear_allocator.analyze_lifetimes(&block.ops);
    let _linear_allocations = linear_allocator.allocate_registers(&block.ops);
    let linear_time = linear_timer.elapsed().as_micros() as u64;

    // Graph-colouring allocator.
    let graph_timer = Instant::now();
    let mut graph_allocator = GraphColoringAllocator::new();
    graph_allocator.analyze_lifetimes(&block.ops);
    let _graph_allocations = graph_allocator.allocate_registers(&block.ops);
    let graph_time = graph_timer.elapsed().as_micros() as u64;

    println!(" 线性扫描: {}us, 图着色: {}us", linear_time, graph_time);

    // Enforce the fixed limits.
    assert!(linear_time < 10000, "线性扫描应该 < 10ms");
    assert!(graph_time < 50000, "图着色应该 < 50ms");
}

/// Times four `IrAnalyzer` queries over 1000 IR operations and requires the whole pass
/// to finish in under 10 ms.
///
/// The operations alternate between `Add` (even indices) and `Load` (odd indices), with
/// register numbers wrapped into the range 0..32.
#[test]
fn test_ir_utils_performance() {
    use vm_engine_jit::ir_utils::IrAnalyzer;
    use vm_ir::IROp;

    // Build the operation list.
    let ops: Vec<IROp> = (0..1000)
        .map(|i| match i % 2 {
            0 => IROp::Add {
                dst: (i % 32) as u32,
                src1: ((i + 1) % 32) as u32,
                src2: ((i + 2) % 32) as u32,
            },
            _ => IROp::Load {
                dst: (i % 32) as u32,
                base: ((i + 1) % 32) as u32,
                offset: i as i64,
                size: 8,
                flags: Default::default(),
            },
        })
        .collect();

    // Run every query on every operation; the results themselves are discarded.
    let timer = Instant::now();
    ops.iter().for_each(|op| {
        let _ = IrAnalyzer::collect_read_regs(op);
        let _ = IrAnalyzer::collect_written_regs(op);
        let _ = IrAnalyzer::is_memory_access(op);
        let _ = IrAnalyzer::is_branch(op);
    });
    let elapsed = timer.elapsed().as_micros() as u64;

    println!(" IR工具处理1000个操作耗时: {}us", elapsed);
    assert!(elapsed < 10000, "IR工具处理应该 < 10ms");
}

/// Times one incremental mark step and one incremental sweep step of the stand-alone GC
/// marker and sweeper, each over 1000 synthetic addresses, and requires each step to
/// finish in under 10 ms.
///
/// Marker and sweeper share the same phase flag and statistics object.
#[test]
fn test_gc_module_performance() {
    use std::collections::HashSet;
    use std::sync::atomic::AtomicU64;
    use std::sync::{Arc, Mutex, RwLock};
    use vm_engine_jit::gc_marker::GcMarker;
    use vm_engine_jit::gc_sweeper::GcSweeper;
    use vm_engine_jit::unified_gc::{GCPhase, LockFreeMarkStack, UnifiedGcStats};

    // 1000 synthetic object addresses spaced 1 KiB apart; used as roots and as sweep input.
    let object_addresses = || (0..1000u64).map(|index| index * 1024);

    let mark_stack = Arc::new(LockFreeMarkStack::new(1000));
    let marked_set = Arc::new(RwLock::new(HashSet::new()));
    let phase = Arc::new(AtomicU64::new(GCPhase::Marking as u64));
    let stats = Arc::new(UnifiedGcStats::default());

    // The mark stack and marked set are only needed by the marker, so they are moved in;
    // phase and statistics are shared with the sweeper below.
    let marker = GcMarker::new(mark_stack, marked_set, phase.clone(), stats.clone());

    // Register the root objects.
    let roots: Vec<u64> = object_addresses().collect();
    marker.prepare_marking(&roots);

    // Incremental marking. NOTE(review): the argument is presumably a quota in
    // microseconds (10 ms, per the original note) — confirm against `GcMarker`.
    let mark_timer = Instant::now();
    let (complete, marked_count) = marker.incremental_mark(10000);
    let elapsed = mark_timer.elapsed().as_micros() as u64;

    println!(" GC标记: 完成={}, 标记={}, 耗时={}us", complete, marked_count, elapsed);
    assert!(elapsed < 10000, "GC标记应该 < 10ms");

    // Sweeper. NOTE(review): the meaning of the trailing `100` is not visible in this
    // file — confirm against `GcSweeper::new`.
    let sweep_list = Arc::new(Mutex::new(object_addresses().collect()));
    let sweeper = GcSweeper::new(sweep_list, phase, stats, 100);

    // Nothing is passed as marked, so the second argument is an empty set.
    sweeper.prepare_sweeping(&roots, &HashSet::new());

    // Incremental sweeping, with the same 10000 argument as the mark step.
    let sweep_timer = Instant::now();
    let (complete, freed_count) = sweeper.incremental_sweep(10000);
    let elapsed = sweep_timer.elapsed().as_micros() as u64;

    println!(" GC清扫: 完成={}, 释放={}, 耗时={}us", complete, freed_count, elapsed);
    assert!(elapsed < 10000, "GC清扫应该 < 10ms");
}

/// Returns the 18-byte x86 machine-code snippet executed by the execution benchmark.
///
/// The snippet is a short arithmetic sequence on `eax`/`ebx` followed by `ret`.
fn create_performance_test_code() -> Vec<u8> {
    const MOV_EAX_10: &[u8] = &[0xB8, 0x0A, 0x00, 0x00, 0x00]; // mov eax, 10
    const MOV_EBX_20: &[u8] = &[0xBB, 0x14, 0x00, 0x00, 0x00]; // mov ebx, 20
    const ADD_EAX_EBX: &[u8] = &[0x01, 0xD8]; // add eax, ebx
    const ADD_EAX_5: &[u8] = &[0x83, 0xC0, 0x05]; // add eax, 5
    const SUB_EAX_EBX: &[u8] = &[0x29, 0xD8]; // sub eax, ebx
    const RET: &[u8] = &[0xC3]; // ret

    // Concatenate the instructions in program order.
    [
        MOV_EAX_10,
        MOV_EBX_20,
        ADD_EAX_EBX,
        ADD_EAX_5,
        SUB_EAX_EBX,
        RET,
    ]
    .concat()
}